You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@beam.apache.org by dh...@apache.org on 2016/03/24 03:47:25 UTC

[01/67] [partial] incubator-beam git commit: Directory reorganization

Repository: incubator-beam
Updated Branches:
  refs/heads/master 9f8dd182c -> 257a7a6be


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java
deleted file mode 100644
index 2f350b2..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderRegistryTest.java
+++ /dev/null
@@ -1,521 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static org.hamcrest.Matchers.allOf;
-import static org.hamcrest.Matchers.containsString;
-import static org.junit.Assert.assertEquals;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry.IncompatibleCoderException;
-import com.google.cloud.dataflow.sdk.coders.Proto2CoderTestMessages.MessageA;
-import com.google.cloud.dataflow.sdk.coders.protobuf.ProtoCoder;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.collect.ImmutableList;
-import com.google.protobuf.Duration;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.Serializable;
-import java.lang.reflect.Type;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-/**
- * Tests for CoderRegistry.
- */
-@RunWith(JUnit4.class)
-public class CoderRegistryTest {
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  public static CoderRegistry getStandardRegistry() {
-    CoderRegistry registry = new CoderRegistry();
-    registry.registerStandardCoders();
-    return registry;
-  }
-
-  private static class SerializableClass implements Serializable {
-  }
-
-  private static class NotSerializableClass { }
-
-  @Test
-  public void testSerializableFallbackCoderProvider() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    registry.setFallbackCoderProvider(SerializableCoder.PROVIDER);
-    Coder<?> serializableCoder = registry.getDefaultCoder(SerializableClass.class);
-
-    assertEquals(serializableCoder, SerializableCoder.of(SerializableClass.class));
-  }
-
-  @Test
-  public void testProtoCoderFallbackCoderProvider() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-
-    // MessageA is a Protocol Buffers test message with syntax 2
-    assertEquals(registry.getDefaultCoder(MessageA.class), ProtoCoder.of(MessageA.class));
-
-    // Duration is a Protocol Buffers default type with syntax 3
-    assertEquals(registry.getDefaultCoder(Duration.class), ProtoCoder.of(Duration.class));
-  }
-
-  @Test
-  public void testAvroFallbackCoderProvider() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    registry.setFallbackCoderProvider(AvroCoder.PROVIDER);
-    Coder<?> avroCoder = registry.getDefaultCoder(NotSerializableClass.class);
-
-    assertEquals(avroCoder, AvroCoder.of(NotSerializableClass.class));
-  }
-
-  @Test
-  public void testRegisterInstantiatedCoder() throws Exception {
-    CoderRegistry registry = new CoderRegistry();
-    registry.registerCoder(MyValue.class, MyValueCoder.of());
-    assertEquals(registry.getDefaultCoder(MyValue.class), MyValueCoder.of());
-  }
-
-  @SuppressWarnings("rawtypes") // this class exists to fail a test because of its rawtypes
-  private class MyListCoder extends DeterministicStandardCoder<List> {
-    @Override
-    public void encode(List value, OutputStream outStream, Context context)
-        throws CoderException, IOException {
-    }
-
-    @Override
-    public List decode(InputStream inStream, Context context)
-        throws CoderException, IOException {
-      return Collections.emptyList();
-    }
-
-    @Override
-    public List<Coder<?>> getCoderArguments() {
-      return Collections.emptyList();
-    }
-  }
-
-  @Test
-  public void testRegisterInstantiatedCoderInvalidRawtype() throws Exception {
-    thrown.expect(IllegalArgumentException.class);
-    thrown.expectMessage("may not be used with unspecialized generic classes");
-    CoderRegistry registry = new CoderRegistry();
-    registry.registerCoder(List.class, new MyListCoder());
-  }
-
-  @Test
-  public void testSimpleDefaultCoder() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    assertEquals(StringUtf8Coder.of(), registry.getDefaultCoder(String.class));
-  }
-
-  @Test
-  public void testSimpleUnknownDefaultCoder() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    thrown.expect(CannotProvideCoderException.class);
-    thrown.expectMessage(allOf(
-        containsString(UnknownType.class.getCanonicalName()),
-        containsString("No CoderFactory has been registered"),
-        containsString("does not have a @DefaultCoder annotation"),
-        containsString("does not implement Serializable")));
-    registry.getDefaultCoder(UnknownType.class);
-  }
-
-  @Test
-  public void testParameterizedDefaultListCoder() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    TypeDescriptor<List<Integer>> listToken = new TypeDescriptor<List<Integer>>() {};
-    assertEquals(ListCoder.of(VarIntCoder.of()),
-                 registry.getDefaultCoder(listToken));
-
-    registry.registerCoder(MyValue.class, MyValueCoder.class);
-    TypeDescriptor<KV<String, List<MyValue>>> kvToken =
-        new TypeDescriptor<KV<String, List<MyValue>>>() {};
-    assertEquals(KvCoder.of(StringUtf8Coder.of(),
-                            ListCoder.of(MyValueCoder.of())),
-                 registry.getDefaultCoder(kvToken));
-
-  }
-
-  @Test
-  public void testParameterizedDefaultMapCoder() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    TypeDescriptor<Map<Integer, String>> mapToken = new TypeDescriptor<Map<Integer, String>>() {};
-    assertEquals(MapCoder.of(VarIntCoder.of(), StringUtf8Coder.of()),
-                 registry.getDefaultCoder(mapToken));
-  }
-
-  @Test
-  public void testParameterizedDefaultNestedMapCoder() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    TypeDescriptor<Map<Integer, Map<String, Double>>> mapToken =
-        new TypeDescriptor<Map<Integer, Map<String, Double>>>() {};
-    assertEquals(
-        MapCoder.of(VarIntCoder.of(), MapCoder.of(StringUtf8Coder.of(), DoubleCoder.of())),
-        registry.getDefaultCoder(mapToken));
-  }
-
-  @Test
-  public void testParameterizedDefaultSetCoder() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    TypeDescriptor<Set<Integer>> setToken = new TypeDescriptor<Set<Integer>>() {};
-    assertEquals(SetCoder.of(VarIntCoder.of()), registry.getDefaultCoder(setToken));
-  }
-
-  @Test
-  public void testParameterizedDefaultNestedSetCoder() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    TypeDescriptor<Set<Set<Integer>>> setToken = new TypeDescriptor<Set<Set<Integer>>>() {};
-    assertEquals(SetCoder.of(SetCoder.of(VarIntCoder.of())), registry.getDefaultCoder(setToken));
-  }
-
-  @Test
-  public void testParameterizedDefaultCoderUnknown() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    TypeDescriptor<List<UnknownType>> listUnknownToken = new TypeDescriptor<List<UnknownType>>() {};
-
-    thrown.expect(CannotProvideCoderException.class);
-    thrown.expectMessage(String.format(
-        "Cannot provide coder for parameterized type %s: Unable to provide a default Coder for %s",
-        listUnknownToken,
-        UnknownType.class.getCanonicalName()));
-
-    registry.getDefaultCoder(listUnknownToken);
-  }
-
-  @Test
-  public void testTypeParameterInferenceForward() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    MyGenericClass<MyValue, List<MyValue>> instance =
-        new MyGenericClass<MyValue, List<MyValue>>() {};
-
-    Coder<?> bazCoder = registry.getDefaultCoder(
-        instance.getClass(),
-        MyGenericClass.class,
-        Collections.<Type, Coder<?>>singletonMap(
-            TypeDescriptor.of(MyGenericClass.class).getTypeParameter("FooT"), MyValueCoder.of()),
-        TypeDescriptor.of(MyGenericClass.class).getTypeParameter("BazT"));
-
-    assertEquals(ListCoder.of(MyValueCoder.of()), bazCoder);
-  }
-
-  @Test
-  public void testTypeParameterInferenceBackward() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    MyGenericClass<MyValue, List<MyValue>> instance =
-        new MyGenericClass<MyValue, List<MyValue>>() {};
-
-    Coder<?> fooCoder = registry.getDefaultCoder(
-        instance.getClass(),
-        MyGenericClass.class,
-        Collections.<Type, Coder<?>>singletonMap(
-            TypeDescriptor.of(MyGenericClass.class).getTypeParameter("BazT"),
-            ListCoder.of(MyValueCoder.of())),
-        TypeDescriptor.of(MyGenericClass.class).getTypeParameter("FooT"));
-
-    assertEquals(MyValueCoder.of(), fooCoder);
-  }
-
-  @Test
-  public void testGetDefaultCoderFromIntegerValue() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    Integer i = 13;
-    Coder<Integer> coder = registry.getDefaultCoder(i);
-    assertEquals(VarIntCoder.of(), coder);
-  }
-
-  @Test
-  public void testGetDefaultCoderFromNullValue() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    assertEquals(VoidCoder.of(), registry.getDefaultCoder((Void) null));
-  }
-
-  @Test
-  public void testGetDefaultCoderFromKvValue() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    KV<Integer, String> kv = KV.of(13, "hello");
-    Coder<KV<Integer, String>> coder = registry.getDefaultCoder(kv);
-    assertEquals(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()),
-        coder);
-  }
-
-  @Test
-  public void testGetDefaultCoderFromKvNullValue() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    KV<Void, Void> kv = KV.of((Void) null, (Void) null);
-    assertEquals(KvCoder.of(VoidCoder.of(), VoidCoder.of()),
-        registry.getDefaultCoder(kv));
-  }
-
-  @Test
-  public void testGetDefaultCoderFromNestedKvValue() throws Exception {
-    CoderRegistry registry = getStandardRegistry();
-    KV<Integer, KV<Long, KV<String, String>>> kv = KV.of(13, KV.of(17L, KV.of("hello", "goodbye")));
-    Coder<KV<Integer, KV<Long, KV<String, String>>>> coder = registry.getDefaultCoder(kv);
-    assertEquals(
-        KvCoder.of(VarIntCoder.of(),
-            KvCoder.of(VarLongCoder.of(),
-                KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))),
-        coder);
-  }
-
-  @Test
-  public void testTypeCompatibility() throws Exception {
-    CoderRegistry.verifyCompatible(BigEndianIntegerCoder.of(), Integer.class);
-    CoderRegistry.verifyCompatible(
-        ListCoder.of(BigEndianIntegerCoder.of()),
-        new TypeDescriptor<List<Integer>>() {}.getType());
-  }
-
-  @Test
-  public void testIntVersusStringIncompatibility() throws Exception {
-    thrown.expect(IncompatibleCoderException.class);
-    thrown.expectMessage("not assignable");
-    CoderRegistry.verifyCompatible(BigEndianIntegerCoder.of(), String.class);
-  }
-
-  private static class TooManyComponentCoders<T> extends ListCoder<T> {
-    public TooManyComponentCoders(Coder<T> actualComponentCoder) {
-      super(actualComponentCoder);
-    }
-
-    @Override
-    public List<? extends Coder<?>> getCoderArguments() {
-      return ImmutableList.<Coder<?>>builder()
-          .addAll(super.getCoderArguments())
-          .add(BigEndianLongCoder.of())
-          .build();
-    }
-  }
-
-  @Test
-  public void testTooManyCoderArguments() throws Exception {
-    thrown.expect(IncompatibleCoderException.class);
-    thrown.expectMessage("type parameters");
-    thrown.expectMessage("less than the number of coder arguments");
-    CoderRegistry.verifyCompatible(
-        new TooManyComponentCoders<>(BigEndianIntegerCoder.of()), List.class);
-  }
-
-  @Test
-  public void testComponentIncompatibility() throws Exception {
-    thrown.expect(IncompatibleCoderException.class);
-    thrown.expectMessage("component coder is incompatible");
-    CoderRegistry.verifyCompatible(
-        ListCoder.of(BigEndianIntegerCoder.of()),
-        new TypeDescriptor<List<String>>() {}.getType());
-  }
-
-  @Test
-  public void testDefaultCoderAnnotationGenericRawtype() throws Exception {
-    CoderRegistry registry = new CoderRegistry();
-    registry.registerStandardCoders();
-    assertEquals(
-        registry.getDefaultCoder(MySerializableGeneric.class),
-        SerializableCoder.of(MySerializableGeneric.class));
-  }
-
-  @Test
-  public void testDefaultCoderAnnotationGeneric() throws Exception {
-    CoderRegistry registry = new CoderRegistry();
-    registry.registerStandardCoders();
-    assertEquals(
-        registry.getDefaultCoder(new TypeDescriptor<MySerializableGeneric<String>>() {}),
-        SerializableCoder.of(MySerializableGeneric.class));
-  }
-
-  private static class PTransformOutputingMySerializableGeneric
-  extends PTransform<PCollection<String>, PCollection<KV<String, MySerializableGeneric<String>>>> {
-
-    private class OutputDoFn extends DoFn<String, KV<String, MySerializableGeneric<String>>> {
-      @Override
-      public void processElement(ProcessContext c) { }
-    }
-
-    @Override
-    public PCollection<KV<String, MySerializableGeneric<String>>>
-    apply(PCollection<String> input) {
-      return input.apply(ParDo.of(new OutputDoFn()));
-    }
-  }
-
-  /**
-   * Tests that the error message for a type variable includes a mention of where the
-   * type variable was declared.
-   */
-  @Test
-  public void testTypeVariableErrorMessage() throws Exception {
-    CoderRegistry registry = new CoderRegistry();
-
-    thrown.expect(CannotProvideCoderException.class);
-    thrown.expectMessage(allOf(
-        containsString("TestGenericT"),
-        containsString("erasure"),
-        containsString("com.google.cloud.dataflow.sdk.coders.CoderRegistryTest$TestGenericClass")));
-    registry.getDefaultCoder(TypeDescriptor.of(
-        TestGenericClass.class.getTypeParameters()[0]));
-  }
-
-  private static class TestGenericClass<TestGenericT> { }
-
-  /**
-   * In-context test that assures the functionality tested in
-   * {@link #testDefaultCoderAnnotationGeneric} is invoked in the right ways.
-   */
-  @Test
-  public void testSpecializedButIgnoredGenericInPipeline() throws Exception {
-    Pipeline pipeline = TestPipeline.create();
-
-    pipeline
-        .apply(Create.of("hello", "goodbye"))
-        .apply(new PTransformOutputingMySerializableGeneric());
-
-    pipeline.run();
-  }
-
-  private static class GenericOutputMySerializedGeneric<T extends Serializable>
-  extends PTransform<
-      PCollection<String>,
-      PCollection<KV<String, MySerializableGeneric<T>>>> {
-
-    private class OutputDoFn extends DoFn<String, KV<String, MySerializableGeneric<T>>> {
-      @Override
-      public void processElement(ProcessContext c) { }
-    }
-
-    @Override
-    public PCollection<KV<String, MySerializableGeneric<T>>>
-    apply(PCollection<String> input) {
-      return input.apply(ParDo.of(new OutputDoFn()));
-    }
-  }
-
-  @Test
-  public void testIgnoredGenericInPipeline() throws Exception {
-    Pipeline pipeline = TestPipeline.create();
-
-    pipeline
-        .apply(Create.of("hello", "goodbye"))
-        .apply(new GenericOutputMySerializedGeneric<String>());
-
-    pipeline.run();
-  }
-
-  private static class MyGenericClass<FooT, BazT> { }
-
-  private static class MyValue { }
-
-  private static class MyValueCoder implements Coder<MyValue> {
-
-    private static final MyValueCoder INSTANCE = new MyValueCoder();
-
-    public static MyValueCoder of() {
-      return INSTANCE;
-    }
-
-    @SuppressWarnings("unused")
-    public static List<Object> getInstanceComponents(
-        @SuppressWarnings("unused") MyValue exampleValue) {
-      return Arrays.asList();
-    }
-
-    @Override
-    public void encode(MyValue value, OutputStream outStream, Context context)
-        throws CoderException, IOException {
-    }
-
-    @Override
-    public MyValue decode(InputStream inStream, Context context)
-        throws CoderException, IOException {
-      return new MyValue();
-    }
-
-    @Override
-    public List<? extends Coder<?>> getCoderArguments() {
-      return null;
-    }
-
-    @Override
-    public CloudObject asCloudObject() {
-      return null;
-    }
-
-    @Override
-    public void verifyDeterministic() { }
-
-    @Override
-    public boolean consistentWithEquals() {
-      return true;
-    }
-
-    @Override
-    public Object structuralValue(MyValue value) {
-      return value;
-    }
-
-    @Override
-    public boolean isRegisterByteSizeObserverCheap(MyValue value, Context context) {
-      return true;
-    }
-
-    @Override
-    public void registerByteSizeObserver(
-        MyValue value, ElementByteSizeObserver observer, Context context)
-        throws Exception {
-      observer.update(0L);
-    }
-
-    @Override
-    public String getEncodingId() {
-      return getClass().getName();
-    }
-
-    @Override
-    public Collection<String> getAllowedEncodings() {
-      return Collections.singletonList(getEncodingId());
-    }
-  }
-
-  private static class UnknownType { }
-
-  @DefaultCoder(SerializableCoder.class)
-  private static class MySerializableGeneric<T extends Serializable> implements Serializable {
-    @SuppressWarnings("unused")
-    private T foo;
-  }
-}

[33/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java
deleted file mode 100644
index 872cfef..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRunner.java
+++ /dev/null
@@ -1,1156 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.ListCoder;
-import com.google.cloud.dataflow.sdk.io.AvroIO;
-import com.google.cloud.dataflow.sdk.io.FileBasedSink;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DirectPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions.CheckEnabled;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.Combine;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.Partition;
-import com.google.cloud.dataflow.sdk.transforms.Partition.PartitionFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.AppliedCombineFn;
-import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
-import com.google.cloud.dataflow.sdk.util.MapAggregatorValues;
-import com.google.cloud.dataflow.sdk.util.PerKeyCombineFnRunner;
-import com.google.cloud.dataflow.sdk.util.PerKeyCombineFnRunners;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.util.TestCredential;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.common.Counter;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.cloud.dataflow.sdk.values.TypedPValue;
-import com.google.common.base.Function;
-import com.google.common.collect.Lists;
-
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-/**
- * Executes the operations in the pipeline directly, in this process, without
- * any optimization.  Useful for small local execution and tests.
- *
- * <p>Throws an exception from {@link #run} if execution fails.
- *
- * <p><h3>Permissions</h3>
- * When reading from a Dataflow source or writing to a Dataflow sink using
- * {@code DirectPipelineRunner}, the Cloud Platform account that you configured with the
- * <a href="https://cloud.google.com/sdk/gcloud">gcloud</a> executable will need access to the
- * corresponding source/sink.
- *
- * <p>Please see <a href="https://cloud.google.com/dataflow/security-and-permissions">Google Cloud
- * Dataflow Security and Permissions</a> for more details.
- */
-@SuppressWarnings({"rawtypes", "unchecked"})
-public class DirectPipelineRunner
-    extends PipelineRunner<DirectPipelineRunner.EvaluationResults> {
-  private static final Logger LOG = LoggerFactory.getLogger(DirectPipelineRunner.class);
-
-  /**
-   * A source of random data, which can be seeded if determinism is desired.
-   */
-  private Random rand;
-
-  /**
-   * A map from PTransform class to the corresponding
-   * TransformEvaluator to use to evaluate that transform.
-   *
-   * <p>A static map that contains system-wide defaults.
-   */
-  private static Map<Class, TransformEvaluator> defaultTransformEvaluators =
-      new HashMap<>();
-
-  /**
-   * A map from PTransform class to the corresponding
-   * TransformEvaluator to use to evaluate that transform.
-   *
-   * <p>An instance map that contains bindings for this DirectPipelineRunner.
-   * Bindings in this map override those in the default map.
-   */
-  private Map<Class, TransformEvaluator> localTransformEvaluators =
-      new HashMap<>();
-
-  /**
-   * Records that instances of the specified PTransform class
-   * should be evaluated by default by the corresponding
-   * TransformEvaluator.
-   */
-  public static <TransformT extends PTransform<?, ?>>
-  void registerDefaultTransformEvaluator(
-      Class<TransformT> transformClass,
-      TransformEvaluator<? super TransformT> transformEvaluator) {
-    if (defaultTransformEvaluators.put(transformClass, transformEvaluator)
-        != null) {
-      throw new IllegalArgumentException(
-          "defining multiple evaluators for " + transformClass);
-    }
-  }
-
-  /**
-   * Records that instances of the specified PTransform class
-   * should be evaluated by the corresponding TransformEvaluator.
-   * Overrides any bindings specified by
-   * {@link #registerDefaultTransformEvaluator}.
-   */
-  public <TransformT extends PTransform<?, ?>>
-  void registerTransformEvaluator(
-      Class<TransformT> transformClass,
-      TransformEvaluator<TransformT> transformEvaluator) {
-    if (localTransformEvaluators.put(transformClass, transformEvaluator)
-        != null) {
-      throw new IllegalArgumentException(
-          "defining multiple evaluators for " + transformClass);
-    }
-  }
-
-  /**
-   * Returns the TransformEvaluator to use for instances of the
-   * specified PTransform class, or null if none registered.
-   */
-  public <TransformT extends PTransform<?, ?>>
-      TransformEvaluator<TransformT> getTransformEvaluator(Class<TransformT> transformClass) {
-    TransformEvaluator<TransformT> transformEvaluator =
-        localTransformEvaluators.get(transformClass);
-    if (transformEvaluator == null) {
-      transformEvaluator = defaultTransformEvaluators.get(transformClass);
-    }
-    return transformEvaluator;
-  }
-
-  /**
-   * Constructs a DirectPipelineRunner from the given options.
-   */
-  public static DirectPipelineRunner fromOptions(PipelineOptions options) {
-    DirectPipelineOptions directOptions =
-        PipelineOptionsValidator.validate(DirectPipelineOptions.class, options);
-    LOG.debug("Creating DirectPipelineRunner");
-    return new DirectPipelineRunner(directOptions);
-  }
-
-  /**
-   * Constructs a runner with default properties for testing.
-   *
-   * @return The newly created runner.
-   */
-  public static DirectPipelineRunner createForTest() {
-    DirectPipelineOptions options = PipelineOptionsFactory.as(DirectPipelineOptions.class);
-    options.setStableUniqueNames(CheckEnabled.ERROR);
-    options.setGcpCredential(new TestCredential());
-    return new DirectPipelineRunner(options);
-  }
-
-  /**
-   * Enable runtime testing to verify that all functions and {@link Coder}
-   * instances can be serialized.
-   *
-   * <p>Enabled by default.
-   *
-   * <p>This method modifies the {@code DirectPipelineRunner} instance and
-   * returns itself.
-   */
-  public DirectPipelineRunner withSerializabilityTesting(boolean enable) {
-    this.testSerializability = enable;
-    return this;
-  }
-
-  /**
-   * Enable runtime testing to verify that all values can be encoded.
-   *
-   * <p>Enabled by default.
-   *
-   * <p>This method modifies the {@code DirectPipelineRunner} instance and
-   * returns itself.
-   */
-  public DirectPipelineRunner withEncodabilityTesting(boolean enable) {
-    this.testEncodability = enable;
-    return this;
-  }
-
-  /**
-   * Enable runtime testing to verify that functions do not depend on order
-   * of the elements.
-   *
-   * <p>This is accomplished by randomizing the order of elements.
-   *
-   * <p>Enabled by default.
-   *
-   * <p>This method modifies the {@code DirectPipelineRunner} instance and
-   * returns itself.
-   */
-  public DirectPipelineRunner withUnorderednessTesting(boolean enable) {
-    this.testUnorderedness = enable;
-    return this;
-  }
-
-  @Override
-  public <OutputT extends POutput, InputT extends PInput> OutputT apply(
-      PTransform<InputT, OutputT> transform, InputT input) {
-    if (transform instanceof Combine.GroupedValues) {
-      return (OutputT) applyTestCombine((Combine.GroupedValues) transform, (PCollection) input);
-    } else if (transform instanceof TextIO.Write.Bound) {
-      return (OutputT) applyTextIOWrite((TextIO.Write.Bound) transform, (PCollection<?>) input);
-    } else if (transform instanceof AvroIO.Write.Bound) {
-      return (OutputT) applyAvroIOWrite((AvroIO.Write.Bound) transform, (PCollection<?>) input);
-    } else {
-      return super.apply(transform, input);
-    }
-  }
-
-  private <K, InputT, AccumT, OutputT> PCollection<KV<K, OutputT>> applyTestCombine(
-      Combine.GroupedValues<K, InputT, OutputT> transform,
-      PCollection<KV<K, Iterable<InputT>>> input) {
-
-    PCollection<KV<K, OutputT>> output = input
-        .apply(ParDo.of(TestCombineDoFn.create(transform, input, testSerializability, rand))
-            .withSideInputs(transform.getSideInputs()));
-
-    try {
-      output.setCoder(transform.getDefaultOutputCoder(input));
-    } catch (CannotProvideCoderException exc) {
-      // let coder inference occur later, if it can
-    }
-    return output;
-  }
-
-  private static class ElementProcessingOrderPartitionFn<T> implements PartitionFn<T> {
-    private int elementNumber;
-    @Override
-    public int partitionFor(T elem, int numPartitions) {
-      return elementNumber++ % numPartitions;
-    }
-  }
-
-  /**
-   * Applies TextIO.Write honoring user requested sharding controls (i.e. withNumShards)
-   * by applying a partition function based upon the number of shards the user requested.
-   */
-  private static class DirectTextIOWrite<T> extends PTransform<PCollection<T>, PDone> {
-    private final TextIO.Write.Bound<T> transform;
-
-    private DirectTextIOWrite(TextIO.Write.Bound<T> transform) {
-      this.transform = transform;
-    }
-
-    @Override
-    public PDone apply(PCollection<T> input) {
-      checkState(transform.getNumShards() > 1,
-          "DirectTextIOWrite is expected to only be used when sharding controls are required.");
-
-      // Evenly distribute all the elements across the partitions.
-      PCollectionList<T> partitionedElements =
-          input.apply(Partition.of(transform.getNumShards(),
-                                   new ElementProcessingOrderPartitionFn<T>()));
-
-      // For each input PCollection partition, create a write transform that represents
-      // one of the specific shards.
-      for (int i = 0; i < transform.getNumShards(); ++i) {
-        /*
-         * This logic mirrors the file naming strategy within
-         * {@link FileBasedSink#generateDestinationFilenames()}
-         */
-        String outputFilename = IOChannelUtils.constructName(
-            transform.getFilenamePrefix(),
-            transform.getShardNameTemplate(),
-            getFileExtension(transform.getFilenameSuffix()),
-            i,
-            transform.getNumShards());
-
-        String transformName = String.format("%s(Shard:%s)", transform.getName(), i);
-        partitionedElements.get(i).apply(transformName,
-            transform.withNumShards(1).withShardNameTemplate("").withSuffix("").to(outputFilename));
-      }
-      return PDone.in(input.getPipeline());
-    }
-  }
-
-  /**
-   * Returns the file extension to be used. If the user did not request a file
-   * extension then this method returns the empty string. Otherwise this method
-   * adds a {@code "."} to the beginning of the users extension if one is not present.
-   *
-   * <p>This is copied from {@link FileBasedSink} to not expose it.
-   */
-  private static String getFileExtension(String usersExtension) {
-    if (usersExtension == null || usersExtension.isEmpty()) {
-      return "";
-    }
-    if (usersExtension.startsWith(".")) {
-      return usersExtension;
-    }
-    return "." + usersExtension;
-  }
-
-  /**
-   * Apply the override for TextIO.Write.Bound if the user requested sharding controls
-   * greater than one.
-   */
-  private <T> PDone applyTextIOWrite(TextIO.Write.Bound<T> transform, PCollection<T> input) {
-    if (transform.getNumShards() <= 1) {
-      // By default, the DirectPipelineRunner outputs to only 1 shard. Since the user never
-      // requested sharding controls greater than 1, we default to outputting to 1 file.
-      return super.apply(transform.withNumShards(1), input);
-    }
-    return input.apply(new DirectTextIOWrite<>(transform));
-  }
-
-  /**
-   * Applies AvroIO.Write honoring user requested sharding controls (i.e. withNumShards)
-   * by applying a partition function based upon the number of shards the user requested.
-   */
-  private static class DirectAvroIOWrite<T> extends PTransform<PCollection<T>, PDone> {
-    private final AvroIO.Write.Bound<T> transform;
-
-    private DirectAvroIOWrite(AvroIO.Write.Bound<T> transform) {
-      this.transform = transform;
-    }
-
-    @Override
-    public PDone apply(PCollection<T> input) {
-      checkState(transform.getNumShards() > 1,
-          "DirectAvroIOWrite is expected to only be used when sharding controls are required.");
-
-      // Evenly distribute all the elements across the partitions.
-      PCollectionList<T> partitionedElements =
-          input.apply(Partition.of(transform.getNumShards(),
-                                   new ElementProcessingOrderPartitionFn<T>()));
-
-      // For each input PCollection partition, create a write transform that represents
-      // one of the specific shards.
-      for (int i = 0; i < transform.getNumShards(); ++i) {
-        /*
-         * This logic mirrors the file naming strategy within
-         * {@link FileBasedSink#generateDestinationFilenames()}
-         */
-        String outputFilename = IOChannelUtils.constructName(
-            transform.getFilenamePrefix(),
-            transform.getShardNameTemplate(),
-            getFileExtension(transform.getFilenameSuffix()),
-            i,
-            transform.getNumShards());
-
-        String transformName = String.format("%s(Shard:%s)", transform.getName(), i);
-        partitionedElements.get(i).apply(transformName,
-            transform.withNumShards(1).withShardNameTemplate("").withSuffix("").to(outputFilename));
-      }
-      return PDone.in(input.getPipeline());
-    }
-  }
-
-  /**
-   * Apply the override for AvroIO.Write.Bound if the user requested sharding controls
-   * greater than one.
-   */
-  private <T> PDone applyAvroIOWrite(AvroIO.Write.Bound<T> transform, PCollection<T> input) {
-    if (transform.getNumShards() <= 1) {
-      // By default, the DirectPipelineRunner outputs to only 1 shard. Since the user never
-      // requested sharding controls greater than 1, we default to outputting to 1 file.
-      return super.apply(transform.withNumShards(1), input);
-    }
-    return input.apply(new DirectAvroIOWrite<>(transform));
-  }
-
-  /**
-   * The implementation may split the {@link KeyedCombineFn} into ADD, MERGE and EXTRACT phases (
-   * see {@code com.google.cloud.dataflow.sdk.runners.worker.CombineValuesFn}). In order to emulate
-   * this for the {@link DirectPipelineRunner} and provide an experience closer to the service, go
-   * through heavy serializability checks for the equivalent of the results of the ADD phase, but
-   * after the {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey} shuffle, and the MERGE
-   * phase. Doing these checks ensure that not only is the accumulator coder serializable, but
-   * the accumulator coder can actually serialize the data in question.
-   */
-  public static class TestCombineDoFn<K, InputT, AccumT, OutputT>
-      extends DoFn<KV<K, Iterable<InputT>>, KV<K, OutputT>> {
-    private final PerKeyCombineFnRunner<? super K, ? super InputT, AccumT, OutputT> fnRunner;
-    private final Coder<AccumT> accumCoder;
-    private final boolean testSerializability;
-    private final Random rand;
-
-    public static <K, InputT, AccumT, OutputT> TestCombineDoFn<K, InputT, AccumT, OutputT> create(
-        Combine.GroupedValues<K, InputT, OutputT> transform,
-        PCollection<KV<K, Iterable<InputT>>> input,
-        boolean testSerializability,
-        Random rand) {
-
-      AppliedCombineFn<? super K, ? super InputT, ?, OutputT> fn = transform.getAppliedFn(
-          input.getPipeline().getCoderRegistry(), input.getCoder(), input.getWindowingStrategy());
-
-      return new TestCombineDoFn(
-          PerKeyCombineFnRunners.create(fn.getFn()),
-          fn.getAccumulatorCoder(),
-          testSerializability,
-          rand);
-    }
-
-    public TestCombineDoFn(
-        PerKeyCombineFnRunner<? super K, ? super InputT, AccumT, OutputT> fnRunner,
-        Coder<AccumT> accumCoder,
-        boolean testSerializability,
-        Random rand) {
-      this.fnRunner = fnRunner;
-      this.accumCoder = accumCoder;
-      this.testSerializability = testSerializability;
-      this.rand = rand;
-
-      // Check that this does not crash, specifically to catch anonymous CustomCoder subclasses.
-      this.accumCoder.getEncodingId();
-    }
-
-    @Override
-    public void processElement(ProcessContext c) throws Exception {
-      K key = c.element().getKey();
-      Iterable<InputT> values = c.element().getValue();
-      List<AccumT> groupedPostShuffle =
-          ensureSerializableByCoder(ListCoder.of(accumCoder),
-              addInputsRandomly(fnRunner, key, values, rand, c),
-              "After addInputs of KeyedCombineFn " + fnRunner.fn().toString());
-      AccumT merged =
-          ensureSerializableByCoder(accumCoder,
-            fnRunner.mergeAccumulators(key, groupedPostShuffle, c),
-            "After mergeAccumulators of KeyedCombineFn " + fnRunner.fn().toString());
-      // Note: The serializability of KV<K, OutputT> is ensured by the
-      // runner itself, since it's a transform output.
-      c.output(KV.of(key, fnRunner.extractOutput(key, merged, c)));
-    }
-
-    /**
-     * Create a random list of accumulators from the given list of values.
-     *
-     * <p>Visible for testing purposes only.
-     */
-    public static <K, AccumT, InputT> List<AccumT> addInputsRandomly(
-        PerKeyCombineFnRunner<? super K, ? super InputT, AccumT, ?> fnRunner,
-        K key,
-        Iterable<InputT> values,
-        Random random,
-        DoFn<?, ?>.ProcessContext c) {
-      List<AccumT> out = new ArrayList<AccumT>();
-      int i = 0;
-      AccumT accumulator = fnRunner.createAccumulator(key, c);
-      boolean hasInput = false;
-
-      for (InputT value : values) {
-        accumulator = fnRunner.addInput(key, accumulator, value, c);
-        hasInput = true;
-
-        // For each index i, flip a 1/2^i weighted coin for whether to
-        // create a new accumulator after index i is added, i.e. [0]
-        // is guaranteed, [1] is an even 1/2, [2] is 1/4, etc. The
-        // goal is to partition the inputs into accumulators, and make
-        // the accumulators potentially lumpy.  Also compact about half
-        // of the accumulators.
-        if (i == 0 || random.nextInt(1 << Math.min(i, 30)) == 0) {
-          if (i % 2 == 0) {
-            accumulator = fnRunner.compact(key, accumulator, c);
-          }
-          out.add(accumulator);
-          accumulator = fnRunner.createAccumulator(key, c);
-          hasInput = false;
-        }
-        i++;
-      }
-      if (hasInput) {
-        out.add(accumulator);
-      }
-
-      Collections.shuffle(out, random);
-      return out;
-    }
-
-    public <T> T ensureSerializableByCoder(
-        Coder<T> coder, T value, String errorContext) {
-      if (testSerializability) {
-        return SerializableUtils.ensureSerializableByCoder(
-            coder, value, errorContext);
-      }
-      return value;
-    }
-  }
-
-  @Override
-  public EvaluationResults run(Pipeline pipeline) {
-    LOG.info("Executing pipeline using the DirectPipelineRunner.");
-
-    Evaluator evaluator = new Evaluator(rand);
-    evaluator.run(pipeline);
-
-    // Log all counter values for debugging purposes.
-    for (Counter counter : evaluator.getCounters()) {
-      LOG.info("Final aggregator value: {}", counter);
-    }
-
-    LOG.info("Pipeline execution complete.");
-
-    return evaluator;
-  }
-
-  /**
-   * An evaluator of a PTransform.
-   */
-  public interface TransformEvaluator<TransformT extends PTransform> {
-    public void evaluate(TransformT transform,
-                         EvaluationContext context);
-  }
-
-  /**
-   * The interface provided to registered callbacks for interacting
-   * with the {@code DirectPipelineRunner}, including reading and writing the
-   * values of {@link PCollection}s and {@link PCollectionView}s.
-   */
-  public interface EvaluationResults extends PipelineResult {
-    /**
-     * Retrieves the value of the given PCollection.
-     * Throws an exception if the PCollection's value hasn't already been set.
-     */
-    <T> List<T> getPCollection(PCollection<T> pc);
-
-    /**
-     * Retrieves the windowed value of the given PCollection.
-     * Throws an exception if the PCollection's value hasn't already been set.
-     */
-    <T> List<WindowedValue<T>> getPCollectionWindowedValues(PCollection<T> pc);
-
-    /**
-     * Retrieves the values of each PCollection in the given
-     * PCollectionList. Throws an exception if the PCollectionList's
-     * value hasn't already been set.
-     */
-    <T> List<List<T>> getPCollectionList(PCollectionList<T> pcs);
-
-    /**
-     * Retrieves the values indicated by the given {@link PCollectionView}.
-     * Note that within the {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context}
-     * implementation a {@link PCollectionView} should convert from this representation to a
-     * suitable side input value.
-     */
-    <T, WindowedT> Iterable<WindowedValue<?>> getPCollectionView(PCollectionView<T> view);
-  }
-
-  /**
-   * An immutable (value, timestamp) pair, along with other metadata necessary
-   * for the implementation of {@code DirectPipelineRunner}.
-   */
-  public static class ValueWithMetadata<V> {
-    /**
-     * Returns a new {@code ValueWithMetadata} with the {@code WindowedValue}.
-     * Key is null.
-     */
-    public static <V> ValueWithMetadata<V> of(WindowedValue<V> windowedValue) {
-      return new ValueWithMetadata<>(windowedValue, null);
-    }
-
-    /**
-     * Returns a new {@code ValueWithMetadata} with the implicit key associated
-     * with this value set.  The key is the last key grouped by in the chain of
-     * productions that produced this element.
-     * These keys are used internally by {@link DirectPipelineRunner} for keeping
-     * persisted state separate across keys.
-     */
-    public ValueWithMetadata<V> withKey(Object key) {
-      return new ValueWithMetadata<>(windowedValue, key);
-    }
-
-    /**
-     * Returns a new {@code ValueWithMetadata} that is a copy of this one, but with
-     * a different value.
-     */
-    public <T> ValueWithMetadata<T> withValue(T value) {
-      return new ValueWithMetadata(windowedValue.withValue(value), getKey());
-    }
-
-    /**
-     * Returns the {@code WindowedValue} associated with this element.
-     */
-    public WindowedValue<V> getWindowedValue() {
-      return windowedValue;
-    }
-
-    /**
-     * Returns the value associated with this element.
-     *
-     * @see #withValue
-     */
-    public V getValue() {
-      return windowedValue.getValue();
-    }
-
-    /**
-     * Returns the timestamp associated with this element.
-     */
-    public Instant getTimestamp() {
-      return windowedValue.getTimestamp();
-    }
-
-    /**
-     * Returns the collection of windows this element has been placed into.  May
-     * be null if the {@code PCollection} this element is in has not yet been
-     * windowed.
-     *
-     * @see #getWindows()
-     */
-    public Collection<? extends BoundedWindow> getWindows() {
-      return windowedValue.getWindows();
-    }
-
-
-    /**
-     * Returns the key associated with this element.  May be null if the
-     * {@code PCollection} this element is in is not keyed.
-     *
-     * @see #withKey
-     */
-    public Object getKey() {
-      return key;
-    }
-
-    ////////////////////////////////////////////////////////////////////////////
-
-  private final Object key;
-    private final WindowedValue<V> windowedValue;
-
-    private ValueWithMetadata(WindowedValue<V> windowedValue,
-                              Object key) {
-      this.windowedValue = windowedValue;
-      this.key = key;
-    }
-  }
-
-  /**
-   * The interface provided to registered callbacks for interacting
-   * with the {@code DirectPipelineRunner}, including reading and writing the
-   * values of {@link PCollection}s and {@link PCollectionView}s.
-   */
-  public interface EvaluationContext extends EvaluationResults {
-    /**
-     * Returns the configured pipeline options.
-     */
-    DirectPipelineOptions getPipelineOptions();
-
-    /**
-     * Returns the input of the currently being processed transform.
-     */
-    <InputT extends PInput> InputT getInput(PTransform<InputT, ?> transform);
-
-    /**
-     * Returns the output of the currently being processed transform.
-     */
-    <OutputT extends POutput> OutputT getOutput(PTransform<?, OutputT> transform);
-
-    /**
-     * Sets the value of the given PCollection, where each element also has a timestamp
-     * and collection of windows.
-     * Throws an exception if the PCollection's value has already been set.
-     */
-    <T> void setPCollectionValuesWithMetadata(
-        PCollection<T> pc, List<ValueWithMetadata<T>> elements);
-
-    /**
-     * Sets the value of the given PCollection, where each element also has a timestamp
-     * and collection of windows.
-     * Throws an exception if the PCollection's value has already been set.
-     */
-    <T> void setPCollectionWindowedValue(PCollection<T> pc, List<WindowedValue<T>> elements);
-
-    /**
-     * Shorthand for setting the value of a PCollection where the elements do not have
-     * timestamps or windows.
-     * Throws an exception if the PCollection's value has already been set.
-     */
-    <T> void setPCollection(PCollection<T> pc, List<T> elements);
-
-    /**
-     * Retrieves the value of the given PCollection, along with element metadata
-     * such as timestamps and windows.
-     * Throws an exception if the PCollection's value hasn't already been set.
-     */
-    <T> List<ValueWithMetadata<T>> getPCollectionValuesWithMetadata(PCollection<T> pc);
-
-    /**
-     * Sets the value associated with the given {@link PCollectionView}.
-     * Throws an exception if the {@link PCollectionView}'s value has already been set.
-     */
-    <ElemT, T, WindowedT> void setPCollectionView(
-        PCollectionView<T> pc,
-        Iterable<WindowedValue<ElemT>> value);
-
-    /**
-     * Ensures that the element is encodable and decodable using the
-     * TypePValue's coder, by encoding it and decoding it, and
-     * returning the result.
-     */
-    <T> T ensureElementEncodable(TypedPValue<T> pvalue, T element);
-
-    /**
-     * If the evaluation context is testing unorderedness,
-     * randomly permutes the order of the elements, in a
-     * copy if !inPlaceAllowed, and returns the permuted list,
-     * otherwise returns the argument unchanged.
-     */
-    <T> List<T> randomizeIfUnordered(List<T> elements,
-                                     boolean inPlaceAllowed);
-
-    /**
-     * If the evaluation context is testing serializability, ensures
-     * that the argument function is serializable and deserializable
-     * by encoding it and then decoding it, and returning the result.
-     * Otherwise returns the argument unchanged.
-     */
-    <FunctionT extends Serializable> FunctionT ensureSerializable(FunctionT fn);
-
-    /**
-     * If the evaluation context is testing serializability, ensures
-     * that the argument Coder is serializable and deserializable
-     * by encoding it and then decoding it, and returning the result.
-     * Otherwise returns the argument unchanged.
-     */
-    <T> Coder<T> ensureCoderSerializable(Coder<T> coder);
-
-    /**
-     * If the evaluation context is testing serializability, ensures
-     * that the given data is serializable and deserializable with the
-     * given Coder by encoding it and then decoding it, and returning
-     * the result. Otherwise returns the argument unchanged.
-     *
-     * <p>Error context is prefixed to any thrown exceptions.
-     */
-    <T> T ensureSerializableByCoder(Coder<T> coder,
-                                    T data, String errorContext);
-
-    /**
-     * Returns a mutator, which can be used to add additional counters to
-     * this EvaluationContext.
-     */
-    CounterSet.AddCounterMutator getAddCounterMutator();
-
-    /**
-     * Gets the step name for this transform.
-     */
-    public String getStepName(PTransform<?, ?> transform);
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  class Evaluator implements PipelineVisitor, EvaluationContext {
-    /**
-     * A map from PTransform to the step name of that transform. This is the internal name for the
-     * transform (e.g. "s2").
-     */
-    private final Map<PTransform<?, ?>, String> stepNames = new HashMap<>();
-    private final Map<PValue, Object> store = new HashMap<>();
-    private final CounterSet counters = new CounterSet();
-    private AppliedPTransform<?, ?, ?> currentTransform;
-
-    private Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorSteps = null;
-
-    /**
-     * A map from PTransform to the full name of that transform. This is the user name of the
-     * transform (e.g. "RemoveDuplicates/Combine/GroupByKey").
-     */
-    private final Map<PTransform<?, ?>, String> fullNames = new HashMap<>();
-
-    private Random rand;
-
-    public Evaluator() {
-      this(new Random());
-    }
-
-    public Evaluator(Random rand) {
-      this.rand = rand;
-    }
-
-    public void run(Pipeline pipeline) {
-      pipeline.traverseTopologically(this);
-      aggregatorSteps = new AggregatorPipelineExtractor(pipeline).getAggregatorSteps();
-    }
-
-    @Override
-    public DirectPipelineOptions getPipelineOptions() {
-      return options;
-    }
-
-    @Override
-    public <InputT extends PInput> InputT getInput(PTransform<InputT, ?> transform) {
-      checkArgument(currentTransform != null && currentTransform.getTransform() == transform,
-          "can only be called with current transform");
-      return (InputT) currentTransform.getInput();
-    }
-
-    @Override
-    public <OutputT extends POutput> OutputT getOutput(PTransform<?, OutputT> transform) {
-      checkArgument(currentTransform != null && currentTransform.getTransform() == transform,
-          "can only be called with current transform");
-      return (OutputT) currentTransform.getOutput();
-    }
-
-    @Override
-    public void enterCompositeTransform(TransformTreeNode node) {
-    }
-
-    @Override
-    public void leaveCompositeTransform(TransformTreeNode node) {
-    }
-
-    @Override
-    public void visitTransform(TransformTreeNode node) {
-      PTransform<?, ?> transform = node.getTransform();
-      fullNames.put(transform, node.getFullName());
-      TransformEvaluator evaluator =
-          getTransformEvaluator(transform.getClass());
-      if (evaluator == null) {
-        throw new IllegalStateException(
-            "no evaluator registered for " + transform);
-      }
-      LOG.debug("Evaluating {}", transform);
-      currentTransform = AppliedPTransform.of(
-          node.getFullName(), node.getInput(), node.getOutput(), (PTransform) transform);
-      evaluator.evaluate(transform, this);
-      currentTransform = null;
-    }
-
-    @Override
-    public void visitValue(PValue value, TransformTreeNode producer) {
-      LOG.debug("Checking evaluation of {}", value);
-      if (value.getProducingTransformInternal() == null) {
-        throw new RuntimeException(
-            "internal error: expecting a PValue " +
-            "to have a producingTransform");
-      }
-      if (!producer.isCompositeNode()) {
-        // Verify that primitive transform outputs are already computed.
-        getPValue(value);
-      }
-    }
-
-    /**
-     * Sets the value of the given PValue.
-     * Throws an exception if the PValue's value has already been set.
-     */
-    void setPValue(PValue pvalue, Object contents) {
-      if (store.containsKey(pvalue)) {
-        throw new IllegalStateException(
-            "internal error: setting the value of " + pvalue +
-            " more than once");
-      }
-      store.put(pvalue, contents);
-    }
-
-    /**
-     * Retrieves the value of the given PValue.
-     * Throws an exception if the PValue's value hasn't already been set.
-     */
-    Object getPValue(PValue pvalue) {
-      if (!store.containsKey(pvalue)) {
-        throw new IllegalStateException(
-            "internal error: getting the value of " + pvalue +
-            " before it has been computed");
-      }
-      return store.get(pvalue);
-    }
-
-    /**
-     * Convert a list of T to a list of {@code ValueWithMetadata<T>}, with a timestamp of 0
-     * and null windows.
-     */
-    <T> List<ValueWithMetadata<T>> toValueWithMetadata(List<T> values) {
-      List<ValueWithMetadata<T>> result = new ArrayList<>(values.size());
-      for (T value : values) {
-        result.add(ValueWithMetadata.of(WindowedValue.valueInGlobalWindow(value)));
-      }
-      return result;
-    }
-
-    /**
-     * Convert a list of {@code WindowedValue<T>} to a list of {@code ValueWithMetadata<T>}.
-     */
-    <T> List<ValueWithMetadata<T>> toValueWithMetadataFromWindowedValue(
-        List<WindowedValue<T>> values) {
-      List<ValueWithMetadata<T>> result = new ArrayList<>(values.size());
-      for (WindowedValue<T> value : values) {
-        result.add(ValueWithMetadata.of(value));
-      }
-      return result;
-    }
-
-    @Override
-    public <T> void setPCollection(PCollection<T> pc, List<T> elements) {
-      setPCollectionValuesWithMetadata(pc, toValueWithMetadata(elements));
-    }
-
-    @Override
-    public <T> void setPCollectionWindowedValue(
-        PCollection<T> pc, List<WindowedValue<T>> elements) {
-      setPCollectionValuesWithMetadata(pc, toValueWithMetadataFromWindowedValue(elements));
-    }
-
-    @Override
-    public <T> void setPCollectionValuesWithMetadata(
-        PCollection<T> pc, List<ValueWithMetadata<T>> elements) {
-      LOG.debug("Setting {} = {}", pc, elements);
-      ensurePCollectionEncodable(pc, elements);
-      setPValue(pc, elements);
-    }
-
-    @Override
-    public <ElemT, T, WindowedT> void setPCollectionView(
-        PCollectionView<T> view,
-        Iterable<WindowedValue<ElemT>> value) {
-      LOG.debug("Setting {} = {}", view, value);
-      setPValue(view, value);
-    }
-
-    /**
-     * Retrieves the value of the given {@link PCollection}.
-     * Throws an exception if the {@link PCollection}'s value hasn't already been set.
-     */
-    @Override
-    public <T> List<T> getPCollection(PCollection<T> pc) {
-      List<T> result = new ArrayList<>();
-      for (ValueWithMetadata<T> elem : getPCollectionValuesWithMetadata(pc)) {
-        result.add(elem.getValue());
-      }
-      return result;
-    }
-
-    @Override
-    public <T> List<WindowedValue<T>> getPCollectionWindowedValues(PCollection<T> pc) {
-      return Lists.transform(
-          getPCollectionValuesWithMetadata(pc),
-          new Function<ValueWithMetadata<T>, WindowedValue<T>>() {
-            @Override
-            public WindowedValue<T> apply(ValueWithMetadata<T> input) {
-              return input.getWindowedValue();
-            }});
-    }
-
-    @Override
-    public <T> List<ValueWithMetadata<T>> getPCollectionValuesWithMetadata(PCollection<T> pc) {
-      List<ValueWithMetadata<T>> elements = (List<ValueWithMetadata<T>>) getPValue(pc);
-      elements = randomizeIfUnordered(elements, false /* not inPlaceAllowed */);
-      LOG.debug("Getting {} = {}", pc, elements);
-      return elements;
-    }
-
-    @Override
-    public <T> List<List<T>> getPCollectionList(PCollectionList<T> pcs) {
-      List<List<T>> elementsList = new ArrayList<>();
-      for (PCollection<T> pc : pcs.getAll()) {
-        elementsList.add(getPCollection(pc));
-      }
-      return elementsList;
-    }
-
-    /**
-     * Retrieves the value indicated by the given {@link PCollectionView}.
-     * Note that within the {@link DoFnContext} a {@link PCollectionView}
-     * converts from this representation to a suitable side input value.
-     */
-    @Override
-    public <T, WindowedT> Iterable<WindowedValue<?>> getPCollectionView(PCollectionView<T> view) {
-      Iterable<WindowedValue<?>> value = (Iterable<WindowedValue<?>>) getPValue(view);
-      LOG.debug("Getting {} = {}", view, value);
-      return value;
-    }
-
-    /**
-     * If {@code testEncodability}, ensures that the {@link PCollection}'s coder and elements are
-     * encodable and decodable by encoding them and decoding them, and returning the result.
-     * Otherwise returns the argument elements.
-     */
-    <T> List<ValueWithMetadata<T>> ensurePCollectionEncodable(
-        PCollection<T> pc, List<ValueWithMetadata<T>> elements) {
-      ensureCoderSerializable(pc.getCoder());
-      if (!testEncodability) {
-        return elements;
-      }
-      List<ValueWithMetadata<T>> elementsCopy = new ArrayList<>(elements.size());
-      for (ValueWithMetadata<T> element : elements) {
-        elementsCopy.add(
-            element.withValue(ensureElementEncodable(pc, element.getValue())));
-      }
-      return elementsCopy;
-    }
-
-    @Override
-    public <T> T ensureElementEncodable(TypedPValue<T> pvalue, T element) {
-      return ensureSerializableByCoder(
-          pvalue.getCoder(), element, "Within " + pvalue.toString());
-    }
-
-    @Override
-    public <T> List<T> randomizeIfUnordered(List<T> elements,
-                                            boolean inPlaceAllowed) {
-      if (!testUnorderedness) {
-        return elements;
-      }
-      List<T> elementsCopy = new ArrayList<>(elements);
-      Collections.shuffle(elementsCopy, rand);
-      return elementsCopy;
-    }
-
-    @Override
-    public <FunctionT extends Serializable> FunctionT ensureSerializable(FunctionT fn) {
-      if (!testSerializability) {
-        return fn;
-      }
-      return SerializableUtils.ensureSerializable(fn);
-    }
-
-    @Override
-    public <T> Coder<T> ensureCoderSerializable(Coder<T> coder) {
-      if (testSerializability) {
-        SerializableUtils.ensureSerializable(coder);
-      }
-      return coder;
-    }
-
-    @Override
-    public <T> T ensureSerializableByCoder(
-        Coder<T> coder, T value, String errorContext) {
-      if (testSerializability) {
-        return SerializableUtils.ensureSerializableByCoder(
-            coder, value, errorContext);
-      }
-      return value;
-    }
-
-    @Override
-    public CounterSet.AddCounterMutator getAddCounterMutator() {
-      return counters.getAddCounterMutator();
-    }
-
-    @Override
-    public String getStepName(PTransform<?, ?> transform) {
-      String stepName = stepNames.get(transform);
-      if (stepName == null) {
-        stepName = "s" + (stepNames.size() + 1);
-        stepNames.put(transform, stepName);
-      }
-      return stepName;
-    }
-
-    /**
-     * Returns the CounterSet generated during evaluation, which includes
-     * user-defined Aggregators and may include system-defined counters.
-     */
-    public CounterSet getCounters() {
-      return counters;
-    }
-
-    /**
-     * Returns JobState.DONE in all situations. The Evaluator is not returned
-     * until the pipeline has been traversed, so it will either be returned
-     * after a successful run or the run call will terminate abnormally.
-     */
-    @Override
-    public State getState() {
-      return State.DONE;
-    }
-
-    @Override
-    public <T> AggregatorValues<T> getAggregatorValues(Aggregator<?, T> aggregator) {
-      Map<String, T> stepValues = new HashMap<>();
-      for (PTransform<?, ?> step : aggregatorSteps.get(aggregator)) {
-        String stepName = String.format("user-%s-%s", stepNames.get(step), aggregator.getName());
-        String fullName = fullNames.get(step);
-        Counter<?> counter = counters.getExistingCounter(stepName);
-        if (counter == null) {
-          throw new IllegalArgumentException(
-              "Aggregator " + aggregator + " is not used in this pipeline");
-        }
-        stepValues.put(fullName, (T) counter.getAggregate());
-      }
-      return new MapAggregatorValues<>(stepValues);
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private final DirectPipelineOptions options;
-  private boolean testSerializability;
-  private boolean testEncodability;
-  private boolean testUnorderedness;
-
-  /** Returns a new DirectPipelineRunner. */
-  private DirectPipelineRunner(DirectPipelineOptions options) {
-    this.options = options;
-    // (Re-)register standard IO factories. Clobbers any prior credentials.
-    IOChannelUtils.registerStandardIOFactories(options);
-    long randomSeed;
-    if (options.getDirectPipelineRunnerRandomSeed() != null) {
-      randomSeed = options.getDirectPipelineRunnerRandomSeed();
-    } else {
-      randomSeed = new Random().nextLong();
-    }
-
-    LOG.debug("DirectPipelineRunner using random seed {}.", randomSeed);
-    rand = new Random(randomSeed);
-
-    testSerializability = options.isTestSerializability();
-    testEncodability = options.isTestEncodability();
-    testUnorderedness = options.isTestUnorderedness();
-  }
-
-  /**
-   * Get the options used in this {@link Pipeline}.
-   */
-  public DirectPipelineOptions getPipelineOptions() {
-    return options;
-  }
-
-  @Override
-  public String toString() {
-    return "DirectPipelineRunner#" + hashCode();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java
deleted file mode 100644
index 26d8e1e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunner.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.options.GcsOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
-import com.google.cloud.dataflow.sdk.util.InstanceBuilder;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.common.base.Preconditions;
-
-/**
- * A {@link PipelineRunner} can execute, translate, or otherwise process a
- * {@link Pipeline}.
- *
- * @param <ResultT> the type of the result of {@link #run}.
- */
-public abstract class PipelineRunner<ResultT extends PipelineResult> {
-
-  /**
-   * Constructs a runner from the provided options.
-   *
-   * <p>The options are validated as {@link GcsOptions}, the standard IO factories are
-   * (re-)registered with them, and the runner class returned by
-   * {@link PipelineOptions#getRunner()} is built through {@link InstanceBuilder} using that
-   * class's {@code fromOptions} factory method.
-   *
-   * @param options the options selecting and configuring the runner; must not be null
-   * @return The newly created runner.
-   * @throws NullPointerException if {@code options} is null
-   */
-  public static PipelineRunner<? extends PipelineResult> fromOptions(PipelineOptions options) {
-    // Check for null before the options are first used, so that a null argument is reported
-    // by this precondition rather than by whatever the validator does with it.
-    Preconditions.checkNotNull(options);
-    GcsOptions gcsOptions = PipelineOptionsValidator.validate(GcsOptions.class, options);
-
-    // (Re-)register standard IO factories. Clobbers any prior credentials.
-    IOChannelUtils.registerStandardIOFactories(gcsOptions);
-
-    @SuppressWarnings("unchecked")
-    PipelineRunner<? extends PipelineResult> result =
-        InstanceBuilder.ofType(PipelineRunner.class)
-        .fromClass(options.getRunner())
-        .fromFactoryMethod("fromOptions")
-        .withArg(PipelineOptions.class, options)
-        .build();
-    return result;
-  }
-
-  /**
-   * Processes the given Pipeline, returning the results.
-   *
-   * @param pipeline the pipeline to process
-   * @return the runner-specific result of processing {@code pipeline}
-   */
-  public abstract ResultT run(Pipeline pipeline);
-
-  /**
-   * Applies a transform to the given input, returning the output.
-   *
-   * <p>The default implementation calls PTransform.apply(input), but can be overridden
-   * to customize behavior for a particular runner.
-   *
-   * @param transform the transform to apply
-   * @param input the input handed to {@code transform}
-   * @return the output produced by {@code transform} for {@code input}
-   */
-  public <OutputT extends POutput, InputT extends PInput> OutputT apply(
-      PTransform<InputT, OutputT> transform, InputT input) {
-    return transform.apply(input);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java
deleted file mode 100644
index 1ca3346..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/PipelineRunnerRegistrar.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.auto.service.AutoService;
-import java.util.ServiceLoader;
-
-/**
- * Registration hook for {@link PipelineRunner} implementations.
- *
- * <p>{@link PipelineRunner} creators can have their {@link PipelineRunner} registered with
- * this SDK automatically by providing a concrete implementation of this interface together
- * with a {@link ServiceLoader} entry for it.
- *
- * <p>Note that automatic registration of any
- * {@link com.google.cloud.dataflow.sdk.options.PipelineOptions} requires that each
- * {@link PipelineRunner}'s {@link Class#getSimpleName() simple name} be unique.
- *
- * <p>It is optional but recommended to use one of the many build time tools such as
- * {@link AutoService} to generate the necessary META-INF files automatically.
- */
-public interface PipelineRunnerRegistrar {
-  /**
-   * Returns the {@link PipelineRunner PipelineRunners} that should be registered.
-   */
-  Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/RecordingPipelineVisitor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/RecordingPipelineVisitor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/RecordingPipelineVisitor.java
deleted file mode 100644
index ca02b39..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/RecordingPipelineVisitor.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.values.PValue;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * A {@link com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor} that remembers, in the
- * order they are reported, every transform passed to {@link #visitTransform} and every
- * value passed to {@link #visitValue}.
- *
- * <p>Provided for internal unit tests.
- */
-public class RecordingPipelineVisitor implements Pipeline.PipelineVisitor {
-
-  /** Transforms seen by {@link #visitTransform}, in visiting order. */
-  public final List<PTransform<?, ?>> transforms = new ArrayList<>();
-
-  /** Values seen by {@link #visitValue}, in visiting order. */
-  public final List<PValue> values = new ArrayList<>();
-
-  @Override
-  public void enterCompositeTransform(TransformTreeNode node) {
-    // Entering a composite is not recorded.
-  }
-
-  @Override
-  public void leaveCompositeTransform(TransformTreeNode node) {
-    // Leaving a composite is not recorded.
-  }
-
-  @Override
-  public void visitTransform(TransformTreeNode node) {
-    PTransform<?, ?> visited = node.getTransform();
-    this.transforms.add(visited);
-  }
-
-  @Override
-  public void visitValue(PValue value, TransformTreeNode producer) {
-    // Only the value itself is kept; its producer is ignored.
-    this.values.add(value);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformHierarchy.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformHierarchy.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformHierarchy.java
deleted file mode 100644
index d62192d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformHierarchy.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.common.base.Preconditions;
-
-import java.util.Deque;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.Map;
-import java.util.Set;
-
-/**
- * Captures information about a collection of transformations and their
- * associated {@link PValue}s.
- *
- * <p>The hierarchy keeps a stack of {@link TransformTreeNode}s whose bottom element is a
- * root node created by the constructor, plus a map from each output value to the node
- * that produced it.
- */
-public class TransformHierarchy {
-  // Stack of nodes; new nodes are pushed at the head, so the root stays at the tail.
-  private final Deque<TransformTreeNode> transformStack = new LinkedList<>();
-  // Maps each expanded output value to the node registered as its producer.
-  private final Map<PInput, TransformTreeNode> producingTransformNode = new HashMap<>();
-
-  /**
-   * Create a {@code TransformHierarchy} containing a root node.
-   */
-  public TransformHierarchy() {
-    // First element in the stack is the root node, holding all child nodes.
-    transformStack.add(new TransformTreeNode(null, null, "", null));
-  }
-
-  /**
-   * Returns the TransformTreeNode on top of the stack, i.e. the most recently pushed node
-   * that has not been popped yet (the root node if nothing else is on the stack).
-   */
-  public TransformTreeNode getCurrent() {
-    return transformStack.peek();
-  }
-
-  /**
-   * Add a TransformTreeNode to the top of the stack.
-   *
-   * @param current the node that becomes the new current node
-   */
-  public void pushNode(TransformTreeNode current) {
-    transformStack.push(current);
-  }
-
-  /**
-   * Removes the TransformTreeNode on top of the stack.
-   *
-   * <p>The root node can never be removed. The check happens before the stack is modified,
-   * so a rejected call leaves the hierarchy intact.
-   *
-   * @throws IllegalStateException if only the root node is left on the stack
-   */
-  public void popNode() {
-    // Validate first: popping and then checking would already have discarded the root.
-    Preconditions.checkState(transformStack.size() > 1);
-    transformStack.pop();
-  }
-
-  /**
-   * Adds an input to the given node.
-   *
-   * <p>This forces the producing node to be finished.
-   *
-   * @param node the node consuming {@code input}
-   * @param input the unexpanded input; each expanded value must have a known producer
-   * @throws IllegalStateException if no producer was registered for one of the expanded
-   *     values of {@code input}
-   */
-  public void addInput(TransformTreeNode node, PInput input) {
-    for (PValue i : input.expand()) {
-      TransformTreeNode producer = producingTransformNode.get(i);
-      if (producer == null) {
-        throw new IllegalStateException("Producer unknown for input: " + i);
-      }
-
-      producer.finishSpecifying();
-      node.addInputProducer(i, producer);
-    }
-  }
-
-  /**
-   * Sets the output of a transform node and records the node as the producer of every
-   * expanded value of that output.
-   *
-   * @param producer the node that produced {@code output}
-   * @param output the unexpanded output of {@code producer}
-   */
-  public void setOutput(TransformTreeNode producer, POutput output) {
-    producer.setOutput(output);
-
-    for (PValue o : output.expand()) {
-      producingTransformNode.put(o, producer);
-    }
-  }
-
-  /**
-   * Visits all nodes in the transform hierarchy, in transitive order.
-   *
-   * <p>The traversal starts at the node currently on top of the stack, which is the root
-   * node only once every pushed node has been popped again.
-   *
-   * @param visitor the visitor to notify
-   * @param visitedNodes values that were already visited; passed on to the node traversal
-   */
-  public void visit(Pipeline.PipelineVisitor visitor,
-                    Set<PValue> visitedNodes) {
-    transformStack.peekFirst().visit(visitor, visitedNodes);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformTreeNode.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformTreeNode.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformTreeNode.java
deleted file mode 100644
index 2649458..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/TransformTreeNode.java
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.common.base.Preconditions;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-
-import javax.annotation.Nullable;
-
-/**
- * Provides internal tracking of transform relationships with helper methods
- * for initialization and ordered visitation.
- *
- * <p>A node records its enclosing node, its transform, its child nodes, the producers of
- * its expanded inputs, and its output. Inputs and the output can only be changed until
- * the node has been marked as finished by {@link #finishSpecifying}.
- */
-public class TransformTreeNode {
-  private final TransformTreeNode enclosingNode;
-
-  // The PTransform for this node, which may be a composite PTransform.
-  // The root of a TransformHierarchy is represented as a TransformTreeNode
-  // with a null transform field.
-  private final PTransform<?, ?> transform;
-
-  private final String fullName;
-
-  // Nodes for sub-transforms of a composite transform.
-  private final Collection<TransformTreeNode> parts = new ArrayList<>();
-
-  // Inputs to the transform, in expanded form and mapped to the producer
-  // of the input.
-  private final Map<PValue, TransformTreeNode> inputs = new HashMap<>();
-
-  // Input to the transform, in unexpanded form.
-  private final PInput input;
-
-  // TODO: track which outputs need to be exported to parent.
-  // Output of the transform, in unexpanded form.
-  private POutput output;
-
-  private boolean finishedSpecifying = false;
-
-  /**
-   * Creates a new TransformTreeNode with the given parent and transform.
-   *
-   * <p>EnclosingNode and transform may both be null for
-   * a root-level node, which holds all other nodes.
-   *
-   * @param enclosingNode the composite node containing this node
-   * @param transform the PTransform tracked by this node
-   * @param fullName the fully qualified name of the transform
-   * @param input the unexpanded input to the transform
-   * @throws IllegalArgumentException if exactly one of {@code enclosingNode} and
-   *     {@code transform} is null
-   */
-  public TransformTreeNode(@Nullable TransformTreeNode enclosingNode,
-                           @Nullable PTransform<?, ?> transform,
-                           String fullName,
-                           @Nullable PInput input) {
-    this.enclosingNode = enclosingNode;
-    this.transform = transform;
-    Preconditions.checkArgument((enclosingNode == null && transform == null)
-        || (enclosingNode != null && transform != null),
-        "EnclosingNode and transform must both be specified, or both be null");
-    this.fullName = fullName;
-    this.input = input;
-  }
-
-  /**
-   * Returns the transform associated with this transform node, or null for a root node.
-   */
-  public PTransform<?, ?> getTransform() {
-    return transform;
-  }
-
-  /**
-   * Returns the enclosing composite transform node, or null if there is none.
-   */
-  public TransformTreeNode getEnclosingNode() {
-    return enclosingNode;
-  }
-
-  /**
-   * Adds a composite operation to the transform node.
-   *
-   * <p>As soon as a node is added, the transform node is considered a
-   * composite operation instead of a primitive transform.
-   */
-  public void addComposite(TransformTreeNode node) {
-    parts.add(node);
-  }
-
-  /**
-   * Returns true if this node represents a composite transform that does not perform
-   * processing of its own, but merely encapsulates a sub-pipeline (which may be empty).
-   *
-   * <p>Note that a node may be composite with no sub-transforms if it returns its input
-   * directly, extracts a component of a tuple, or performs other operations that occur at
-   * pipeline assembly time. The root node is always treated as composite.
-   */
-  public boolean isCompositeNode() {
-    return !parts.isEmpty() || returnsOthersOutput() || isRootNode();
-  }
-
-  /**
-   * Returns true if any expanded output of this node reports a producing transform that is
-   * not equal to this node's own transform.
-   */
-  private boolean returnsOthersOutput() {
-    PTransform<?, ?> transform = getTransform();
-    for (PValue output : getExpandedOutputs()) {
-      if (!output.getProducingTransformInternal().getTransform().equals(transform)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /**
-   * Returns true if this node has no transform, which is how a root node is represented.
-   */
-  public boolean isRootNode() {
-    return transform == null;
-  }
-
-  /**
-   * Returns the full name this node was constructed with.
-   */
-  public String getFullName() {
-    return fullName;
-  }
-
-  /**
-   * Adds an input to the transform node.
-   *
-   * @param expandedInput the input value, in expanded form
-   * @param producer the node that produced {@code expandedInput}
-   * @throws IllegalStateException if this node has already finished specifying
-   */
-  public void addInputProducer(PValue expandedInput, TransformTreeNode producer) {
-    Preconditions.checkState(!finishedSpecifying);
-    inputs.put(expandedInput, producer);
-  }
-
-  /**
-   * Returns the transform input, in unexpanded form.
-   */
-  public PInput getInput() {
-    return input;
-  }
-
-  /**
-   * Returns a mapping of inputs to the producing nodes for all inputs to
-   * the transform.
-   *
-   * <p>The returned map is an unmodifiable view of this node's inputs.
-   */
-  public Map<PValue, TransformTreeNode> getInputs() {
-    return Collections.unmodifiableMap(inputs);
-  }
-
-  /**
-   * Sets the output of the transform node.
-   *
-   * @param output the transform output, in unexpanded form
-   * @throws IllegalStateException if this node has already finished specifying, or if an
-   *     output has already been set
-   */
-  public void setOutput(POutput output) {
-    Preconditions.checkState(!finishedSpecifying);
-    Preconditions.checkState(this.output == null);
-    this.output = output;
-  }
-
-  /**
-   * Returns the transform output, in unexpanded form.
-   */
-  public POutput getOutput() {
-    return output;
-  }
-
-  /**
-   * Returns the transform outputs, in expanded form.
-   *
-   * <p>Returns an empty collection if no output has been set.
-   */
-  public Collection<? extends PValue> getExpandedOutputs() {
-    if (output != null) {
-      return output.expand();
-    } else {
-      return Collections.emptyList();
-    }
-  }
-
-  /**
-   * Visit the transform node.
-   *
-   * <p>Provides an ordered visit of the input values, the primitive
-   * transform (or child nodes for composite transforms), then the
-   * output values.
-   *
-   * <p>The node is finished first if it has not been already. A value is only reported to
-   * the visitor if it could be newly added to {@code visitedValues}.
-   *
-   * @param visitor the visitor to notify
-   * @param visitedValues the values reported so far; updated by this call
-   */
-  public void visit(Pipeline.PipelineVisitor visitor,
-                    Set<PValue> visitedValues) {
-    if (!finishedSpecifying) {
-      finishSpecifying();
-    }
-
-    // Visit inputs.
-    for (Map.Entry<PValue, TransformTreeNode> entry : inputs.entrySet()) {
-      if (visitedValues.add(entry.getKey())) {
-        visitor.visitValue(entry.getKey(), entry.getValue());
-      }
-    }
-
-    if (isCompositeNode()) {
-      visitor.enterCompositeTransform(this);
-      for (TransformTreeNode child : parts) {
-        child.visit(visitor, visitedValues);
-      }
-      visitor.leaveCompositeTransform(this);
-    } else {
-      visitor.visitTransform(this);
-    }
-
-    // Visit outputs.
-    for (PValue pValue : getExpandedOutputs()) {
-      if (visitedValues.add(pValue)) {
-        visitor.visitValue(pValue, this);
-      }
-    }
-  }
-
-  /**
-   * Finish specifying a transform.
-   *
-   * <p>All inputs are finished first, then the transform, then
-   * all outputs.
-   *
-   * <p>Calls after the first return immediately. The node is marked as finished before
-   * its input producers are processed.
-   */
-  public void finishSpecifying() {
-    if (finishedSpecifying) {
-      return;
-    }
-    finishedSpecifying = true;
-
-    for (TransformTreeNode input : inputs.values()) {
-      if (input != null) {
-        input.finishSpecifying();
-      }
-    }
-
-    if (output != null) {
-      output.finishSpecifyingOutput();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AssignWindows.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AssignWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AssignWindows.java
deleted file mode 100644
index 093783d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/AssignWindows.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.dataflow;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-/**
- * A primitive {@link PTransform} that implements the {@link Window#into(WindowFn)}
- * {@link PTransform}.
- *
- * For an application of {@link Window#into(WindowFn)} that changes the {@link WindowFn}, applies
- * a primitive {@link PTransform} in the Dataflow service.
- *
- * For an application of {@link Window#into(WindowFn)} that does not change the {@link WindowFn},
- * applies an identity {@link ParDo} and sets the windowing strategy of the output
- * {@link PCollection}.
- *
- * For internal use only.
- *
- * @param <T> the type of input element
- */
-public class AssignWindows<T> extends PTransform<PCollection<T>, PCollection<T>> {
-  private final Window.Bound<T> transform;
-
-  /**
-   * Builds an instance of this class from the overriden transform.
-   */
-  @SuppressWarnings("unused") // Used via reflection
-  public AssignWindows(Window.Bound<T> transform) {
-    this.transform = transform;
-  }
-
-  @Override
-  public PCollection<T> apply(PCollection<T> input) {
-    WindowingStrategy<?, ?> outputStrategy =
-        transform.getOutputStrategyInternal(input.getWindowingStrategy());
-    if (transform.getWindowFn() != null) {
-      // If the windowFn changed, we create a primitive, and run the AssignWindows operation here.
-      return PCollection.<T>createPrimitiveOutputInternal(
-                            input.getPipeline(), outputStrategy, input.isBounded());
-    } else {
-      // If the windowFn didn't change, we just run a pass-through transform and then set the
-      // new windowing strategy.
-      return input.apply(ParDo.named("Identity").of(new DoFn<T, T>() {
-        @Override
-        public void processElement(DoFn<T, T>.ProcessContext c) throws Exception {
-          c.output(c.element());
-        }
-      })).setWindowingStrategyInternal(outputStrategy);
-    }
-  }
-
-  @Override
-  public void validate(PCollection<T> input) {
-    transform.validate(input);
-  }
-
-  @Override
-  protected Coder<?> getDefaultOutputCoder(PCollection<T> input) {
-    return input.getCoder();
-  }
-
-  @Override
-  protected String getKindString() {
-    return "Window.Into()";
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java
deleted file mode 100644
index 538901c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/BigQueryIOTranslator.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners.dataflow;
-
-import com.google.api.client.json.JsonFactory;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.cloud.dataflow.sdk.coders.TableRowJsonCoder;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-
-/**
- * Translators that turn {@link BigQueryIO} read and write transforms into steps for the
- * Dataflow backend.
- */
-public class BigQueryIOTranslator {
-  private static final JsonFactory JSON_FACTORY = Transport.getJsonFactory();
-  private static final Logger LOG = LoggerFactory.getLogger(BigQueryIOTranslator.class);
-
-  /**
-   * Translates a BigQueryIO Read into a {@code ParallelRead} step for the Dataflow backend.
-   */
-  public static class ReadTranslator
-      implements DataflowPipelineTranslator.TransformTranslator<BigQueryIO.Read.Bound> {
-
-    @Override
-    public void translate(
-        BigQueryIO.Read.Bound transform, DataflowPipelineTranslator.TranslationContext context) {
-      // Actual translation.
-      context.addStep(transform, "ParallelRead");
-      context.addInput(PropertyNames.FORMAT, "bigquery");
-      context.addInput(PropertyNames.BIGQUERY_EXPORT_FORMAT, "FORMAT_AVRO");
-
-      if (transform.getQuery() == null) {
-        // No query was given, so the read is described by its table reference.
-        TableReference table = transform.getTable();
-        if (table.getProjectId() == null) {
-          // If user does not specify a project we assume the table to be located in the project
-          // that owns the Dataflow job.
-          String projectIdFromOptions = context.getPipelineOptions().getProject();
-          // NOTE(review): getDatasetId() is passed twice here; the format string is not
-          // visible from this file, so confirm the arguments line up with its placeholders.
-          LOG.warn(String.format(BigQueryIO.SET_PROJECT_FROM_OPTIONS_WARNING, table.getDatasetId(),
-              table.getDatasetId(), table.getTableId(), projectIdFromOptions));
-          table.setProjectId(projectIdFromOptions);
-        }
-
-        context.addInput(PropertyNames.BIGQUERY_TABLE, table.getTableId());
-        context.addInput(PropertyNames.BIGQUERY_DATASET, table.getDatasetId());
-        if (table.getProjectId() != null) {
-          context.addInput(PropertyNames.BIGQUERY_PROJECT, table.getProjectId());
-        }
-      } else {
-        // A query was given: pass it on together with the flatten-results setting.
-        context.addInput(PropertyNames.BIGQUERY_QUERY, transform.getQuery());
-        context.addInput(PropertyNames.BIGQUERY_FLATTEN_RESULTS, transform.getFlattenResults());
-      }
-      context.addValueOnlyOutput(PropertyNames.OUTPUT, context.getOutput(transform));
-    }
-  }
-
-  /**
-   * Translates a BigQueryIO Write into a {@code ParallelWrite} step for the Dataflow backend.
-   */
-  public static class WriteTranslator
-      implements DataflowPipelineTranslator.TransformTranslator<BigQueryIO.Write.Bound> {
-
-    @Override
-    public void translate(BigQueryIO.Write.Bound transform,
-                          DataflowPipelineTranslator.TranslationContext context) {
-      if (context.getPipelineOptions().isStreaming()) {
-        // Streaming is handled by the streaming runner.
-        throw new AssertionError(
-            "BigQueryIO is specified to use streaming write in batch mode.");
-      }
-
-      TableReference table = transform.getTable();
-
-      // Actual translation.
-      context.addStep(transform, "ParallelWrite");
-      context.addInput(PropertyNames.FORMAT, "bigquery");
-      context.addInput(PropertyNames.BIGQUERY_TABLE, table.getTableId());
-      context.addInput(PropertyNames.BIGQUERY_DATASET, table.getDatasetId());
-      if (table.getProjectId() != null) {
-        context.addInput(PropertyNames.BIGQUERY_PROJECT, table.getProjectId());
-      }
-
-      // The schema is optional; when present it is passed along serialized as JSON.
-      if (transform.getSchema() != null) {
-        try {
-          context.addInput(
-              PropertyNames.BIGQUERY_SCHEMA, JSON_FACTORY.toString(transform.getSchema()));
-        } catch (IOException exn) {
-          throw new IllegalArgumentException("Invalid table schema.", exn);
-        }
-      }
-
-      context.addInput(
-          PropertyNames.BIGQUERY_CREATE_DISPOSITION, transform.getCreateDisposition().name());
-      context.addInput(
-          PropertyNames.BIGQUERY_WRITE_DISPOSITION, transform.getWriteDisposition().name());
-      // Set sink encoding to TableRowJsonCoder.
-      context.addEncodingInput(WindowedValue.getValueOnlyCoder(TableRowJsonCoder.of()));
-      context.addInput(PropertyNames.PARALLEL_INPUT, context.getInput(transform));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSources.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSources.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSources.java
deleted file mode 100644
index 8160693..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/CustomSources.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners.dataflow;
-
-import static com.google.api.client.util.Base64.encodeBase64String;
-import static com.google.cloud.dataflow.sdk.util.SerializableUtils.serializeToByteArray;
-import static com.google.cloud.dataflow.sdk.util.Structs.addString;
-import static com.google.cloud.dataflow.sdk.util.Structs.addStringList;
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.api.services.dataflow.model.SourceMetadata;
-import com.google.cloud.dataflow.sdk.io.BoundedSource;
-import com.google.cloud.dataflow.sdk.io.Source;
-import com.google.cloud.dataflow.sdk.io.UnboundedSource;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.protobuf.ByteString;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.ArrayList;
-import java.util.List;
-
-
-/**
- * A helper class for supporting sources defined as {@code Source}.
- *
- * <p>Provides a bridge between the high-level {@code Source} API and the
- * low-level {@code CloudSource} class.
- */
-public class CustomSources {
-  private static final String SERIALIZED_SOURCE = "serialized_source";
-  @VisibleForTesting static final String SERIALIZED_SOURCE_SPLITS = "serialized_source_splits";
-  /**
-   * The current limit on the size of a ReportWorkItemStatus RPC to Google Cloud Dataflow, which
-   * includes the initial splits, is 20 MB.
-   */
-  public static final long DATAFLOW_SPLIT_RESPONSE_API_SIZE_BYTES = 20 * (1 << 20);
-
-  private static final Logger LOG = LoggerFactory.getLogger(CustomSources.class);
-
-  private static final ByteString firstSplitKey = ByteString.copyFromUtf8("0000000000000001");
-
-  public static boolean isFirstUnboundedSourceSplit(ByteString splitKey) {
-    return splitKey.equals(firstSplitKey);
-  }
-
-  private static int getDesiredNumUnboundedSourceSplits(DataflowPipelineOptions options) {
-    if (options.getMaxNumWorkers() > 0) {
-      return options.getMaxNumWorkers();
-    } else if (options.getNumWorkers() > 0) {
-      return options.getNumWorkers() * 3;
-    } else {
-      return 20;
-    }
-  }
-
-  public static com.google.api.services.dataflow.model.Source serializeToCloudSource(
-      Source<?> source, PipelineOptions options) throws Exception {
-    com.google.api.services.dataflow.model.Source cloudSource =
-        new com.google.api.services.dataflow.model.Source();
-    // We ourselves act as the SourceFormat.
-    cloudSource.setSpec(CloudObject.forClass(CustomSources.class));
-    addString(
-        cloudSource.getSpec(), SERIALIZED_SOURCE, encodeBase64String(serializeToByteArray(source)));
-
-    SourceMetadata metadata = new SourceMetadata();
-    if (source instanceof BoundedSource) {
-      BoundedSource<?> boundedSource = (BoundedSource<?>) source;
-      try {
-        metadata.setProducesSortedKeys(boundedSource.producesSortedKeys(options));
-      } catch (Exception e) {
-        LOG.warn("Failed to check if the source produces sorted keys: " + source, e);
-      }
-
-      // Size estimation is best effort so we continue even if it fails here.
-      try {
-        metadata.setEstimatedSizeBytes(boundedSource.getEstimatedSizeBytes(options));
-      } catch (Exception e) {
-        LOG.warn("Size estimation of the source failed: " + source, e);
-      }
-    } else if (source instanceof UnboundedSource) {
-      UnboundedSource<?, ?> unboundedSource = (UnboundedSource<?, ?>) source;
-      metadata.setInfinite(true);
-      List<String> encodedSplits = new ArrayList<>();
-      int desiredNumSplits =
-          getDesiredNumUnboundedSourceSplits(options.as(DataflowPipelineOptions.class));
-      for (UnboundedSource<?, ?> split :
-          unboundedSource.generateInitialSplits(desiredNumSplits, options)) {
-        encodedSplits.add(encodeBase64String(serializeToByteArray(split)));
-      }
-      checkArgument(!encodedSplits.isEmpty(), "UnboundedSources must have at least one split");
-      addStringList(cloudSource.getSpec(), SERIALIZED_SOURCE_SPLITS, encodedSplits);
-    } else {
-      throw new IllegalArgumentException("Unexpected source kind: " + source.getClass());
-    }
-
-    cloudSource.setMetadata(metadata);
-    return cloudSource;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DataflowAggregatorTransforms.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DataflowAggregatorTransforms.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DataflowAggregatorTransforms.java
deleted file mode 100644
index e1d7301..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DataflowAggregatorTransforms.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners.dataflow;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.common.collect.BiMap;
-import com.google.common.collect.HashBiMap;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.Multimap;
-
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Map;
-
-/**
- * A mapping relating {@link Aggregator}s and the {@link PTransform} in which they are used.
- */
-public class DataflowAggregatorTransforms {
-  private final Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorTransforms;
-  private final Multimap<PTransform<?, ?>, AppliedPTransform<?, ?, ?>> transformAppliedTransforms;
-  private final BiMap<AppliedPTransform<?, ?, ?>, String> appliedStepNames;
-
-  public DataflowAggregatorTransforms(
-      Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorTransforms,
-      Map<AppliedPTransform<?, ?, ?>, String> transformStepNames) {
-    this.aggregatorTransforms = aggregatorTransforms;
-    appliedStepNames = HashBiMap.create(transformStepNames);
-
-    transformAppliedTransforms = HashMultimap.create();
-    for (AppliedPTransform<?, ?, ?> appliedTransform : transformStepNames.keySet()) {
-      transformAppliedTransforms.put(appliedTransform.getTransform(), appliedTransform);
-    }
-  }
-
-  /**
-   * Returns true if the provided {@link Aggregator} is used in the constructing {@link Pipeline}.
-   */
-  public boolean contains(Aggregator<?, ?> aggregator) {
-    return aggregatorTransforms.containsKey(aggregator);
-  }
-
-  /**
-   * Gets the step names in which the {@link Aggregator} is used.
-   */
-  public Collection<String> getAggregatorStepNames(Aggregator<?, ?> aggregator) {
-    Collection<String> names = new HashSet<>();
-    Collection<PTransform<?, ?>> transforms = aggregatorTransforms.get(aggregator);
-    for (PTransform<?, ?> transform : transforms) {
-      for (AppliedPTransform<?, ?, ?> applied : transformAppliedTransforms.get(transform)) {
-        names.add(appliedStepNames.get(applied));
-      }
-    }
-    return names;
-  }
-
-  /**
-   * Gets the {@link PTransform} that was assigned the provided step name.
-   */
-  public AppliedPTransform<?, ?, ?> getAppliedTransformForStepName(String stepName) {
-    return appliedStepNames.inverse().get(stepName);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DataflowMetricUpdateExtractor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DataflowMetricUpdateExtractor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DataflowMetricUpdateExtractor.java
deleted file mode 100644
index 13016dd..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/DataflowMetricUpdateExtractor.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners.dataflow;
-
-import com.google.api.services.dataflow.model.MetricStructuredName;
-import com.google.api.services.dataflow.model.MetricUpdate;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Methods for extracting the values of an {@link Aggregator} from a collection of {@link
- * MetricUpdate MetricUpdates}.
- */
-public final class DataflowMetricUpdateExtractor {
-  private static final String STEP_NAME_CONTEXT_KEY = "step";
-  private static final String IS_TENTATIVE_KEY = "tentative";
-
-  private DataflowMetricUpdateExtractor() {
-    // Do not instantiate.
-  }
-
-  /**
-   * Extract the values of the provided {@link Aggregator} at each {@link PTransform} it was used in
-   * according to the provided {@link DataflowAggregatorTransforms} from the given list of {@link
-   * MetricUpdate MetricUpdates}.
-   */
-  public static <OutputT> Map<String, OutputT> fromMetricUpdates(Aggregator<?, OutputT> aggregator,
-      DataflowAggregatorTransforms aggregatorTransforms, List<MetricUpdate> metricUpdates) {
-    Map<String, OutputT> results = new HashMap<>();
-    if (metricUpdates == null) {
-      return results;
-    }
-
-    String aggregatorName = aggregator.getName();
-    Collection<String> aggregatorSteps = aggregatorTransforms.getAggregatorStepNames(aggregator);
-
-    for (MetricUpdate metricUpdate : metricUpdates) {
-      MetricStructuredName metricStructuredName = metricUpdate.getName();
-      Map<String, String> context = metricStructuredName.getContext();
-      if (metricStructuredName.getName().equals(aggregatorName) && context != null
-          && aggregatorSteps.contains(context.get(STEP_NAME_CONTEXT_KEY))) {
-        AppliedPTransform<?, ?, ?> transform =
-            aggregatorTransforms.getAppliedTransformForStepName(
-                context.get(STEP_NAME_CONTEXT_KEY));
-        String fullName = transform.getFullName();
-        // Prefer the tentative (fresher) value if it exists.
-        if (Boolean.parseBoolean(context.get(IS_TENTATIVE_KEY)) || !results.containsKey(fullName)) {
-          results.put(fullName, toValue(aggregator, metricUpdate));
-        }
-      }
-    }
-
-    return results;
-
-  }
-
-  private static <OutputT> OutputT toValue(
-      Aggregator<?, OutputT> aggregator, MetricUpdate metricUpdate) {
-    CombineFn<?, ?, OutputT> combineFn = aggregator.getCombineFn();
-    Class<? super OutputT> outputType = combineFn.getOutputType().getRawType();
-
-    if (outputType.equals(Long.class)) {
-      @SuppressWarnings("unchecked")
-      OutputT asLong = (OutputT) Long.valueOf(toNumber(metricUpdate).longValue());
-      return asLong;
-    }
-    if (outputType.equals(Integer.class)) {
-      @SuppressWarnings("unchecked")
-      OutputT asInt = (OutputT) Integer.valueOf(toNumber(metricUpdate).intValue());
-      return asInt;
-    }
-    if (outputType.equals(Double.class)) {
-      @SuppressWarnings("unchecked")
-      OutputT asDouble = (OutputT) Double.valueOf(toNumber(metricUpdate).doubleValue());
-      return asDouble;
-    }
-    throw new UnsupportedOperationException(
-        "Unsupported Output Type " + outputType + " in aggregator " + aggregator);
-  }
-
-  private static Number toNumber(MetricUpdate update) {
-    if (update.getScalar() instanceof Number) {
-      return (Number) update.getScalar();
-    }
-    throw new IllegalArgumentException(
-        "Metric Update " + update + " does not have a numeric scalar");
-  }
-}
-

[64/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

Directory reorganization

Move Java 8-specific tests from "java8tests" into "sdks/java/java8tests/".


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/d4233aa0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/d4233aa0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/d4233aa0

Branch: refs/heads/master
Commit: d4233aa009375ec9a3e393bcec6b496920a6a54e
Parents: 11bb9e0
Author: Davor Bonaci <da...@google.com>
Authored: Wed Mar 23 17:44:35 2016 -0700
Committer: Davor Bonaci <da...@google.com>
Committed: Wed Mar 23 18:33:33 2016 -0700

----------------------------------------------------------------------
 java8tests/pom.xml                              | 184 -------------------
 .../sdk/transforms/CombineJava8Test.java        | 133 --------------
 .../sdk/transforms/FilterJava8Test.java         | 118 ------------
 .../transforms/FlatMapElementsJava8Test.java    |  84 ---------
 .../sdk/transforms/MapElementsJava8Test.java    |  77 --------
 .../sdk/transforms/PartitionJava8Test.java      |  74 --------
 .../transforms/RemoveDuplicatesJava8Test.java   |  98 ----------
 .../sdk/transforms/WithKeysJava8Test.java       |  73 --------
 .../sdk/transforms/WithTimestampsJava8Test.java |  65 -------
 pom.xml                                         |   2 +-
 sdks/java/java8tests/pom.xml                    | 184 +++++++++++++++++++
 .../sdk/transforms/CombineJava8Test.java        | 133 ++++++++++++++
 .../sdk/transforms/FilterJava8Test.java         | 118 ++++++++++++
 .../transforms/FlatMapElementsJava8Test.java    |  84 +++++++++
 .../sdk/transforms/MapElementsJava8Test.java    |  77 ++++++++
 .../sdk/transforms/PartitionJava8Test.java      |  74 ++++++++
 .../transforms/RemoveDuplicatesJava8Test.java   |  98 ++++++++++
 .../sdk/transforms/WithKeysJava8Test.java       |  73 ++++++++
 .../sdk/transforms/WithTimestampsJava8Test.java |  65 +++++++
 19 files changed, 907 insertions(+), 907 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/java8tests/pom.xml
----------------------------------------------------------------------
diff --git a/java8tests/pom.xml b/java8tests/pom.xml
deleted file mode 100644
index 1d253dc..0000000
--- a/java8tests/pom.xml
+++ /dev/null
@@ -1,184 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-    Licensed to the Apache Software Foundation (ASF) under one or more
-    contributor license agreements.  See the NOTICE file distributed with
-    this work for additional information regarding copyright ownership.
-    The ASF licenses this file to You under the Apache License, Version 2.0
-    (the "License"); you may not use this file except in compliance with
-    the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <groupId>org.apache.beam</groupId>
-    <artifactId>parent</artifactId>
-    <version>0.1.0-incubating-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-
-  <artifactId>java8tests-all</artifactId>
-  <name>Apache Beam :: Tests :: Java 8 All</name>
-  <description>Apache Beam Java SDK provides a simple, Java-based
-    interface for processing virtually any size data.
-    This artifact includes tests of the SDK from a Java 8
-    user.</description>
-
-  <packaging>jar</packaging>
-
-  <profiles>
-    <profile>
-      <id>DataflowPipelineTests</id>
-      <properties>
-        <runIntegrationTestOnService>true</runIntegrationTestOnService>
-        <testGroups>com.google.cloud.dataflow.sdk.testing.RunnableOnService</testGroups>
-        <testParallelValue>both</testParallelValue>
-      </properties>
-    </profile>
-  </profiles>
-
-  <build>
-    <plugins>
-      <plugin>
-        <artifactId>maven-compiler-plugin</artifactId>
-        <configuration>
-          <testSource>1.8</testSource>
-          <testTarget>1.8</testTarget>
-        </configuration>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-dependency-plugin</artifactId>
-        <executions>
-          <execution>
-            <goals><goal>analyze-only</goal></goals>
-            <configuration>
-              <failOnWarning>true</failOnWarning>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-checkstyle-plugin</artifactId>
-        <version>2.12</version>
-        <dependencies>
-          <dependency>
-            <groupId>com.puppycrawl.tools</groupId>
-            <artifactId>checkstyle</artifactId>
-            <version>6.6</version>
-          </dependency>
-        </dependencies>
-        <configuration>
-          <configLocation>../checkstyle.xml</configLocation>
-          <consoleOutput>true</consoleOutput>
-          <failOnViolation>true</failOnViolation>
-          <includeTestSourceDirectory>true</includeTestSourceDirectory>
-          <includeResources>false</includeResources>
-        </configuration>
-        <executions>
-          <execution>
-            <goals>
-              <goal>check</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- Source plugin for generating source and test-source JARs. -->
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-source-plugin</artifactId>
-        <version>2.4</version>
-        <executions>
-          <execution>
-            <id>attach-sources</id>
-            <phase>compile</phase>
-            <goals>
-              <goal>jar</goal>
-            </goals>
-          </execution>
-          <execution>
-            <id>attach-test-sources</id>
-            <phase>test-compile</phase>
-            <goals>
-              <goal>test-jar</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-jar-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>default-jar</id>
-            <goals>
-              <goal>jar</goal>
-            </goals>
-          </execution>
-          <execution>
-            <id>default-test-jar</id>
-            <goals>
-              <goal>test-jar</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- Coverage analysis for unit tests. -->
-      <plugin>
-        <groupId>org.jacoco</groupId>
-        <artifactId>jacoco-maven-plugin</artifactId>
-      </plugin>
-    </plugins>
-  </build>
-
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.beam</groupId>
-      <artifactId>java-sdk-all</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-      <version>${guava.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>joda-time</groupId>
-      <artifactId>joda-time</artifactId>
-      <version>${joda.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.hamcrest</groupId>
-      <artifactId>hamcrest-all</artifactId>
-      <version>${hamcrest.version}</version>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <version>${junit.version}</version>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
-</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java
----------------------------------------------------------------------
diff --git a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java b/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java
deleted file mode 100644
index b569e49..0000000
--- a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.Serializable;
-
-/**
- * Java 8 Tests for {@link Combine}.
- */
-@RunWith(JUnit4.class)
-@SuppressWarnings("serial")
-public class CombineJava8Test implements Serializable {
-
-  @Rule
-  public transient ExpectedException thrown = ExpectedException.none();
-
-  /**
-   * Class for use in testing use of Java 8 method references.
-   */
-  private static class Summer implements Serializable {
-    public int sum(Iterable<Integer> integers) {
-      int sum = 0;
-      for (int i : integers) {
-        sum += i;
-      }
-      return sum;
-    }
-  }
-
-  /**
-   * Tests creation of a global {@link Combine} via Java 8 lambda.
-   */
-  @Test
-  public void testCombineGloballyLambda() {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollection<Integer> output = pipeline
-        .apply(Create.of(1, 2, 3, 4))
-        .apply(Combine.globally(integers -> {
-          int sum = 0;
-          for (int i : integers) {
-            sum += i;
-          }
-          return sum;
-        }));
-
-    DataflowAssert.that(output).containsInAnyOrder(10);
-    pipeline.run();
-  }
-
-  /**
-   * Tests creation of a global {@link Combine} via a Java 8 method reference.
-   */
-  @Test
-  public void testCombineGloballyInstanceMethodReference() {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollection<Integer> output = pipeline
-        .apply(Create.of(1, 2, 3, 4))
-        .apply(Combine.globally(new Summer()::sum));
-
-    DataflowAssert.that(output).containsInAnyOrder(10);
-    pipeline.run();
-  }
-
-  /**
-   * Tests creation of a per-key {@link Combine} via a Java 8 lambda.
-   */
-  @Test
-  public void testCombinePerKeyLambda() {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollection<KV<String, Integer>> output = pipeline
-        .apply(Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 3), KV.of("c", 4)))
-        .apply(Combine.perKey(integers -> {
-          int sum = 0;
-          for (int i : integers) {
-            sum += i;
-          }
-          return sum;
-        }));
-
-    DataflowAssert.that(output).containsInAnyOrder(
-        KV.of("a", 4),
-        KV.of("b", 2),
-        KV.of("c", 4));
-    pipeline.run();
-  }
-
-  /**
-   * Tests creation of a per-key {@link Combine} via a Java 8 method reference.
-   */
-  @Test
-  public void testCombinePerKeyInstanceMethodReference() {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollection<KV<String, Integer>> output = pipeline
-        .apply(Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 3), KV.of("c", 4)))
-        .apply(Combine.perKey(new Summer()::sum));
-
-    DataflowAssert.that(output).containsInAnyOrder(
-        KV.of("a", 4),
-        KV.of("b", 2),
-        KV.of("c", 4));
-    pipeline.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterJava8Test.java
----------------------------------------------------------------------
diff --git a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterJava8Test.java b/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterJava8Test.java
deleted file mode 100644
index db65932..0000000
--- a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterJava8Test.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.Serializable;
-
-/**
- * Java 8 Tests for {@link Filter}.
- */
-@RunWith(JUnit4.class)
-@SuppressWarnings("serial")
-public class FilterJava8Test implements Serializable {
-
-  @Rule
-  public transient ExpectedException thrown = ExpectedException.none();
-
-  @Test
-  @Category(RunnableOnService.class)
-  public void testIdentityFilterByPredicate() {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollection<Integer> output = pipeline
-        .apply(Create.of(591, 11789, 1257, 24578, 24799, 307))
-        .apply(Filter.byPredicate(i -> true));
-
-    DataflowAssert.that(output).containsInAnyOrder(591, 11789, 1257, 24578, 24799, 307);
-    pipeline.run();
-  }
-
-  @Test
-  public void testNoFilterByPredicate() {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollection<Integer> output = pipeline
-        .apply(Create.of(1, 2, 4, 5))
-        .apply(Filter.byPredicate(i -> false));
-
-    DataflowAssert.that(output).empty();
-    pipeline.run();
-  }
-
-  @Test
-  @Category(RunnableOnService.class)
-  public void testFilterByPredicate() {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollection<Integer> output = pipeline
-        .apply(Create.of(1, 2, 3, 4, 5, 6, 7))
-        .apply(Filter.byPredicate(i -> i % 2 == 0));
-
-    DataflowAssert.that(output).containsInAnyOrder(2, 4, 6);
-    pipeline.run();
-  }
-
-  /**
-   * Confirms that in Java 8 style, where a lambda results in a rawtype, the output type token is
-   * not useful. If this test ever fails there may be simplifications available to us.
-   */
-  @Test
-  public void testFilterParDoOutputTypeDescriptorRaw() throws Exception {
-    Pipeline pipeline = TestPipeline.create();
-
-    @SuppressWarnings({"unchecked", "rawtypes"})
-    PCollection<String> output = pipeline
-        .apply(Create.of("hello"))
-        .apply(Filter.by(s -> true));
-
-    thrown.expect(CannotProvideCoderException.class);
-    pipeline.getCoderRegistry().getDefaultCoder(output.getTypeDescriptor());
-  }
-
-  @Test
-  @Category(RunnableOnService.class)
-  public void testFilterByMethodReference() {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollection<Integer> output = pipeline
-        .apply(Create.of(1, 2, 3, 4, 5, 6, 7))
-        .apply(Filter.byPredicate(new EvenFilter()::isEven));
-
-    DataflowAssert.that(output).containsInAnyOrder(2, 4, 6);
-    pipeline.run();
-  }
-
-  private static class EvenFilter implements Serializable {
-    public boolean isEven(int i) {
-      return i % 2 == 0;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElementsJava8Test.java
----------------------------------------------------------------------
diff --git a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElementsJava8Test.java b/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElementsJava8Test.java
deleted file mode 100644
index e0b946b..0000000
--- a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElementsJava8Test.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.collect.ImmutableList;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.Serializable;
-import java.util.List;
-
-/**
- * Java 8 tests for {@link FlatMapElements}, covering both lambda and method-reference functions.
- */
-@RunWith(JUnit4.class)
-public class FlatMapElementsJava8Test implements Serializable {
-
-  /** Method-reference target that expands a number into itself and its negation. */
-  private static class Negater implements Serializable {
-    public List<Integer> numAndNegation(int input) {
-      return ImmutableList.of(input, -input);
-    }
-  }
-
-  @Rule
-  public transient ExpectedException thrown = ExpectedException.none();
-
-  /**
-   * Applies {@link FlatMapElements} with an explicitly typed lambda (which is instantiated as a
-   * {@link SerializableFunction}) and expects each input together with its negation.
-   */
-  @Test
-  public void testFlatMapBasic() throws Exception {
-    Pipeline p = TestPipeline.create();
-    PCollection<Integer> input = p.apply(Create.of(1, 2, 3));
-
-    // Note that the input type annotation on the lambda parameter is required.
-    PCollection<Integer> expanded = input.apply(
-        FlatMapElements
-            .via((Integer n) -> ImmutableList.of(n, -n))
-            .withOutputType(new TypeDescriptor<Integer>() {}));
-
-    DataflowAssert.that(expanded).containsInAnyOrder(1, 3, -1, -3, 2, -2);
-    p.run();
-  }
-
-  /**
-   * Applies {@link FlatMapElements} with a bound instance method reference and expects each
-   * input together with its negation.
-   */
-  @Test
-  public void testFlatMapMethodReference() throws Exception {
-    Pipeline p = TestPipeline.create();
-    PCollection<Integer> input = p.apply(Create.of(1, 2, 3));
-
-    Negater negater = new Negater();
-    PCollection<Integer> expanded = input.apply(
-        FlatMapElements
-            .via(negater::numAndNegation)
-            .withOutputType(new TypeDescriptor<Integer>() {}));
-
-    DataflowAssert.that(expanded).containsInAnyOrder(1, 3, -1, -3, 2, -2);
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/MapElementsJava8Test.java
----------------------------------------------------------------------
diff --git a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/MapElementsJava8Test.java b/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/MapElementsJava8Test.java
deleted file mode 100644
index 123e680..0000000
--- a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/MapElementsJava8Test.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.Serializable;
-
-/**
- * Java 8 tests for {@link MapElements}, covering both lambda and method-reference functions.
- */
-@RunWith(JUnit4.class)
-public class MapElementsJava8Test implements Serializable {
-
-  /** Method-reference target that doubles its argument. */
-  private static class Doubler implements Serializable {
-    public int doubleIt(int val) {
-      return 2 * val;
-    }
-  }
-
-  /**
-   * Maps with an explicitly typed lambda (which is instantiated as a
-   * {@link SerializableFunction}) and expects every input doubled.
-   */
-  @Test
-  public void testMapBasic() throws Exception {
-    Pipeline p = TestPipeline.create();
-    PCollection<Integer> input = p.apply(Create.of(1, 2, 3));
-
-    // Note that the type annotation is required (for Java, not for Dataflow)
-    PCollection<Integer> doubled = input.apply(
-        MapElements
-            .via((Integer n) -> n * 2)
-            .withOutputType(new TypeDescriptor<Integer>() {}));
-
-    DataflowAssert.that(doubled).containsInAnyOrder(6, 2, 4);
-    p.run();
-  }
-
-  /**
-   * Maps with a bound instance method reference and expects every input doubled.
-   */
-  @Test
-  public void testMapMethodReference() throws Exception {
-    Pipeline p = TestPipeline.create();
-    PCollection<Integer> input = p.apply(Create.of(1, 2, 3));
-
-    Doubler doubler = new Doubler();
-    PCollection<Integer> doubled = input.apply(
-        MapElements
-            .via(doubler::doubleIt)
-            .withOutputType(new TypeDescriptor<Integer>() {}));
-
-    DataflowAssert.that(doubled).containsInAnyOrder(6, 2, 4);
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionJava8Test.java
----------------------------------------------------------------------
diff --git a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionJava8Test.java b/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionJava8Test.java
deleted file mode 100644
index c459ada..0000000
--- a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionJava8Test.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import static org.junit.Assert.assertEquals;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.Serializable;
-
-/**
- * Java 8 Tests for {@link Partition}.
- */
-@RunWith(JUnit4.class)
-@SuppressWarnings("serial")
-public class PartitionJava8Test implements Serializable {
-
-  @Rule
-  public transient ExpectedException thrown = ExpectedException.none();
-
-  /**
-   * Partitions four integers into three outputs with a lambda that returns
-   * {@code element % numPartitions}, then checks the contents of each output.
-   */
-  @Test
-  public void testModPartition() {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollectionList<Integer> outputs = pipeline
-        .apply(Create.of(1, 2, 4, 5))
-        .apply(Partition.of(3, (element, numPartitions) -> element % numPartitions));
-    assertEquals(3, outputs.size());
-    // None of the inputs is a multiple of 3, so partition 0 is expected to be empty.
-    DataflowAssert.that(outputs.get(0)).empty();
-    DataflowAssert.that(outputs.get(1)).containsInAnyOrder(1, 4);
-    DataflowAssert.that(outputs.get(2)).containsInAnyOrder(2, 5);
-    pipeline.run();
-  }
-
-  /**
-   * Confirms that in Java 8 style, where a lambda results in a rawtype, the output type token is
-   * not useful. If this test ever fails there may be simplifications available to us.
-   */
-  @Test
-  public void testPartitionFnOutputTypeDescriptorRaw() throws Exception {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollectionList<String> output = pipeline
-        .apply(Create.of("hello"))
-        .apply(Partition.of(1, (element, numPartitions) -> 0));
-
-    // Registered after the pipeline is built, so only the coder lookup below can satisfy it.
-    thrown.expect(CannotProvideCoderException.class);
-    pipeline.getCoderRegistry().getDefaultCoder(output.get(0).getTypeDescriptor());
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java
----------------------------------------------------------------------
diff --git a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java b/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java
deleted file mode 100644
index dfa1ca6..0000000
--- a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.transforms;
-
-import static org.hamcrest.Matchers.contains;
-import static org.hamcrest.Matchers.hasItem;
-import static org.hamcrest.Matchers.not;
-import static org.junit.Assert.assertThat;
-
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.Multimap;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.HashSet;
-import java.util.Set;
-
-/**
- * Java 8 tests for {@link RemoveDuplicates}.
- */
-@RunWith(JUnit4.class)
-public class RemoveDuplicatesJava8Test {
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  /**
-   * Deduplicates strings by length using a lambda plus an explicit representative type, then
-   * checks that every surviving element came from the input and that no length occurs twice.
-   */
-  @Test
-  public void withLambdaRepresentativeValuesFnAndTypeDescriptorShouldApplyFn() {
-    TestPipeline p = TestPipeline.create();
-
-    Multimap<Integer, String> predupedContents = HashMultimap.create();
-    predupedContents.put(3, "foo");
-    predupedContents.put(4, "foos");
-    predupedContents.put(6, "barbaz");
-    predupedContents.put(6, "bazbar");
-    PCollection<String> dupes =
-        p.apply(Create.of("foo", "foos", "barbaz", "barbaz", "bazbar", "foo"));
-    PCollection<String> deduped =
-        dupes.apply(RemoveDuplicates.withRepresentativeValueFn((String s) -> s.length())
-                                    .withRepresentativeType(TypeDescriptor.of(Integer.class)));
-
-    DataflowAssert.that(deduped).satisfies((Iterable<String> strs) -> {
-      Set<Integer> seenLengths = new HashSet<>();
-      for (String s : strs) {
-        assertThat(predupedContents.values(), hasItem(s));
-        // hasItem rather than contains: contains(x) only matches an iterable that is exactly
-        // [x], so not(contains(x)) lets a repeated length through once two lengths are recorded.
-        assertThat(seenLengths, not(hasItem(s.length())));
-        seenLengths.add(s.length());
-      }
-      return null;
-    });
-
-    p.run();
-  }
-
-  /**
-   * Without a representative type the lambda's key type is erased, so applying the transform is
-   * expected to fail with an {@link IllegalStateException} that explains the missing coder.
-   */
-  @Test
-  public void withLambdaRepresentativeValuesFnNoTypeDescriptorShouldThrow() {
-    TestPipeline p = TestPipeline.create();
-
-    PCollection<String> dupes =
-        p.apply(Create.of("foo", "foos", "barbaz", "barbaz", "bazbar", "foo"));
-
-    thrown.expect(IllegalStateException.class);
-    thrown.expectMessage("Unable to return a default Coder for RemoveRepresentativeDupes");
-    thrown.expectMessage("Cannot provide a coder for type variable K");
-    thrown.expectMessage("the actual type is unknown due to erasure.");
-
-    // Thrown when applying a transform to the internal WithKeys that withRepresentativeValueFn is
-    // implemented with
-    dupes.apply("RemoveRepresentativeDupes",
-        RemoveDuplicates.withRepresentativeValueFn((String s) -> s.length()));
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java
----------------------------------------------------------------------
diff --git a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java b/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java
deleted file mode 100644
index 3771f78..0000000
--- a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.Pipeline.PipelineExecutionException;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/**
- * Java 8 tests for {@link WithKeys} driven by lambda key functions.
- */
-@RunWith(JUnit4.class)
-public class WithKeysJava8Test {
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  /**
-   * A lambda key function combined with an explicit key {@link TypeDescriptor} keys every string
-   * by its parsed integer value.
-   */
-  @Test
-  @Category(RunnableOnService.class)
-  public void withLambdaAndTypeDescriptorShouldSucceed() {
-    TestPipeline pipeline = TestPipeline.create();
-    PCollection<String> numerals = pipeline.apply(Create.of("1234", "3210", "0", "-12"));
-
-    TypeDescriptor<Integer> keyType = TypeDescriptor.of(Integer.class);
-    PCollection<KV<Integer, String>> keyed =
-        numerals.apply(WithKeys.of((String s) -> Integer.valueOf(s)).withKeyType(keyType));
-
-    DataflowAssert.that(keyed).containsInAnyOrder(
-        KV.of(1234, "1234"), KV.of(0, "0"), KV.of(-12, "-12"), KV.of(3210, "3210"));
-
-    pipeline.run();
-  }
-
-  /**
-   * The same lambda without a key type descriptor: the expectations are registered after the
-   * transform is applied, so the failure has to come from running the pipeline.
-   */
-  @Test
-  public void withLambdaAndNoTypeDescriptorShouldThrow() {
-    TestPipeline pipeline = TestPipeline.create();
-    PCollection<String> numerals = pipeline.apply(Create.of("1234", "3210", "0", "-12"));
-
-    numerals.apply("ApplyKeysWithWithKeys", WithKeys.of((String s) -> Integer.valueOf(s)));
-
-    thrown.expect(PipelineExecutionException.class);
-    thrown.expectMessage("Unable to return a default Coder for ApplyKeysWithWithKeys");
-    thrown.expectMessage("Cannot provide a coder for type variable K");
-    thrown.expectMessage("the actual type is unknown due to erasure.");
-
-    pipeline.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithTimestampsJava8Test.java
----------------------------------------------------------------------
diff --git a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithTimestampsJava8Test.java b/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithTimestampsJava8Test.java
deleted file mode 100644
index b2b6dbc..0000000
--- a/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithTimestampsJava8Test.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.joda.time.Instant;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.Serializable;
-
-/**
- * Java 8 tests for {@link WithTimestamps}.
- */
-@RunWith(JUnit4.class)
-public class WithTimestampsJava8Test implements Serializable {
-
-  /**
-   * Stamps each element with the instant whose epoch-millisecond value is the number the element
-   * spells out, then checks both the elements and the timestamps observed downstream.
-   */
-  @Test
-  @Category(RunnableOnService.class)
-  public void withTimestampsLambdaShouldApplyTimestamps() {
-    TestPipeline p = TestPipeline.create();
-
-    String yearTwoThousand = "946684800000";
-    PCollection<String> timestamped =
-        p.apply(Create.of("1234", "0", Integer.toString(Integer.MAX_VALUE), yearTwoThousand))
-         // Derive the timestamp from the element itself. parseLong yields a primitive long, so
-         // the value is interpreted as epoch milliseconds.
-         .apply(WithTimestamps.of((String input) -> new Instant(Long.parseLong(input))));
-
-    PCollection<KV<String, Instant>> timestampedVals =
-        timestamped.apply(ParDo.of(new DoFn<String, KV<String, Instant>>() {
-          @Override
-          public void processElement(DoFn<String, KV<String, Instant>>.ProcessContext c)
-              throws Exception {
-            // Pair every element with the timestamp it carries at this point in the pipeline.
-            c.output(KV.of(c.element(), c.timestamp()));
-          }
-        }));
-
-    DataflowAssert.that(timestamped)
-        .containsInAnyOrder(yearTwoThousand, "0", "1234", Integer.toString(Integer.MAX_VALUE));
-    DataflowAssert.that(timestampedVals)
-        .containsInAnyOrder(
-            KV.of("0", new Instant(0)),
-            // 1234 milliseconds after the epoch. new Instant("1234") would instead parse the
-            // text as an ISO date-time.
-            KV.of("1234", new Instant(1234L)),
-            KV.of(Integer.toString(Integer.MAX_VALUE), new Instant(Integer.MAX_VALUE)),
-            KV.of(yearTwoThousand, new Instant(Long.parseLong(yearTwoThousand))));
-
-    // The assertions above are only evaluated when the pipeline actually runs.
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 3803007..6b2fd93 100644
--- a/pom.xml
+++ b/pom.xml
@@ -138,7 +138,7 @@
         <jdk>[1.8,)</jdk>
       </activation>
       <modules>
-        <module>java8tests</module>
+        <module>sdks/java/java8tests</module>
       </modules>
     </profile>
     <profile>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/sdks/java/java8tests/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/pom.xml b/sdks/java/java8tests/pom.xml
new file mode 100644
index 0000000..bb8d629
--- /dev/null
+++ b/sdks/java/java8tests/pom.xml
@@ -0,0 +1,184 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.beam</groupId>
+    <artifactId>parent</artifactId>
+    <version>0.1.0-incubating-SNAPSHOT</version>
+    <relativePath>../../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>java8tests-all</artifactId>
+  <name>Apache Beam :: Tests :: Java 8 All</name>
+  <description>Apache Beam Java SDK provides a simple, Java-based
+    interface for processing virtually any size data.
+    This artifact includes tests of the SDK from a Java 8
+    user.</description>
+
+  <packaging>jar</packaging>
+
+  <profiles>
+    <profile>
+      <id>DataflowPipelineTests</id>
+      <properties>
+        <runIntegrationTestOnService>true</runIntegrationTestOnService>
+        <testGroups>com.google.cloud.dataflow.sdk.testing.RunnableOnService</testGroups>
+        <testParallelValue>both</testParallelValue>
+      </properties>
+    </profile>
+  </profiles>
+
+  <build>
+    <plugins>
+      <plugin>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <configuration>
+          <testSource>1.8</testSource>
+          <testTarget>1.8</testTarget>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals><goal>analyze-only</goal></goals>
+            <configuration>
+              <failOnWarning>true</failOnWarning>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <version>2.12</version>
+        <dependencies>
+          <dependency>
+            <groupId>com.puppycrawl.tools</groupId>
+            <artifactId>checkstyle</artifactId>
+            <version>6.6</version>
+          </dependency>
+        </dependencies>
+        <configuration>
+          <configLocation>../../../checkstyle.xml</configLocation>
+          <consoleOutput>true</consoleOutput>
+          <failOnViolation>true</failOnViolation>
+          <includeTestSourceDirectory>true</includeTestSourceDirectory>
+          <includeResources>false</includeResources>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>check</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!-- Source plugin for generating source and test-source JARs. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-source-plugin</artifactId>
+        <version>2.4</version>
+        <executions>
+          <execution>
+            <id>attach-sources</id>
+            <phase>compile</phase>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>attach-test-sources</id>
+            <phase>test-compile</phase>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>default-jar</id>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>default-test-jar</id>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!-- Coverage analysis for unit tests. -->
+      <plugin>
+        <groupId>org.jacoco</groupId>
+        <artifactId>jacoco-maven-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>java-sdk-all</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>${guava.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>joda-time</groupId>
+      <artifactId>joda-time</artifactId>
+      <version>${joda.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.hamcrest</groupId>
+      <artifactId>hamcrest-all</artifactId>
+      <version>${hamcrest.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java
new file mode 100644
index 0000000..b569e49
--- /dev/null
+++ b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/CombineJava8Test.java
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.sdk.transforms;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.Serializable;
+
+/**
+ * Java 8 tests for {@link Combine}, using lambdas and method references as combining functions.
+ */
+@RunWith(JUnit4.class)
+@SuppressWarnings("serial")
+public class CombineJava8Test implements Serializable {
+
+  @Rule
+  public transient ExpectedException thrown = ExpectedException.none();
+
+  /**
+   * Global {@link Combine} whose combining function is a Java 8 lambda that sums its input.
+   */
+  @Test
+  public void testCombineGloballyLambda() {
+    Pipeline p = TestPipeline.create();
+    PCollection<Integer> numbers = p.apply(Create.of(1, 2, 3, 4));
+
+    PCollection<Integer> total = numbers.apply(Combine.globally(values -> {
+      int acc = 0;
+      for (int value : values) {
+        acc += value;
+      }
+      return acc;
+    }));
+
+    DataflowAssert.that(total).containsInAnyOrder(10);
+    p.run();
+  }
+
+  /**
+   * Global {@link Combine} whose combining function is a bound Java 8 method reference.
+   */
+  @Test
+  public void testCombineGloballyInstanceMethodReference() {
+    Pipeline p = TestPipeline.create();
+    PCollection<Integer> numbers = p.apply(Create.of(1, 2, 3, 4));
+
+    Summer summer = new Summer();
+    PCollection<Integer> total = numbers.apply(Combine.globally(summer::sum));
+
+    DataflowAssert.that(total).containsInAnyOrder(10);
+    p.run();
+  }
+
+  /**
+   * Per-key {@link Combine} whose combining function is a Java 8 lambda that sums each key's
+   * values.
+   */
+  @Test
+  public void testCombinePerKeyLambda() {
+    Pipeline p = TestPipeline.create();
+    PCollection<KV<String, Integer>> pairs =
+        p.apply(Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 3), KV.of("c", 4)));
+
+    PCollection<KV<String, Integer>> totals = pairs.apply(Combine.perKey(values -> {
+      int acc = 0;
+      for (int value : values) {
+        acc += value;
+      }
+      return acc;
+    }));
+
+    DataflowAssert.that(totals).containsInAnyOrder(
+        KV.of("a", 4),
+        KV.of("b", 2),
+        KV.of("c", 4));
+    p.run();
+  }
+
+  /**
+   * Per-key {@link Combine} whose combining function is a bound Java 8 method reference.
+   */
+  @Test
+  public void testCombinePerKeyInstanceMethodReference() {
+    Pipeline p = TestPipeline.create();
+    PCollection<KV<String, Integer>> pairs =
+        p.apply(Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 3), KV.of("c", 4)));
+
+    Summer summer = new Summer();
+    PCollection<KV<String, Integer>> totals = pairs.apply(Combine.perKey(summer::sum));
+
+    DataflowAssert.that(totals).containsInAnyOrder(
+        KV.of("a", 4),
+        KV.of("b", 2),
+        KV.of("c", 4));
+    p.run();
+  }
+
+  /**
+   * Serializable summing helper used as the target of the method references above.
+   */
+  private static class Summer implements Serializable {
+    public int sum(Iterable<Integer> integers) {
+      int total = 0;
+      for (int value : integers) {
+        total += value;
+      }
+      return total;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterJava8Test.java
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterJava8Test.java b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterJava8Test.java
new file mode 100644
index 0000000..db65932
--- /dev/null
+++ b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FilterJava8Test.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.sdk.transforms;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.Serializable;
+
+/**
+ * Java 8 Tests for {@link Filter}.
+ */
+@RunWith(JUnit4.class)
+@SuppressWarnings("serial")
+public class FilterJava8Test implements Serializable {
+
+  @Rule
+  public transient ExpectedException thrown = ExpectedException.none();
+
+  @Test
+  @Category(RunnableOnService.class)
+  public void testIdentityFilterByPredicate() {
+    Pipeline pipeline = TestPipeline.create();
+
+    PCollection<Integer> output = pipeline
+        .apply(Create.of(591, 11789, 1257, 24578, 24799, 307))
+        .apply(Filter.byPredicate(i -> true));
+
+    DataflowAssert.that(output).containsInAnyOrder(591, 11789, 1257, 24578, 24799, 307);
+    pipeline.run();
+  }
+
+  @Test
+  public void testNoFilterByPredicate() {
+    Pipeline pipeline = TestPipeline.create();
+
+    PCollection<Integer> output = pipeline
+        .apply(Create.of(1, 2, 4, 5))
+        .apply(Filter.byPredicate(i -> false));
+
+    DataflowAssert.that(output).empty();
+    pipeline.run();
+  }
+
+  @Test
+  @Category(RunnableOnService.class)
+  public void testFilterByPredicate() {
+    Pipeline pipeline = TestPipeline.create();
+
+    PCollection<Integer> output = pipeline
+        .apply(Create.of(1, 2, 3, 4, 5, 6, 7))
+        .apply(Filter.byPredicate(i -> i % 2 == 0));
+
+    DataflowAssert.that(output).containsInAnyOrder(2, 4, 6);
+    pipeline.run();
+  }
+
+  /**
+   * Confirms that in Java 8 style, where a lambda results in a rawtype, the output type token is
+   * not useful. If this test ever fails there may be simplifications available to us.
+   */
+  @Test
+  public void testFilterParDoOutputTypeDescriptorRaw() throws Exception {
+    Pipeline pipeline = TestPipeline.create();
+
+    @SuppressWarnings({"unchecked", "rawtypes"})
+    PCollection<String> output = pipeline
+        .apply(Create.of("hello"))
+        .apply(Filter.by(s -> true));
+
+    thrown.expect(CannotProvideCoderException.class);
+    pipeline.getCoderRegistry().getDefaultCoder(output.getTypeDescriptor());
+  }
+
+  @Test
+  @Category(RunnableOnService.class)
+  public void testFilterByMethodReference() {
+    Pipeline pipeline = TestPipeline.create();
+
+    PCollection<Integer> output = pipeline
+        .apply(Create.of(1, 2, 3, 4, 5, 6, 7))
+        .apply(Filter.byPredicate(new EvenFilter()::isEven));
+
+    DataflowAssert.that(output).containsInAnyOrder(2, 4, 6);
+    pipeline.run();
+  }
+
+  private static class EvenFilter implements Serializable {
+    public boolean isEven(int i) {
+      return i % 2 == 0;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElementsJava8Test.java
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElementsJava8Test.java b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElementsJava8Test.java
new file mode 100644
index 0000000..e0b946b
--- /dev/null
+++ b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElementsJava8Test.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.sdk.transforms;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+import com.google.common.collect.ImmutableList;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Java 8 Tests for {@link FlatMapElements}.
+ */
+@RunWith(JUnit4.class)
+public class FlatMapElementsJava8Test implements Serializable {
+
+  @Rule
+  public transient ExpectedException thrown = ExpectedException.none();
+
+  /**
+   * Basic test of {@link FlatMapElements} with a lambda (which is instantiated as a
+   * {@link SerializableFunction}).
+   */
+  @Test
+  public void testFlatMapBasic() throws Exception {
+    Pipeline pipeline = TestPipeline.create();
+    PCollection<Integer> output = pipeline
+        .apply(Create.of(1, 2, 3))
+        .apply(FlatMapElements
+            // Note that the input type annotation is required.
+            .via((Integer i) -> ImmutableList.of(i, -i))
+            .withOutputType(new TypeDescriptor<Integer>() {}));
+
+    DataflowAssert.that(output).containsInAnyOrder(1, 3, -1, -3, 2, -2);
+    pipeline.run();
+  }
+
+  /**
+   * Basic test of {@link FlatMapElements} with a method reference.
+   */
+  @Test
+  public void testFlatMapMethodReference() throws Exception {
+    Pipeline pipeline = TestPipeline.create();
+    PCollection<Integer> output = pipeline
+        .apply(Create.of(1, 2, 3))
+        .apply(FlatMapElements
+            // A method reference needs no explicit input type annotation.
+            .via(new Negater()::numAndNegation)
+            .withOutputType(new TypeDescriptor<Integer>() {}));
+
+    DataflowAssert.that(output).containsInAnyOrder(1, 3, -1, -3, 2, -2);
+    pipeline.run();
+  }
+
+  private static class Negater implements Serializable {
+    public List<Integer> numAndNegation(int input) {
+      return ImmutableList.of(input, -input);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/MapElementsJava8Test.java
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/MapElementsJava8Test.java b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/MapElementsJava8Test.java
new file mode 100644
index 0000000..123e680
--- /dev/null
+++ b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/MapElementsJava8Test.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.sdk.transforms;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.Serializable;
+
+/**
+ * Java 8 tests for {@link MapElements}.
+ */
+@RunWith(JUnit4.class)
+public class MapElementsJava8Test implements Serializable {
+
+  /**
+   * Basic test of {@link MapElements} with a lambda (which is instantiated as a
+   * {@link SerializableFunction}).
+   */
+  @Test
+  public void testMapBasic() throws Exception {
+    Pipeline pipeline = TestPipeline.create();
+    PCollection<Integer> output = pipeline
+        .apply(Create.of(1, 2, 3))
+        .apply(MapElements
+            // Note that the type annotation is required (for Java, not for Dataflow)
+            .via((Integer i) -> i * 2)
+            .withOutputType(new TypeDescriptor<Integer>() {}));
+
+    DataflowAssert.that(output).containsInAnyOrder(6, 2, 4);
+    pipeline.run();
+  }
+
+  /**
+   * Basic test of {@link MapElements} with a method reference.
+   */
+  @Test
+  public void testMapMethodReference() throws Exception {
+    Pipeline pipeline = TestPipeline.create();
+    PCollection<Integer> output = pipeline
+        .apply(Create.of(1, 2, 3))
+        .apply(MapElements
+            // A method reference needs no explicit input type annotation.
+            .via(new Doubler()::doubleIt)
+            .withOutputType(new TypeDescriptor<Integer>() {}));
+
+    DataflowAssert.that(output).containsInAnyOrder(6, 2, 4);
+    pipeline.run();
+  }
+
+  private static class Doubler implements Serializable {
+    public int doubleIt(int val) {
+      return val * 2;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionJava8Test.java
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionJava8Test.java b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionJava8Test.java
new file mode 100644
index 0000000..c459ada
--- /dev/null
+++ b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/PartitionJava8Test.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.sdk.transforms;
+
+import static org.junit.Assert.assertEquals;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.values.PCollectionList;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.Serializable;
+
+/**
+ * Java 8 Tests for {@link Partition}.
+ */
+@RunWith(JUnit4.class)
+@SuppressWarnings("serial")
+public class PartitionJava8Test implements Serializable {
+
+  @Rule
+  public transient ExpectedException thrown = ExpectedException.none();
+
+  @Test
+  public void testModPartition() {
+    Pipeline pipeline = TestPipeline.create();
+
+    PCollectionList<Integer> outputs = pipeline
+        .apply(Create.of(1, 2, 4, 5))
+        .apply(Partition.of(3, (element, numPartitions) -> element % numPartitions));
+    assertEquals(3, outputs.size());
+    DataflowAssert.that(outputs.get(0)).empty();
+    DataflowAssert.that(outputs.get(1)).containsInAnyOrder(1, 4);
+    DataflowAssert.that(outputs.get(2)).containsInAnyOrder(2, 5);
+    pipeline.run();
+  }
+
+  /**
+   * Confirms that in Java 8 style, where a lambda results in a rawtype, the output type token is
+   * not useful. If this test ever fails there may be simplifications available to us.
+   */
+  @Test
+  public void testPartitionFnOutputTypeDescriptorRaw() throws Exception {
+    Pipeline pipeline = TestPipeline.create();
+
+    PCollectionList<String> output = pipeline
+        .apply(Create.of("hello"))
+        .apply(Partition.of(1, (element, numPartitions) -> 0));
+
+    thrown.expect(CannotProvideCoderException.class);
+    pipeline.getCoderRegistry().getDefaultCoder(output.get(0).getTypeDescriptor());
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java
new file mode 100644
index 0000000..dfa1ca6
--- /dev/null
+++ b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicatesJava8Test.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.cloud.dataflow.sdk.transforms;
+
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.hasItem;
+import static org.hamcrest.Matchers.not;
+import static org.junit.Assert.assertThat;
+
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Multimap;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Java 8 tests for {@link RemoveDuplicates}.
+ */
+@RunWith(JUnit4.class)
+public class RemoveDuplicatesJava8Test {
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  @Test
+  public void withLambdaRepresentativeValuesFnAndTypeDescriptorShouldApplyFn() {
+    TestPipeline p = TestPipeline.create();
+    // Distinct input strings keyed by length, the representative value used for deduping.
+    Multimap<Integer, String> predupedContents = HashMultimap.create();
+    predupedContents.put(3, "foo");
+    predupedContents.put(4, "foos");
+    predupedContents.put(6, "barbaz");
+    predupedContents.put(6, "bazbar");
+    PCollection<String> dupes =
+        p.apply(Create.of("foo", "foos", "barbaz", "barbaz", "bazbar", "foo"));
+    PCollection<String> deduped =
+        dupes.apply(RemoveDuplicates.withRepresentativeValueFn((String s) -> s.length())
+                                    .withRepresentativeType(TypeDescriptor.of(Integer.class)));
+    // Every survivor must be an input string whose length has not been seen already.
+    DataflowAssert.that(deduped).satisfies((Iterable<String> strs) -> {
+      Set<Integer> seenLengths = new HashSet<>();
+      for (String s : strs) {
+        assertThat(predupedContents.values(), hasItem(s));
+        assertThat(seenLengths, not(hasItem(s.length())));  // hasItem tests membership
+        seenLengths.add(s.length());
+      }
+      return null;
+    });
+
+    p.run();
+  }
+
+  @Test
+  public void withLambdaRepresentativeValuesFnNoTypeDescriptorShouldThrow() {
+    TestPipeline p = TestPipeline.create();
+
+    Multimap<Integer, String> predupedContents = HashMultimap.create();
+    predupedContents.put(3, "foo");
+    predupedContents.put(4, "foos");
+    predupedContents.put(6, "barbaz");
+    predupedContents.put(6, "bazbar");
+    PCollection<String> dupes =
+        p.apply(Create.of("foo", "foos", "barbaz", "barbaz", "bazbar", "foo"));
+
+    thrown.expect(IllegalStateException.class);
+    thrown.expectMessage("Unable to return a default Coder for RemoveRepresentativeDupes");
+    thrown.expectMessage("Cannot provide a coder for type variable K");
+    thrown.expectMessage("the actual type is unknown due to erasure.");
+
+    // Thrown when applying a transform to the internal WithKeys that withRepresentativeValueFn is
+    // implemented with
+    dupes.apply("RemoveRepresentativeDupes",
+        RemoveDuplicates.withRepresentativeValueFn((String s) -> s.length()));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java
new file mode 100644
index 0000000..3771f78
--- /dev/null
+++ b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithKeysJava8Test.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.cloud.dataflow.sdk.transforms;
+
+import com.google.cloud.dataflow.sdk.Pipeline.PipelineExecutionException;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Java 8 Tests for {@link WithKeys}.
+ */
+@RunWith(JUnit4.class)
+public class WithKeysJava8Test {
+
+  @Rule
+  public ExpectedException thrown = ExpectedException.none();
+
+  @Test
+  @Category(RunnableOnService.class)
+  public void withLambdaAndTypeDescriptorShouldSucceed() {
+    TestPipeline p = TestPipeline.create();
+
+    PCollection<String> values = p.apply(Create.of("1234", "3210", "0", "-12"));
+    PCollection<KV<Integer, String>> kvs = values.apply(
+        WithKeys.of((String s) -> Integer.valueOf(s))
+                .withKeyType(TypeDescriptor.of(Integer.class)));
+
+    DataflowAssert.that(kvs).containsInAnyOrder(
+        KV.of(1234, "1234"), KV.of(0, "0"), KV.of(-12, "-12"), KV.of(3210, "3210"));
+
+    p.run();
+  }
+
+  @Test
+  public void withLambdaAndNoTypeDescriptorShouldThrow() {
+    TestPipeline p = TestPipeline.create();
+
+    PCollection<String> values = p.apply(Create.of("1234", "3210", "0", "-12"));
+
+    values.apply("ApplyKeysWithWithKeys", WithKeys.of((String s) -> Integer.valueOf(s)));
+
+    thrown.expect(PipelineExecutionException.class);
+    thrown.expectMessage("Unable to return a default Coder for ApplyKeysWithWithKeys");
+    thrown.expectMessage("Cannot provide a coder for type variable K");
+    thrown.expectMessage("the actual type is unknown due to erasure.");
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d4233aa0/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithTimestampsJava8Test.java
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithTimestampsJava8Test.java b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithTimestampsJava8Test.java
new file mode 100644
index 0000000..b2b6dbc
--- /dev/null
+++ b/sdks/java/java8tests/src/test/java/com/google/cloud/dataflow/sdk/transforms/WithTimestampsJava8Test.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.cloud.dataflow.sdk.transforms;
+
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.joda.time.Instant;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.Serializable;
+
+/**
+ * Java 8 tests for {@link WithTimestamps}.
+ */
+@RunWith(JUnit4.class)
+public class WithTimestampsJava8Test implements Serializable {
+  @Test
+  @Category(RunnableOnService.class)
+  public void withTimestampsLambdaShouldApplyTimestamps() {
+    TestPipeline p = TestPipeline.create();
+    // Each input string is parsed as epoch millis and becomes that element's timestamp.
+    String yearTwoThousand = "946684800000";
+    PCollection<String> timestamped =
+        p.apply(Create.of("1234", "0", Integer.toString(Integer.MAX_VALUE), yearTwoThousand))
+         .apply(WithTimestamps.of((String input) -> new Instant(Long.valueOf(input))));
+    // Pair every element with the timestamp it carries so the assignment can be asserted.
+    PCollection<KV<String, Instant>> timestampedVals =
+        timestamped.apply(ParDo.of(new DoFn<String, KV<String, Instant>>() {
+          @Override
+          public void processElement(ProcessContext c) throws Exception {
+            c.output(KV.of(c.element(), c.timestamp()));
+          }
+        }));
+
+    DataflowAssert.that(timestamped)
+        .containsInAnyOrder(yearTwoThousand, "0", "1234", Integer.toString(Integer.MAX_VALUE));
+    DataflowAssert.that(timestampedVals)
+        .containsInAnyOrder(
+            KV.of("0", new Instant(0)),
+            KV.of("1234", new Instant(Long.valueOf("1234"))),
+            KV.of(Integer.toString(Integer.MAX_VALUE), new Instant(Integer.MAX_VALUE)),
+            KV.of(yearTwoThousand, new Instant(Long.valueOf(yearTwoThousand))));
+    p.run();  // DataflowAssert checks only execute when the pipeline runs.
+  }
+}

[08/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StringUtils.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StringUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StringUtils.java
deleted file mode 100644
index 3a18336..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StringUtils.java
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.common.base.Joiner;
-import com.google.common.base.Preconditions;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Utilities for working with JSON and other human-readable string formats.
- */
-public class StringUtils {
-  /**
-   * Converts the given array of bytes into a legal JSON string.
-   *
-   * <p>Uses a simple strategy of converting each byte to a single char,
-   * except for non-printable chars, non-ASCII chars, and '%', '\',
-   * and '"', which are encoded as three chars in '%xx' format, where
-   * 'xx' is the hexadecimal encoding of the byte.
-   */
-  public static String byteArrayToJsonString(byte[] bytes) {
-    StringBuilder sb = new StringBuilder(bytes.length * 2);
-    for (byte b : bytes) {
-      if (b >= 32 && b < 127) {
-        // A printable ascii character.
-        char c = (char) b;
-        if (c != '%' && c != '\\' && c != '\"') {
-          // Not an escape prefix or special character, either.
-          // Send through unchanged.
-          sb.append(c);
-          continue;
-        }
-      }
-      // Send through escaped.  Use '%xx' format.
-      sb.append(String.format("%%%02x", b));
-    }
-    return sb.toString();
-  }
-
-  /**
-   * Converts the given string, encoded using {@link #byteArrayToJsonString},
-   * into a byte array.
-   *
-   * @throws IllegalArgumentException if the argument string is not legal
-   */
-  public static byte[] jsonStringToByteArray(String string) {
-    List<Byte> bytes = new ArrayList<>();
-    for (int i = 0; i < string.length(); ) {
-      char c = string.charAt(i);
-      Byte b;
-      if (c == '%') {
-        // Escaped.  Expect '%xx' format.
-        try {
-          b = (byte) Integer.parseInt(string.substring(i + 1, i + 3), 16);
-        } catch (IndexOutOfBoundsException | NumberFormatException exn) {
-          throw new IllegalArgumentException(
-              "not in legal encoded format; " +
-              "substring [" + i + ".." + (i + 2) + "] not in format \"%xx\"",
-              exn);
-        }
-        i += 3;
-      } else {
-        // Send through unchanged.
-        b = (byte) c;
-        i++;
-      }
-      bytes.add(b);
-    }
-    byte[] byteArray = new byte[bytes.size()];
-    int i = 0;
-    for (Byte b : bytes) {
-      byteArray[i++] = b;
-    }
-    return byteArray;
-  }
-
-  private static final String[] STANDARD_NAME_SUFFIXES =
-      new String[]{"DoFn", "Fn"};
-
-  /**
-   * Pattern to match a non-anonymous inner class.
-   * Eg, matches "Foo$Bar", or even "Foo$1$Bar", but not "Foo$1" or "Foo$1$2".
-   */
-  private static final Pattern NAMED_INNER_CLASS =
-      Pattern.compile(".+\\$(?<INNER>[^0-9].*)");
-
-  private static final String ANONYMOUS_CLASS_REGEX = "\\$[0-9]+\\$";
-
-  /**
-   * Returns a simple name for a class.
-   *
-   * <p>Note: this is non-invertible - the name may be simplified to an
-   * extent that it cannot be mapped back to the original class.
-   *
-   * <p>This can be used to generate human-readable names. It
-   * removes the package and outer classes from the name,
-   * and removes common suffixes.
-   *
-   * <p>Examples:
-   * <ul>
-   *   <li>{@code some.package.Word.SummaryDoFn} -> "Summary"
-   *   <li>{@code another.package.PairingFn} -> "Pairing"
-   * </ul>
-   *
-   * @throws IllegalArgumentException if the class is anonymous
-   */
-  public static String approximateSimpleName(Class<?> clazz) {
-    return approximateSimpleName(clazz, /* dropOuterClassNames */ true);
-  }
-
-  /**
-   * Returns a name for a PTransform class.
-   *
-   * <p>This can be used to generate human-readable transform names. It
-   * removes the package from the name, and removes common suffixes.
-   *
-   * <p>It is different than approximateSimpleName:
-   * <ul>
-   *   <li>1. It keeps the outer classes names.
-   *   <li>2. It removes the common transform inner class: "Bound".
-   * </ul>
-   *
-   * <p>Examples:
-   * <ul>
-   *   <li>{@code some.package.Word.Summary} -> "Word.Summary"
-   *   <li>{@code another.package.Pairing.Bound} -> "Pairing"
-   * </ul>
-   */
-  public static String approximatePTransformName(Class<?> clazz) {
-    Preconditions.checkArgument(PTransform.class.isAssignableFrom(clazz));
-    return approximateSimpleName(clazz, /* dropOuterClassNames */ false)
-        .replaceFirst("\\.Bound$", "");
-  }
-
-  /**
-   * Calculate the Levenshtein distance between two strings.
-   *
-   * <p>The Levenshtein distance between two words is the minimum number of single-character edits
-   * (i.e. insertions, deletions or substitutions) required to change one string into the other.
-   */
-  public static int getLevenshteinDistance(final String s, final String t) {
-    Preconditions.checkNotNull(s);
-    Preconditions.checkNotNull(t);
-
-    // base cases
-    if (s.equals(t)) {
-      return 0;
-    }
-    if (s.length() == 0) {
-      return t.length();
-    }
-    if (t.length() == 0) {
-      return s.length();
-    }
-
-    // create two work arrays to store integer distances
-    final int[] v0 = new int[t.length() + 1];
-    final int[] v1 = new int[t.length() + 1];
-
-    // initialize v0 (the previous row of distances)
-    // this row is A[0][i]: edit distance for an empty s
-    // the distance is just the number of characters to delete from t
-    for (int i = 0; i < v0.length; i++) {
-      v0[i] = i;
-    }
-
-    for (int i = 0; i < s.length(); i++) {
-      // calculate v1 (current row distances) from the previous row v0
-
-      // first element of v1 is A[i+1][0]
-      //   edit distance is delete (i+1) chars from s to match empty t
-      v1[0] = i + 1;
-
-      // use formula to fill in the rest of the row
-      for (int j = 0; j < t.length(); j++) {
-        int cost = (s.charAt(i) == t.charAt(j)) ? 0 : 1;
-        v1[j + 1] = Math.min(Math.min(v1[j] + 1, v0[j + 1] + 1), v0[j] + cost);
-      }
-
-      // copy v1 (current row) to v0 (previous row) for next iteration
-      System.arraycopy(v1, 0, v0, 0, v0.length);
-    }
-
-    return v1[t.length()];
-  }
-
-  private static String approximateSimpleName(Class<?> clazz, boolean dropOuterClassNames) {
-    Preconditions.checkArgument(!clazz.isAnonymousClass(),
-        "Attempted to get simple name of anonymous class");
-
-    String fullName = clazz.getName();
-    String shortName = fullName.substring(fullName.lastIndexOf('.') + 1);
-
-    // Drop common suffixes for each named component.
-    String[] names = shortName.split("\\$");
-    for (int i = 0; i < names.length; i++) {
-      names[i] = simplifyNameComponent(names[i]);
-    }
-    shortName = Joiner.on('$').join(names);
-
-    if (dropOuterClassNames) {
-      // Simplify inner class name by dropping outer class prefixes.
-      Matcher m = NAMED_INNER_CLASS.matcher(shortName);
-      if (m.matches()) {
-        shortName = m.group("INNER");
-      }
-    } else {
-      // Dropping anonymous outer classes
-      shortName = shortName.replaceAll(ANONYMOUS_CLASS_REGEX, ".");
-      shortName = shortName.replaceAll("\\$", ".");
-    }
-    return shortName;
-  }
-
-  private static String simplifyNameComponent(String name) {
-    for (String suffix : STANDARD_NAME_SUFFIXES) {
-      if (name.endsWith(suffix) && name.length() > suffix.length()) {
-        return name.substring(0, name.length() - suffix.length());
-      }
-    }
-    return name;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java
deleted file mode 100644
index c621c55..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Structs.java
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.util.Data;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * A collection of static methods for manipulating datastructure representations
- * transferred via the Dataflow API.
- */
-public final class Structs {
-  private Structs() {}  // Non-instantiable
-
-  public static String getString(Map<String, Object> map, String name) throws Exception {
-    return getValue(map, name, String.class, "a string");
-  }
-
-  public static String getString(
-      Map<String, Object> map, String name, @Nullable String defaultValue)
-      throws Exception {
-    return getValue(map, name, String.class, "a string", defaultValue);
-  }
-
-  public static byte[] getBytes(Map<String, Object> map, String name) throws Exception {
-    @Nullable byte[] result = getBytes(map, name, null);
-    if (result == null) {
-      throw new ParameterNotFoundException(name, map);
-    }
-    return result;
-  }
-
-  @Nullable
-  public static byte[] getBytes(Map<String, Object> map, String name, @Nullable byte[] defaultValue)
-      throws Exception {
-    @Nullable String jsonString = getString(map, name, null);
-    if (jsonString == null) {
-      return defaultValue;
-    }
-    // TODO: Need to agree on a format for encoding bytes in
-    // a string that can be sent to the backend, over the cloud
-    // map task work API.  base64 encoding seems pretty common.  Switch to it?
-    return StringUtils.jsonStringToByteArray(jsonString);
-  }
-
-  public static Boolean getBoolean(Map<String, Object> map, String name) throws Exception {
-    return getValue(map, name, Boolean.class, "a boolean");
-  }
-
-  @Nullable
-  public static Boolean getBoolean(
-      Map<String, Object> map, String name, @Nullable Boolean defaultValue)
-      throws Exception {
-    return getValue(map, name, Boolean.class, "a boolean", defaultValue);
-  }
-
-  public static Long getLong(Map<String, Object> map, String name) throws Exception {
-    return getValue(map, name, Long.class, "a long");
-  }
-
-  @Nullable
-  public static Long getLong(Map<String, Object> map, String name, @Nullable Long defaultValue)
-      throws Exception {
-    return getValue(map, name, Long.class, "a long", defaultValue);
-  }
-
-  public static Integer getInt(Map<String, Object> map, String name) throws Exception {
-    return getValue(map, name, Integer.class, "an int");
-  }
-
-  @Nullable
-  public static Integer getInt(Map<String, Object> map, String name, @Nullable Integer defaultValue)
-      throws Exception {
-    return getValue(map, name, Integer.class, "an int", defaultValue);
-  }
-
-  @Nullable
-  public static List<String> getStrings(
-      Map<String, Object> map, String name, @Nullable List<String> defaultValue)
-      throws Exception {
-    @Nullable Object value = map.get(name);
-    if (value == null) {
-      if (map.containsKey(name)) {
-        throw new IncorrectTypeException(name, map, "a string or a list");
-      }
-      return defaultValue;
-    }
-    if (Data.isNull(value)) {
-      // This is a JSON literal null.  When represented as a list of strings,
-      // this is an empty list.
-      return Collections.<String>emptyList();
-    }
-    @Nullable String singletonString = decodeValue(value, String.class);
-    if (singletonString != null) {
-      return Collections.singletonList(singletonString);
-    }
-    if (!(value instanceof List)) {
-      throw new IncorrectTypeException(name, map, "a string or a list");
-    }
-    @SuppressWarnings("unchecked")
-    List<Object> elements = (List<Object>) value;
-    List<String> result = new ArrayList<>(elements.size());
-    for (Object o : elements) {
-      @Nullable String s = decodeValue(o, String.class);
-      if (s == null) {
-        throw new IncorrectTypeException(name, map, "a list of strings");
-      }
-      result.add(s);
-    }
-    return result;
-  }
-
-  public static Map<String, Object> getObject(Map<String, Object> map, String name)
-      throws Exception {
-    @Nullable Map<String, Object> result = getObject(map, name, null);
-    if (result == null) {
-      throw new ParameterNotFoundException(name, map);
-    }
-    return result;
-  }
-
-  @Nullable
-  public static Map<String, Object> getObject(
-      Map<String, Object> map, String name, @Nullable Map<String, Object> defaultValue)
-      throws Exception {
-    @Nullable Object value = map.get(name);
-    if (value == null) {
-      if (map.containsKey(name)) {
-        throw new IncorrectTypeException(name, map, "an object");
-      }
-      return defaultValue;
-    }
-    return checkObject(value, map, name);
-  }
-
-  private static Map<String, Object> checkObject(
-      Object value, Map<String, Object> map, String name) throws Exception {
-    if (Data.isNull(value)) {
-      // This is a JSON literal null.  When represented as an object, this is an
-      // empty map.
-      return Collections.<String, Object>emptyMap();
-    }
-    if (!(value instanceof Map)) {
-      throw new IncorrectTypeException(name, map, "an object (not a map)");
-    }
-    @SuppressWarnings("unchecked")
-    Map<String, Object> mapValue = (Map<String, Object>) value;
-    if (!mapValue.containsKey(PropertyNames.OBJECT_TYPE_NAME)) {
-      throw new IncorrectTypeException(name, map,
-          "an object (no \"" + PropertyNames.OBJECT_TYPE_NAME + "\" field)");
-    }
-    return mapValue;
-  }
-
-  @Nullable
-  public static List<Map<String, Object>> getListOfMaps(Map<String, Object> map, String name,
-      @Nullable List<Map<String, Object>> defaultValue) throws Exception {
-    @Nullable
-    Object value = map.get(name);
-    if (value == null) {
-      if (map.containsKey(name)) {
-        throw new IncorrectTypeException(name, map, "a list");
-      }
-      return defaultValue;
-    }
-    if (Data.isNull(value)) {
-      // This is a JSON literal null.  When represented as a list,
-      // this is an empty list.
-      return Collections.<Map<String, Object>>emptyList();
-    }
-
-    if (!(value instanceof List)) {
-      throw new IncorrectTypeException(name, map, "a list");
-    }
-
-    List<?> elements = (List<?>) value;
-    for (Object elem : elements) {
-      if (!(elem instanceof Map)) {
-        throw new IncorrectTypeException(name, map, "a list of Map objects");
-      }
-    }
-
-    @SuppressWarnings("unchecked")
-    List<Map<String, Object>> result = (List<Map<String, Object>>) elements;
-    return result;
-  }
-
-  public static Map<String, Object> getDictionary(
-      Map<String, Object> map, String name) throws Exception {
-    @Nullable Object value = map.get(name);
-    if (value == null) {
-      throw new ParameterNotFoundException(name, map);
-    }
-    if (Data.isNull(value)) {
-      // This is a JSON literal null.  When represented as a dictionary, this is
-      // an empty map.
-      return Collections.<String, Object>emptyMap();
-    }
-    if (!(value instanceof Map)) {
-      throw new IncorrectTypeException(name, map, "a dictionary");
-    }
-    @SuppressWarnings("unchecked")
-    Map<String, Object> result = (Map<String, Object>) value;
-    return result;
-  }
-
-  @Nullable
-  public static Map<String, Object> getDictionary(
-      Map<String, Object> map, String name, @Nullable Map<String, Object> defaultValue)
-      throws Exception {
-    @Nullable Object value = map.get(name);
-    if (value == null) {
-      if (map.containsKey(name)) {
-        throw new IncorrectTypeException(name, map, "a dictionary");
-      }
-      return defaultValue;
-    }
-    if (Data.isNull(value)) {
-      // This is a JSON literal null.  When represented as a dictionary, this is
-      // an empty map.
-      return Collections.<String, Object>emptyMap();
-    }
-    if (!(value instanceof Map)) {
-      throw new IncorrectTypeException(name, map, "a dictionary");
-    }
-    @SuppressWarnings("unchecked")
-    Map<String, Object> result = (Map<String, Object>) value;
-    return result;
-  }
-
-  // Builder operations.
-
-  public static void addString(Map<String, Object> map, String name, String value) {
-    addObject(map, name, CloudObject.forString(value));
-  }
-
-  public static void addBoolean(Map<String, Object> map, String name, boolean value) {
-    addObject(map, name, CloudObject.forBoolean(value));
-  }
-
-  public static void addLong(Map<String, Object> map, String name, long value) {
-    addObject(map, name, CloudObject.forInteger(value));
-  }
-
-  public static void addObject(
-      Map<String, Object> map, String name, Map<String, Object> value) {
-    map.put(name, value);
-  }
-
-  public static void addNull(Map<String, Object> map, String name) {
-    map.put(name, Data.nullOf(Object.class));
-  }
-
-  public static void addLongs(Map<String, Object> map, String name, long... longs) {
-    List<Map<String, Object>> elements = new ArrayList<>(longs.length);
-    for (Long value : longs) {
-      elements.add(CloudObject.forInteger(value));
-    }
-    map.put(name, elements);
-  }
-
-  public static void addList(
-      Map<String, Object> map, String name, List<? extends Map<String, Object>> elements) {
-    map.put(name, elements);
-  }
-
-  public static void addStringList(Map<String, Object> map, String name, List<String> elements) {
-    ArrayList<CloudObject> objects = new ArrayList<>(elements.size());
-    for (String element : elements) {
-      objects.add(CloudObject.forString(element));
-    }
-    addList(map, name, objects);
-  }
-
-  public static <T extends Map<String, Object>> void addList(
-      Map<String, Object> map, String name, T[] elements) {
-    map.put(name, Arrays.asList(elements));
-  }
-
-  public static void addDictionary(
-      Map<String, Object> map, String name, Map<String, Object> value) {
-    map.put(name, value);
-  }
-
-  public static void addDouble(Map<String, Object> map, String name, Double value) {
-    addObject(map, name, CloudObject.forFloat(value));
-  }
-
-  // Helper methods for a few of the accessor methods.
-
-  private static <T> T getValue(Map<String, Object> map, String name, Class<T> clazz, String type)
-      throws Exception {
-    @Nullable T result = getValue(map, name, clazz, type, null);
-    if (result == null) {
-      throw new ParameterNotFoundException(name, map);
-    }
-    return result;
-  }
-
-  @Nullable
-  private static <T> T getValue(
-      Map<String, Object> map, String name, Class<T> clazz, String type, @Nullable T defaultValue)
-      throws Exception {
-    @Nullable Object value = map.get(name);
-    if (value == null) {
-      if (map.containsKey(name)) {
-        throw new IncorrectTypeException(name, map, type);
-      }
-      return defaultValue;
-    }
-    T result = decodeValue(value, clazz);
-    if (result == null) {
-      // The value exists, but can't be decoded.
-      throw new IncorrectTypeException(name, map, type);
-    }
-    return result;
-  }
-
-  @Nullable
-  private static <T> T decodeValue(Object value, Class<T> clazz) {
-    try {
-      if (value.getClass() == clazz) {
-        // decodeValue() is only called for final classes; if the class matches,
-        // it's safe to just return the value, and if it doesn't match, decoding
-        // is needed.
-        return clazz.cast(value);
-      }
-      if (!(value instanceof Map)) {
-        return null;
-      }
-      @SuppressWarnings("unchecked")
-      Map<String, Object> map = (Map<String, Object>) value;
-      @Nullable String typeName = (String) map.get(PropertyNames.OBJECT_TYPE_NAME);
-      if (typeName == null) {
-        return null;
-      }
-      @Nullable CloudKnownType knownType = CloudKnownType.forUri(typeName);
-      if (knownType == null) {
-        return null;
-      }
-      @Nullable Object scalar = map.get(PropertyNames.SCALAR_FIELD_NAME);
-      if (scalar == null) {
-        return null;
-      }
-      return knownType.parse(scalar, clazz);
-    } catch (ClassCastException e) {
-      // If any class cast fails during decoding, the value's not decodable.
-      return null;
-    }
-  }
-
-  private static final class ParameterNotFoundException extends Exception {
-    public ParameterNotFoundException(String name, Map<String, Object> map) {
-      super("didn't find required parameter " + name + " in " + map);
-    }
-  }
-
-  private static final class IncorrectTypeException extends Exception {
-    public IncorrectTypeException(String name, Map<String, Object> map, String type) {
-      super("required parameter " + name + " in " + map + " not " + type);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemDoFnInternal.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemDoFnInternal.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemDoFnInternal.java
deleted file mode 100644
index 3255ede..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemDoFnInternal.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-
-import java.lang.annotation.Documented;
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
-
-/**
- * Annotation to mark {@link DoFn DoFns} as an internal component of the Dataflow SDK.
- *
- * <p>Currently, the only effect of this is to mark any aggregators reported by an annotated
- * {@code DoFn} as a system counter (as opposed to a user counter).
- *
- * <p>This is internal to the Dataflow SDK.
- */
-@Documented
-@Retention(RetentionPolicy.RUNTIME)
-@Target(ElementType.TYPE)
-public @interface SystemDoFnInternal {}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemReduceFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemReduceFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemReduceFn.java
deleted file mode 100644
index 1665792..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SystemReduceFn.java
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.KeyedCombineFnWithContext;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.state.AccumulatorCombiningState;
-import com.google.cloud.dataflow.sdk.util.state.BagState;
-import com.google.cloud.dataflow.sdk.util.state.CombiningState;
-import com.google.cloud.dataflow.sdk.util.state.MergingStateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.ReadableState;
-import com.google.cloud.dataflow.sdk.util.state.StateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateMerging;
-import com.google.cloud.dataflow.sdk.util.state.StateTag;
-import com.google.cloud.dataflow.sdk.util.state.StateTags;
-
-/**
- * {@link ReduceFn} implementing the default reduction behaviors of {@link GroupByKey}.
- *
- * @param <K> The type of key being processed.
- * @param <InputT> The type of values associated with the key.
- * @param <OutputT> The output type that will be produced for each key.
- * @param <W> The type of windows this operates on.
- */
-public abstract class SystemReduceFn<K, InputT, AccumT, OutputT, W extends BoundedWindow>
-    extends ReduceFn<K, InputT, OutputT, W> {
-  private static final String BUFFER_NAME = "buf";
-
-  /**
-   * Create a factory that produces {@link SystemReduceFn} instances that that buffer all of the
-   * input values in persistent state and produces an {@code Iterable<T>}.
-   */
-  public static <K, T, W extends BoundedWindow> SystemReduceFn<K, T, Iterable<T>, Iterable<T>, W>
-      buffering(final Coder<T> inputCoder) {
-    final StateTag<Object, BagState<T>> bufferTag =
-        StateTags.makeSystemTagInternal(StateTags.bag(BUFFER_NAME, inputCoder));
-    return new SystemReduceFn<K, T, Iterable<T>, Iterable<T>, W>(bufferTag) {
-      @Override
-      public void prefetchOnMerge(MergingStateAccessor<K, W> state) throws Exception {
-        StateMerging.prefetchBags(state, bufferTag);
-      }
-
-      @Override
-      public void onMerge(OnMergeContext c) throws Exception {
-        StateMerging.mergeBags(c.state(), bufferTag);
-      }
-    };
-  }
-
-  /**
-   * Create a factory that produces {@link SystemReduceFn} instances that combine all of the input
-   * values using a {@link CombineFn}.
-   */
-  public static <K, InputT, AccumT, OutputT, W extends BoundedWindow> SystemReduceFn<K, InputT,
-      AccumT, OutputT, W>
-      combining(
-          final Coder<K> keyCoder, final AppliedCombineFn<K, InputT, AccumT, OutputT> combineFn) {
-    final StateTag<K, AccumulatorCombiningState<InputT, AccumT, OutputT>> bufferTag;
-    if (combineFn.getFn() instanceof KeyedCombineFnWithContext) {
-      bufferTag = StateTags.makeSystemTagInternal(
-          StateTags.<K, InputT, AccumT, OutputT>keyedCombiningValueWithContext(
-              BUFFER_NAME, combineFn.getAccumulatorCoder(),
-              (KeyedCombineFnWithContext<K, InputT, AccumT, OutputT>) combineFn.getFn()));
-
-    } else {
-      bufferTag = StateTags.makeSystemTagInternal(
-            StateTags.<K, InputT, AccumT, OutputT>keyedCombiningValue(
-                BUFFER_NAME, combineFn.getAccumulatorCoder(),
-                (KeyedCombineFn<K, InputT, AccumT, OutputT>) combineFn.getFn()));
-    }
-    return new SystemReduceFn<K, InputT, AccumT, OutputT, W>(bufferTag) {
-      @Override
-      public void prefetchOnMerge(MergingStateAccessor<K, W> state) throws Exception {
-        StateMerging.prefetchCombiningValues(state, bufferTag);
-      }
-
-      @Override
-      public void onMerge(OnMergeContext c) throws Exception {
-        StateMerging.mergeCombiningValues(c.state(), bufferTag);
-      }
-    };
-  }
-
-  private StateTag<? super K, ? extends CombiningState<InputT, OutputT>> bufferTag;
-
-  public SystemReduceFn(
-      StateTag<? super K, ? extends CombiningState<InputT, OutputT>> bufferTag) {
-    this.bufferTag = bufferTag;
-  }
-
-  @Override
-  public void processValue(ProcessValueContext c) throws Exception {
-    c.state().access(bufferTag).add(c.value());
-  }
-
-  @Override
-  public void prefetchOnTrigger(StateAccessor<K> state) {
-    state.access(bufferTag).readLater();
-  }
-
-  @Override
-  public void onTrigger(OnTriggerContext c) throws Exception {
-    c.output(c.state().access(bufferTag).read());
-  }
-
-  @Override
-  public void clearState(Context c) throws Exception {
-    c.state().access(bufferTag).clear();
-  }
-
-  @Override
-  public ReadableState<Boolean> isEmpty(StateAccessor<K> state) {
-    return state.access(bufferTag).isEmpty();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java
deleted file mode 100644
index 359e157..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TestCredential.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.auth.oauth2.BearerToken;
-import com.google.api.client.auth.oauth2.Credential;
-import com.google.api.client.auth.oauth2.TokenResponse;
-import com.google.api.client.testing.http.MockHttpTransport;
-
-import java.io.IOException;
-
-/**
- * Fake credential, for use in testing.
- */
-public class TestCredential extends Credential {
-
-  private final String token;
-
-  public TestCredential() {
-    this("NULL");
-  }
-
-  public TestCredential(String token) {
-    super(new Builder(
-        BearerToken.authorizationHeaderAccessMethod())
-        .setTransport(new MockHttpTransport()));
-    this.token = token;
-  }
-
-  @Override
-  protected TokenResponse executeRefreshToken() throws IOException {
-    TokenResponse response = new TokenResponse();
-    response.setExpiresInSeconds(5L * 60);
-    response.setAccessToken(token);
-    return response;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeDomain.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeDomain.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeDomain.java
deleted file mode 100644
index 4ff36f7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeDomain.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-/**
- * {@code TimeDomain} specifies whether an operation is based on
- * timestamps of elements or current "real-world" time as reported while processing.
- */
-public enum TimeDomain {
-  /**
-   * The {@code EVENT_TIME} domain corresponds to the timestamps on the elements. Time advances
-   * on the system watermark advances.
-   */
-  EVENT_TIME,
-
-  /**
-   * The {@code PROCESSING_TIME} domain corresponds to the current to the current (system) time.
-   * This is advanced during execution of the Dataflow pipeline.
-   */
-  PROCESSING_TIME,
-
-  /**
-   * Same as the {@code PROCESSING_TIME} domain, except it won't fire a timer set for time
-   * {@code T} until all timers from earlier stages set for a time earlier than {@code T} have
-   * fired.
-   */
-  SYNCHRONIZED_PROCESSING_TIME;
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeUtil.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeUtil.java
deleted file mode 100644
index 93195a7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimeUtil.java
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import org.joda.time.DateTime;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.joda.time.ReadableDuration;
-import org.joda.time.ReadableInstant;
-import org.joda.time.chrono.ISOChronology;
-
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.annotation.Nullable;
-
-/**
- * A helper class for converting between Dataflow API and SDK time
- * representations.
- *
- * <p>Dataflow API times are strings of the form
- * {@code YYYY-MM-dd'T'HH:mm:ss[.nnnn]'Z'}: that is, RFC 3339
- * strings with optional fractional seconds and a 'Z' offset.
- *
- * <p>Dataflow API durations are strings of the form {@code ['-']sssss[.nnnn]'s'}:
- * that is, seconds with optional fractional seconds and a literal 's' at the end.
- *
- * <p>In both formats, fractional seconds are either three digits (millisecond
- * resolution), six digits (microsecond resolution), or nine digits (nanosecond
- * resolution).
- */
-public final class TimeUtil {
-  private TimeUtil() {}  // Non-instantiable.
-
-  private static final Pattern DURATION_PATTERN = Pattern.compile("(\\d+)(?:\\.(\\d+))?s");
-  private static final Pattern TIME_PATTERN =
-      Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2})T(\\d{2}):(\\d{2}):(\\d{2})(?:\\.(\\d+))?Z");
-
-  /**
-   * Converts a {@link ReadableInstant} into a Dateflow API time value.
-   */
-  public static String toCloudTime(ReadableInstant instant) {
-    // Note that since Joda objects use millisecond resolution, we always
-    // produce either no fractional seconds or fractional seconds with
-    // millisecond resolution.
-
-    // Translate the ReadableInstant to a DateTime with ISOChronology.
-    DateTime time = new DateTime(instant);
-
-    int millis = time.getMillisOfSecond();
-    if (millis == 0) {
-      return String.format("%04d-%02d-%02dT%02d:%02d:%02dZ",
-          time.getYear(),
-          time.getMonthOfYear(),
-          time.getDayOfMonth(),
-          time.getHourOfDay(),
-          time.getMinuteOfHour(),
-          time.getSecondOfMinute());
-    } else {
-      return String.format("%04d-%02d-%02dT%02d:%02d:%02d.%03dZ",
-          time.getYear(),
-          time.getMonthOfYear(),
-          time.getDayOfMonth(),
-          time.getHourOfDay(),
-          time.getMinuteOfHour(),
-          time.getSecondOfMinute(),
-          millis);
-    }
-  }
-
-  /**
-   * Converts a time value received via the Dataflow API into the corresponding
-   * {@link Instant}.
-   * @return the parsed time, or null if a parse error occurs
-   */
-  @Nullable
-  public static Instant fromCloudTime(String time) {
-    Matcher matcher = TIME_PATTERN.matcher(time);
-    if (!matcher.matches()) {
-      return null;
-    }
-    int year = Integer.valueOf(matcher.group(1));
-    int month = Integer.valueOf(matcher.group(2));
-    int day = Integer.valueOf(matcher.group(3));
-    int hour = Integer.valueOf(matcher.group(4));
-    int minute = Integer.valueOf(matcher.group(5));
-    int second = Integer.valueOf(matcher.group(6));
-    int millis = 0;
-
-    String frac = matcher.group(7);
-    if (frac != null) {
-      int fracs = Integer.valueOf(frac);
-      if (frac.length() == 3) {  // millisecond resolution
-        millis = fracs;
-      } else if (frac.length() == 6) {  // microsecond resolution
-        millis = fracs / 1000;
-      } else if (frac.length() == 9) {  // nanosecond resolution
-        millis = fracs / 1000000;
-      } else {
-        return null;
-      }
-    }
-
-    return new DateTime(year, month, day, hour, minute, second, millis,
-        ISOChronology.getInstanceUTC()).toInstant();
-  }
-
-  /**
-   * Converts a {@link ReadableDuration} into a Dataflow API duration string.
-   */
-  public static String toCloudDuration(ReadableDuration duration) {
-    // Note that since Joda objects use millisecond resolution, we always
-    // produce either no fractional seconds or fractional seconds with
-    // millisecond resolution.
-    long millis = duration.getMillis();
-    long seconds = millis / 1000;
-    millis = millis % 1000;
-    if (millis == 0) {
-      return String.format("%ds", seconds);
-    } else {
-      return String.format("%d.%03ds", seconds, millis);
-    }
-  }
-
-  /**
-   * Converts a Dataflow API duration string into a {@link Duration}.
-   * @return the parsed duration, or null if a parse error occurs
-   */
-  @Nullable
-  public static Duration fromCloudDuration(String duration) {
-    Matcher matcher = DURATION_PATTERN.matcher(duration);
-    if (!matcher.matches()) {
-      return null;
-    }
-    long millis = Long.valueOf(matcher.group(1)) * 1000;
-    String frac = matcher.group(2);
-    if (frac != null) {
-      long fracs = Long.valueOf(frac);
-      if (frac.length() == 3) {  // millisecond resolution
-        millis += fracs;
-      } else if (frac.length() == 6) {  // microsecond resolution
-        millis += fracs / 1000;
-      } else if (frac.length() == 9) {  // nanosecond resolution
-        millis += fracs / 1000000;
-      } else {
-        return null;
-      }
-    }
-    return Duration.millis(millis);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerInternals.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerInternals.java
deleted file mode 100644
index c823ed3..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TimerInternals.java
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.InstantCoder;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.state.StateNamespace;
-import com.google.cloud.dataflow.sdk.util.state.StateNamespaces;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Preconditions;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Objects;
-
-import javax.annotation.Nullable;
-
-/**
- * Encapsulate interaction with time within the execution environment.
- *
- * <p>This class allows setting and deleting timers, and also retrieving an
- * estimate of the current time.
- */
-public interface TimerInternals {
-
-  /**
-   * Writes out a timer to be fired when the watermark reaches the given
-   * timestamp.
-   *
-   * <p>The combination of {@code namespace}, {@code timestamp} and {@code domain} uniquely
-   * identifies a timer. Multiple timers set for the same parameters can be safely deduplicated.
-   */
-  void setTimer(TimerData timerKey);
-
-  /**
-   * Deletes the given timer.
-   */
-  void deleteTimer(TimerData timerKey);
-
-  /**
-   * Returns the current timestamp in the {@link TimeDomain#PROCESSING_TIME} time domain.
-   */
-  Instant currentProcessingTime();
-
-  /**
-   * Returns the current timestamp in the {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} time
-   * domain or {@code null} if unknown.
-   */
-  @Nullable
-  Instant currentSynchronizedProcessingTime();
-
-  /**
-   * Return the current, local input watermark timestamp for this computation
-   * in the {@link TimeDomain#EVENT_TIME} time domain. Return {@code null} if unknown.
-   *
-   * <p>This value:
-   * <ol>
-   * <li>Is monotonically increasing.
-   * <li>May differ between workers due to network and other delays.
-   * <li>Will never be ahead of the global input watermark for this computation. But it
-   * may be arbitrarily behind the global input watermark.
-   * <li>Any element with a timestamp before the local input watermark can be considered
-   * 'locally late' and be subject to special processing or be dropped entirely.
-   * </ol>
-   *
-   * <p>Note that because the local input watermark can be behind the global input watermark,
-   * it is possible for an element to be considered locally on-time even though it is
-   * globally late.
-   */
-  @Nullable
-  Instant currentInputWatermarkTime();
-
-  /**
-   * Return the current, local output watermark timestamp for this computation
-   * in the {@link TimeDomain#EVENT_TIME} time domain. Return {@code null} if unknown.
-   *
-   * <p>This value:
-   * <ol>
-   * <li>Is monotonically increasing.
-   * <li>Will never be ahead of {@link #currentInputWatermarkTime} as returned above.
-   * <li>May differ between workers due to network and other delays.
-   * <li>However will never be behind the global input watermark for any following computation.
-   * </ol>
-   *
-   * <p> In pictures:
-   * <pre>
-   *  |              |       |       |       |
-   *  |              |   D   |   C   |   B   |   A
-   *  |              |       |       |       |
-   * GIWM     <=    GOWM <= LOWM <= LIWM <= GIWM
-   * (next stage)
-   * -------------------------------------------------> event time
-   * </pre>
-   * where
-   * <ul>
-   * <li> LOWM = local output water mark.
-   * <li> GOWM = global output water mark.
-   * <li> GIWM = global input water mark.
-   * <li> LIWM = local input water mark.
-   * <li> A = A globally on-time element.
-   * <li> B = A globally late, but locally on-time element.
-   * <li> C = A locally late element which may still contribute to the timestamp of a pane.
-   * <li> D = A locally late element which cannot contribute to the timestamp of a pane.
-   * </ul>
-   *
-   * <p>Note that if a computation emits an element which is not before the current output watermark
-   * then that element will always appear locally on-time in all following computations. However,
-   * it is possible for an element emitted before the current output watermark to appear locally
-   * on-time in a following computation. Thus we must be careful to never assume locally late data
-   * viewed on the output of a computation remains locally late on the input of a following
-   * computation.
-   */
-  @Nullable
-  Instant currentOutputWatermarkTime();
-
-  /**
-   * Data about a timer as represented within {@link TimerInternals}.
-   */
-  public static class TimerData implements Comparable<TimerData> {
-    private final StateNamespace namespace;
-    private final Instant timestamp;
-    private final TimeDomain domain;
-
-    private TimerData(StateNamespace namespace, Instant timestamp, TimeDomain domain) {
-      this.namespace = checkNotNull(namespace);
-      this.timestamp = checkNotNull(timestamp);
-      this.domain = checkNotNull(domain);
-    }
-
-    public StateNamespace getNamespace() {
-      return namespace;
-    }
-
-    public Instant getTimestamp() {
-      return timestamp;
-    }
-
-    public TimeDomain getDomain() {
-      return domain;
-    }
-
-    /**
-     * Construct the {@code TimerData} for the given parameters.
-     */
-    public static TimerData of(StateNamespace namespace, Instant timestamp, TimeDomain domain) {
-      return new TimerData(namespace, timestamp, domain);
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (this == obj) {
-        return true;
-      }
-
-      if (!(obj instanceof TimerData)) {
-        return false;
-      }
-
-      TimerData that = (TimerData) obj;
-      return Objects.equals(this.domain, that.domain)
-          && this.timestamp.isEqual(that.timestamp)
-          && Objects.equals(this.namespace, that.namespace);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(domain, timestamp, namespace);
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(getClass())
-          .add("namespace", namespace)
-          .add("timestamp", timestamp)
-          .add("domain", domain)
-          .toString();
-    }
-
-    @Override
-    public int compareTo(TimerData o) {
-      return Long.compare(timestamp.getMillis(), o.getTimestamp().getMillis());
-    }
-  }
-
-  /**
-   * A {@link Coder} for {@link TimerData}.
-   */
-  public class TimerDataCoder extends StandardCoder<TimerData> {
-    private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of();
-    private static final InstantCoder INSTANT_CODER = InstantCoder.of();
-    private final Coder<? extends BoundedWindow> windowCoder;
-
-    public static TimerDataCoder of(Coder<? extends BoundedWindow> windowCoder) {
-      return new TimerDataCoder(windowCoder);
-    }
-
-    @SuppressWarnings("unchecked")
-    @JsonCreator
-    public static TimerDataCoder of(
-        @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-        List<Coder<?>> components) {
-      Preconditions.checkArgument(components.size() == 1,
-          "Expecting 1 components, got " + components.size());
-      return of((Coder<? extends BoundedWindow>) components.get(0));
-    }
-
-    private TimerDataCoder(Coder<? extends BoundedWindow> windowCoder) {
-      this.windowCoder = windowCoder;
-    }
-
-    @Override
-    public void encode(TimerData timer, OutputStream outStream, Context context)
-        throws CoderException, IOException {
-      Context nestedContext = context.nested();
-      STRING_CODER.encode(timer.namespace.stringKey(), outStream, nestedContext);
-      INSTANT_CODER.encode(timer.timestamp, outStream, nestedContext);
-      STRING_CODER.encode(timer.domain.name(), outStream, nestedContext);
-    }
-
-    @Override
-    public TimerData decode(InputStream inStream, Context context)
-        throws CoderException, IOException {
-      Context nestedContext = context.nested();
-      StateNamespace namespace =
-          StateNamespaces.fromString(STRING_CODER.decode(inStream, nestedContext), windowCoder);
-      Instant timestamp = INSTANT_CODER.decode(inStream, nestedContext);
-      TimeDomain domain = TimeDomain.valueOf(STRING_CODER.decode(inStream, nestedContext));
-      return TimerData.of(namespace, timestamp, domain);
-    }
-
-    @Override
-    public List<? extends Coder<?>> getCoderArguments() {
-      return Arrays.asList(windowCoder);
-    }
-
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      verifyDeterministic("window coder must be deterministic", windowCoder);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Timers.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Timers.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Timers.java
deleted file mode 100644
index 7d4b4f2..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Timers.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-
-import org.joda.time.Instant;
-
-import javax.annotation.Nullable;
-
-/**
- * Interface for interacting with time.
- */
-@Experimental(Experimental.Kind.TIMERS)
-public interface Timers {
-  /**
-   * Sets a timer to fire when the event time watermark, the current processing time, or
-   * the synchronized processing time watermark surpasses a given timestamp.
-   *
-   * <p>See {@link TimeDomain} for details on the time domains available.
-   *
-   * <p>Timers are not guaranteed to fire immediately, but will be delivered at some time
-   * afterwards.
-   *
-   * <p>An implementation of {@link Timers} implicitly scopes timers that are set - they may
-   * be scoped to a key and window, or a key, window, and trigger, etc.
-   *
-   * @param timestamp the time at which the timer should be delivered
-   * @param timeDomain the domain that the {@code timestamp} applies to
-   */
-  public abstract void setTimer(Instant timestamp, TimeDomain timeDomain);
-
-  /** Removes the timer set in this context for the {@code timestamp} and {@code timeDomain}. */
-  public abstract void deleteTimer(Instant timestamp, TimeDomain timeDomain);
-
-  /** Returns the current processing time. */
-  public abstract Instant currentProcessingTime();
-
-  /** Returns the current synchronized processing time or {@code null} if unknown. */
-  @Nullable
-  public abstract Instant currentSynchronizedProcessingTime();
-
-  /** Returns the current event time or {@code null} if unknown. */
-  @Nullable
-  public abstract Instant currentEventTime();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java
deleted file mode 100644
index 15fe286..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Transport.java
+++ /dev/null
@@ -1,205 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.auth.oauth2.Credential;
-import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
-import com.google.api.client.http.HttpRequestInitializer;
-import com.google.api.client.http.HttpTransport;
-import com.google.api.client.json.JsonFactory;
-import com.google.api.client.json.jackson2.JacksonFactory;
-import com.google.api.services.bigquery.Bigquery;
-import com.google.api.services.clouddebugger.v2.Clouddebugger;
-import com.google.api.services.dataflow.Dataflow;
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.storage.Storage;
-import com.google.cloud.dataflow.sdk.options.BigQueryOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineDebugOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.GcsOptions;
-import com.google.cloud.hadoop.util.ChainingHttpRequestInitializer;
-import com.google.common.collect.ImmutableList;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.security.GeneralSecurityException;
-
-/**
- * Helpers for cloud communication.
- */
-public class Transport {
-
-  private static class SingletonHelper {
-    /** Global instance of the JSON factory. */
-    private static final JsonFactory JSON_FACTORY;
-
-    /** Global instance of the HTTP transport. */
-    private static final HttpTransport HTTP_TRANSPORT;
-
-    static {
-      try {
-        JSON_FACTORY = JacksonFactory.getDefaultInstance();
-        HTTP_TRANSPORT = GoogleNetHttpTransport.newTrustedTransport();
-      } catch (GeneralSecurityException | IOException e) {
-        throw new RuntimeException(e);
-      }
-    }
-  }
-
-  public static HttpTransport getTransport() {
-    return SingletonHelper.HTTP_TRANSPORT;
-  }
-
-  public static JsonFactory getJsonFactory() {
-    return SingletonHelper.JSON_FACTORY;
-  }
-
-  private static class ApiComponents {
-    public String rootUrl;
-    public String servicePath;
-
-    public ApiComponents(String root, String path) {
-      this.rootUrl = root;
-      this.servicePath = path;
-    }
-  }
-
-  private static ApiComponents apiComponentsFromUrl(String urlString) {
-    try {
-      URL url = new URL(urlString);
-      String rootUrl = url.getProtocol() + "://" + url.getHost() +
-          (url.getPort() > 0 ? ":" + url.getPort() : "");
-      return new ApiComponents(rootUrl, url.getPath());
-    } catch (MalformedURLException e) {
-      throw new RuntimeException("Invalid URL: " + urlString);
-    }
-  }
-
-  /**
-   * Returns a BigQuery client builder.
-   *
-   * <p>Note: this client's endpoint is <b>not</b> modified by the
-   * {@link DataflowPipelineDebugOptions#getApiRootUrl()} option.
-   */
-  public static Bigquery.Builder
-      newBigQueryClient(BigQueryOptions options) {
-    return new Bigquery.Builder(getTransport(), getJsonFactory(),
-        chainHttpRequestInitializer(
-            options.getGcpCredential(),
-            // Do not log 404. It clutters the output and is possibly even required by the caller.
-            new RetryHttpRequestInitializer(ImmutableList.of(404))))
-        .setApplicationName(options.getAppName())
-        .setGoogleClientRequestInitializer(options.getGoogleApiTrace());
-  }
-
-  /**
-   * Returns a Pubsub client builder.
-   *
-   * <p>Note: this client's endpoint is <b>not</b> modified by the
-   * {@link DataflowPipelineDebugOptions#getApiRootUrl()} option.
-   */
-  public static Pubsub.Builder
-      newPubsubClient(DataflowPipelineOptions options) {
-    return new Pubsub.Builder(getTransport(), getJsonFactory(),
-        chainHttpRequestInitializer(
-            options.getGcpCredential(),
-            // Do not log 404. It clutters the output and is possibly even required by the caller.
-            new RetryHttpRequestInitializer(ImmutableList.of(404))))
-        .setRootUrl(options.getPubsubRootUrl())
-        .setApplicationName(options.getAppName())
-        .setGoogleClientRequestInitializer(options.getGoogleApiTrace());
-  }
-
-  /**
-   * Returns a Google Cloud Dataflow client builder.
-   */
-  public static Dataflow.Builder newDataflowClient(DataflowPipelineOptions options) {
-    String servicePath = options.getDataflowEndpoint();
-    ApiComponents components;
-    if (servicePath.contains("://")) {
-      components = apiComponentsFromUrl(servicePath);
-    } else {
-      components = new ApiComponents(options.getApiRootUrl(), servicePath);
-    }
-
-    return new Dataflow.Builder(getTransport(),
-        getJsonFactory(),
-        chainHttpRequestInitializer(
-            options.getGcpCredential(),
-            // Do not log 404. It clutters the output and is possibly even required by the caller.
-            new RetryHttpRequestInitializer(ImmutableList.of(404))))
-        .setApplicationName(options.getAppName())
-        .setRootUrl(components.rootUrl)
-        .setServicePath(components.servicePath)
-        .setGoogleClientRequestInitializer(options.getGoogleApiTrace());
-  }
-
-  public static Clouddebugger.Builder newClouddebuggerClient(DataflowPipelineOptions options) {
-    return new Clouddebugger.Builder(getTransport(),
-        getJsonFactory(),
-        chainHttpRequestInitializer(options.getGcpCredential(), new RetryHttpRequestInitializer()))
-        .setApplicationName(options.getAppName())
-        .setGoogleClientRequestInitializer(options.getGoogleApiTrace());
-  }
-
-  /**
-   * Returns a Dataflow client that does not automatically retry failed
-   * requests.
-   */
-  public static Dataflow.Builder
-      newRawDataflowClient(DataflowPipelineOptions options) {
-    return newDataflowClient(options)
-        .setHttpRequestInitializer(options.getGcpCredential())
-        .setGoogleClientRequestInitializer(options.getGoogleApiTrace());
-  }
-
-  /**
-   * Returns a Cloud Storage client builder.
-   *
-   * <p>Note: this client's endpoint is <b>not</b> modified by the
-   * {@link DataflowPipelineDebugOptions#getApiRootUrl()} option.
-   */
-  public static Storage.Builder
-      newStorageClient(GcsOptions options) {
-    String servicePath = options.getGcsEndpoint();
-    Storage.Builder storageBuilder = new Storage.Builder(getTransport(), getJsonFactory(),
-        chainHttpRequestInitializer(
-            options.getGcpCredential(),
-            // Do not log the code 404. Code up the stack will deal with 404's if needed, and
-            // logging it by default clutters the output during file staging.
-            new RetryHttpRequestInitializer(
-                ImmutableList.of(404), new UploadIdResponseInterceptor())))
-        .setApplicationName(options.getAppName())
-        .setGoogleClientRequestInitializer(options.getGoogleApiTrace());
-    if (servicePath != null) {
-      ApiComponents components = apiComponentsFromUrl(servicePath);
-      storageBuilder.setRootUrl(components.rootUrl);
-      storageBuilder.setServicePath(components.servicePath);
-    }
-    return storageBuilder;
-  }
-
-  private static HttpRequestInitializer chainHttpRequestInitializer(
-      Credential credential, HttpRequestInitializer httpRequestInitializer) {
-    if (credential == null) {
-      return httpRequestInitializer;
-    } else {
-      return new ChainingHttpRequestInitializer(credential, httpRequestInitializer);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java
deleted file mode 100644
index 64ff402..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerContextFactory.java
+++ /dev/null
@@ -1,522 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.MergingTriggerInfo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.TriggerInfo;
-import com.google.cloud.dataflow.sdk.util.state.MergingStateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.State;
-import com.google.cloud.dataflow.sdk.util.state.StateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateInternals;
-import com.google.cloud.dataflow.sdk.util.state.StateNamespace;
-import com.google.cloud.dataflow.sdk.util.state.StateNamespaces;
-import com.google.cloud.dataflow.sdk.util.state.StateTag;
-import com.google.common.base.Predicate;
-import com.google.common.collect.FluentIterable;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Maps;
-
-import org.joda.time.Instant;
-
-import java.util.Collection;
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * Factory for creating instances of the various {@link Trigger} contexts.
- *
- * <p>These contexts are highly interdependent and share many fields; it is inadvisable
- * to create them via any means other than this factory class.
- */
-public class TriggerContextFactory<W extends BoundedWindow> {
-
-  private final WindowingStrategy<?, W> windowingStrategy;
-  private StateInternals<?> stateInternals;
-  // Future triggers may be able to exploit the active window to state address window mapping.
-  @SuppressWarnings("unused")
-  private ActiveWindowSet<W> activeWindows;
-  private final Coder<W> windowCoder;
-
-  public TriggerContextFactory(WindowingStrategy<?, W> windowingStrategy,
-      StateInternals<?> stateInternals, ActiveWindowSet<W> activeWindows) {
-    this.windowingStrategy = windowingStrategy;
-    this.stateInternals = stateInternals;
-    this.activeWindows = activeWindows;
-    this.windowCoder = windowingStrategy.getWindowFn().windowCoder();
-  }
-
-  public Trigger<W>.TriggerContext base(W window, Timers timers,
-      ExecutableTrigger<W> rootTrigger, FinishedTriggers finishedSet) {
-    return new TriggerContextImpl(window, timers, rootTrigger, finishedSet);
-  }
-
-  public Trigger<W>.OnElementContext createOnElementContext(
-      W window, Timers timers, Instant elementTimestamp,
-      ExecutableTrigger<W> rootTrigger, FinishedTriggers finishedSet) {
-    return new OnElementContextImpl(window, timers, rootTrigger, finishedSet, elementTimestamp);
-  }
-
-  public Trigger<W>.OnMergeContext createOnMergeContext(W window, Timers timers,
-      ExecutableTrigger<W> rootTrigger, FinishedTriggers finishedSet,
-      Map<W, FinishedTriggers> finishedSets) {
-    return new OnMergeContextImpl(window, timers, rootTrigger, finishedSet, finishedSets);
-  }
-
-  public StateAccessor<?> createStateAccessor(W window, ExecutableTrigger<W> trigger) {
-    return new StateAccessorImpl(window, trigger);
-  }
-
-  public MergingStateAccessor<?, W> createMergingStateAccessor(
-      W mergeResult, Collection<W> mergingWindows, ExecutableTrigger<W> trigger) {
-    return new MergingStateAccessorImpl(trigger, mergingWindows, mergeResult);
-  }
-
-  private class TriggerInfoImpl implements Trigger.TriggerInfo<W> {
-
-    protected final ExecutableTrigger<W> trigger;
-    protected final FinishedTriggers finishedSet;
-    private final Trigger<W>.TriggerContext context;
-
-    public TriggerInfoImpl(ExecutableTrigger<W> trigger, FinishedTriggers finishedSet,
-        Trigger<W>.TriggerContext context) {
-      this.trigger = trigger;
-      this.finishedSet = finishedSet;
-      this.context = context;
-    }
-
-    @Override
-    public boolean isMerging() {
-      return !windowingStrategy.getWindowFn().isNonMerging();
-    }
-
-    @Override
-    public Iterable<ExecutableTrigger<W>> subTriggers() {
-      return trigger.subTriggers();
-    }
-
-    @Override
-    public ExecutableTrigger<W> subTrigger(int subtriggerIndex) {
-      return trigger.subTriggers().get(subtriggerIndex);
-    }
-
-    @Override
-    public boolean isFinished() {
-      return finishedSet.isFinished(trigger);
-    }
-
-    @Override
-    public boolean isFinished(int subtriggerIndex) {
-      return finishedSet.isFinished(subTrigger(subtriggerIndex));
-    }
-
-    @Override
-    public boolean areAllSubtriggersFinished() {
-      return Iterables.isEmpty(unfinishedSubTriggers());
-    }
-
-    @Override
-    public Iterable<ExecutableTrigger<W>> unfinishedSubTriggers() {
-      return FluentIterable
-          .from(trigger.subTriggers())
-          .filter(new Predicate<ExecutableTrigger<W>>() {
-            @Override
-            public boolean apply(ExecutableTrigger<W> trigger) {
-              return !finishedSet.isFinished(trigger);
-            }
-          });
-    }
-
-    @Override
-    public ExecutableTrigger<W> firstUnfinishedSubTrigger() {
-      for (ExecutableTrigger<W> subTrigger : trigger.subTriggers()) {
-        if (!finishedSet.isFinished(subTrigger)) {
-          return subTrigger;
-        }
-      }
-      return null;
-    }
-
-    @Override
-    public void resetTree() throws Exception {
-      finishedSet.clearRecursively(trigger);
-      trigger.invokeClear(context);
-    }
-
-    @Override
-    public void setFinished(boolean finished) {
-      finishedSet.setFinished(trigger, finished);
-    }
-
-    @Override
-    public void setFinished(boolean finished, int subTriggerIndex) {
-      finishedSet.setFinished(subTrigger(subTriggerIndex), finished);
-    }
-  }
-
-  private class TriggerTimers implements Timers {
-
-    private final Timers timers;
-    private final W window;
-
-    public TriggerTimers(W window, Timers timers) {
-      this.timers = timers;
-      this.window = window;
-    }
-
-    @Override
-    public void setTimer(Instant timestamp, TimeDomain timeDomain) {
-      timers.setTimer(timestamp, timeDomain);
-    }
-
-    @Override
-    public void deleteTimer(Instant timestamp, TimeDomain timeDomain) {
-      if (timeDomain == TimeDomain.EVENT_TIME
-          && timestamp.equals(window.maxTimestamp())) {
-        // Don't allow triggers to unset the at-max-timestamp timer. This is necessary for on-time
-        // state transitions.
-        return;
-      }
-      timers.deleteTimer(timestamp, timeDomain);
-    }
-
-    @Override
-    public Instant currentProcessingTime() {
-      return timers.currentProcessingTime();
-    }
-
-    @Override
-    @Nullable
-    public Instant currentSynchronizedProcessingTime() {
-      return timers.currentSynchronizedProcessingTime();
-    }
-
-    @Override
-    @Nullable
-    public Instant currentEventTime() {
-      return timers.currentEventTime();
-    }
-  }
-
-  private class MergingTriggerInfoImpl
-      extends TriggerInfoImpl implements Trigger.MergingTriggerInfo<W> {
-
-    private final Map<W, FinishedTriggers> finishedSets;
-
-    public MergingTriggerInfoImpl(
-        ExecutableTrigger<W> trigger,
-        FinishedTriggers finishedSet,
-        Trigger<W>.TriggerContext context,
-        Map<W, FinishedTriggers> finishedSets) {
-      super(trigger, finishedSet, context);
-      this.finishedSets = finishedSets;
-    }
-
-    @Override
-    public boolean finishedInAnyMergingWindow() {
-      for (FinishedTriggers finishedSet : finishedSets.values()) {
-        if (finishedSet.isFinished(trigger)) {
-          return true;
-        }
-      }
-      return false;
-    }
-
-    @Override
-    public boolean finishedInAllMergingWindows() {
-      for (FinishedTriggers finishedSet : finishedSets.values()) {
-        if (!finishedSet.isFinished(trigger)) {
-          return false;
-        }
-      }
-      return true;
-    }
-
-    @Override
-    public Iterable<W> getFinishedMergingWindows() {
-      return Maps.filterValues(finishedSets, new Predicate<FinishedTriggers>() {
-        @Override
-        public boolean apply(FinishedTriggers finishedSet) {
-          return finishedSet.isFinished(trigger);
-        }
-      }).keySet();
-    }
-  }
-
-  private class StateAccessorImpl implements StateAccessor<Object> {
-    protected final int triggerIndex;
-    protected final StateNamespace windowNamespace;
-
-    public StateAccessorImpl(
-        W window,
-        ExecutableTrigger<W> trigger) {
-      this.triggerIndex = trigger.getTriggerIndex();
-      this.windowNamespace = namespaceFor(window);
-    }
-
-    protected StateNamespace namespaceFor(W window) {
-      return StateNamespaces.windowAndTrigger(windowCoder, window, triggerIndex);
-    }
-
-    @Override
-    public <StateT extends State> StateT access(StateTag<? super Object, StateT> address) {
-      return stateInternals.state(windowNamespace, address);
-    }
-  }
-
-  private class MergingStateAccessorImpl extends StateAccessorImpl
-  implements MergingStateAccessor<Object, W> {
-    private final Collection<W> activeToBeMerged;
-
-    public MergingStateAccessorImpl(ExecutableTrigger<W> trigger, Collection<W> activeToBeMerged,
-        W mergeResult) {
-      super(mergeResult, trigger);
-      this.activeToBeMerged = activeToBeMerged;
-    }
-
-    @Override
-    public <StateT extends State> StateT access(
-        StateTag<? super Object, StateT> address) {
-      return stateInternals.state(windowNamespace, address);
-    }
-
-    @Override
-    public <StateT extends State> Map<W, StateT> accessInEachMergingWindow(
-        StateTag<? super Object, StateT> address) {
-      ImmutableMap.Builder<W, StateT> builder = ImmutableMap.builder();
-      for (W mergingWindow : activeToBeMerged) {
-        StateT stateForWindow = stateInternals.state(namespaceFor(mergingWindow), address);
-        builder.put(mergingWindow, stateForWindow);
-      }
-      return builder.build();
-    }
-  }
-
-  private class TriggerContextImpl extends Trigger<W>.TriggerContext {
-
-    private final W window;
-    private final StateAccessorImpl state;
-    private final Timers timers;
-    private final TriggerInfoImpl triggerInfo;
-
-    private TriggerContextImpl(
-        W window,
-        Timers timers,
-        ExecutableTrigger<W> trigger,
-        FinishedTriggers finishedSet) {
-      trigger.getSpec().super();
-      this.window = window;
-      this.state = new StateAccessorImpl(window, trigger);
-      this.timers = new TriggerTimers(window, timers);
-      this.triggerInfo = new TriggerInfoImpl(trigger, finishedSet, this);
-    }
-
-    @Override
-    public Trigger<W>.TriggerContext forTrigger(ExecutableTrigger<W> trigger) {
-      return new TriggerContextImpl(window, timers, trigger, triggerInfo.finishedSet);
-    }
-
-    @Override
-    public TriggerInfo<W> trigger() {
-      return triggerInfo;
-    }
-
-    @Override
-    public StateAccessor state() {
-      return state;
-    }
-
-    @Override
-    public W window() {
-      return window;
-    }
-
-    @Override
-    public void deleteTimer(Instant timestamp, TimeDomain domain) {
-      timers.deleteTimer(timestamp, domain);
-    }
-
-    @Override
-    public Instant currentProcessingTime() {
-      return timers.currentProcessingTime();
-    }
-
-    @Override
-    @Nullable
-    public Instant currentSynchronizedProcessingTime() {
-      return timers.currentSynchronizedProcessingTime();
-    }
-
-    @Override
-    @Nullable
-    public Instant currentEventTime() {
-      return timers.currentEventTime();
-    }
-  }
-
-  private class OnElementContextImpl extends Trigger<W>.OnElementContext {
-
-    private final W window;
-    private final StateAccessorImpl state;
-    private final Timers timers;
-    private final TriggerInfoImpl triggerInfo;
-    private final Instant eventTimestamp;
-
-    private OnElementContextImpl(
-        W window,
-        Timers timers,
-        ExecutableTrigger<W> trigger,
-        FinishedTriggers finishedSet,
-        Instant eventTimestamp) {
-      trigger.getSpec().super();
-      this.window = window;
-      this.state = new StateAccessorImpl(window, trigger);
-      this.timers = new TriggerTimers(window, timers);
-      this.triggerInfo = new TriggerInfoImpl(trigger, finishedSet, this);
-      this.eventTimestamp = eventTimestamp;
-    }
-
-
-    @Override
-    public Instant eventTimestamp() {
-      return eventTimestamp;
-    }
-
-    @Override
-    public Trigger<W>.OnElementContext forTrigger(ExecutableTrigger<W> trigger) {
-      return new OnElementContextImpl(
-          window, timers, trigger, triggerInfo.finishedSet, eventTimestamp);
-    }
-
-    @Override
-    public TriggerInfo<W> trigger() {
-      return triggerInfo;
-    }
-
-    @Override
-    public StateAccessor state() {
-      return state;
-    }
-
-    @Override
-    public W window() {
-      return window;
-    }
-
-    @Override
-    public void setTimer(Instant timestamp, TimeDomain domain) {
-      timers.setTimer(timestamp, domain);
-    }
-
-
-    @Override
-    public void deleteTimer(Instant timestamp, TimeDomain domain) {
-      timers.deleteTimer(timestamp, domain);
-    }
-
-    @Override
-    public Instant currentProcessingTime() {
-      return timers.currentProcessingTime();
-    }
-
-    @Override
-    @Nullable
-    public Instant currentSynchronizedProcessingTime() {
-      return timers.currentSynchronizedProcessingTime();
-    }
-
-    @Override
-    @Nullable
-    public Instant currentEventTime() {
-      return timers.currentEventTime();
-    }
-  }
-
-  private class OnMergeContextImpl extends Trigger<W>.OnMergeContext {
-    private final MergingStateAccessor<?, W> state;
-    private final W window;
-    private final Collection<W> mergingWindows;
-    private final Timers timers;
-    private final MergingTriggerInfoImpl triggerInfo;
-
-    private OnMergeContextImpl(
-        W window,
-        Timers timers,
-        ExecutableTrigger<W> trigger,
-        FinishedTriggers finishedSet,
-        Map<W, FinishedTriggers> finishedSets) {
-      trigger.getSpec().super();
-      this.mergingWindows = finishedSets.keySet();
-      this.window = window;
-      this.state = new MergingStateAccessorImpl(trigger, mergingWindows, window);
-      this.timers = new TriggerTimers(window, timers);
-      this.triggerInfo = new MergingTriggerInfoImpl(trigger, finishedSet, this, finishedSets);
-    }
-
-    @Override
-    public Trigger<W>.OnMergeContext forTrigger(ExecutableTrigger<W> trigger) {
-      return new OnMergeContextImpl(
-          window, timers, trigger, triggerInfo.finishedSet, triggerInfo.finishedSets);
-    }
-
-    @Override
-    public MergingStateAccessor<?, W> state() {
-      return state;
-    }
-
-    @Override
-    public MergingTriggerInfo<W> trigger() {
-      return triggerInfo;
-    }
-
-    @Override
-    public W window() {
-      return window;
-    }
-
-    @Override
-    public void setTimer(Instant timestamp, TimeDomain domain) {
-      timers.setTimer(timestamp, domain);
-    }
-
-    /**
-     * Deletes the timer for {@code timestamp} in the given time {@code domain} by delegating to
-     * the {@link Timers} wrapper built for this context's window.
-     */
-    @Override
-    public void deleteTimer(Instant timestamp, TimeDomain domain) {
-      // Delegate to deleteTimer, matching the other trigger contexts. This previously called
-      // setTimer, which scheduled the timer instead of removing it.
-      timers.deleteTimer(timestamp, domain);
-    }
-
-    @Override
-    public Instant currentProcessingTime() {
-      return timers.currentProcessingTime();
-    }
-
-    @Override
-    @Nullable
-    public Instant currentSynchronizedProcessingTime() {
-      return timers.currentSynchronizedProcessingTime();
-    }
-
-    @Override
-    @Nullable
-    public Instant currentEventTime() {
-      return timers.currentEventTime();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerRunner.java
deleted file mode 100644
index dcfd035..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/TriggerRunner.java
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.DefaultTrigger;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger;
-import com.google.cloud.dataflow.sdk.util.state.MergingStateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateTag;
-import com.google.cloud.dataflow.sdk.util.state.StateTags;
-import com.google.cloud.dataflow.sdk.util.state.ValueState;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableMap;
-
-import org.joda.time.Instant;
-
-import java.util.BitSet;
-import java.util.Collection;
-import java.util.Map;
-
-/**
- * Executes a trigger while managing persistence of information about which subtriggers are
- * finished. Subtriggers include all recursive trigger expressions as well as the entire trigger.
- *
- * <p>Specifically, the responsibilities are:
- *
- * <ul>
- *   <li>Invoking the trigger's methods via its {@link ExecutableTrigger} wrapper by
- *       constructing the appropriate trigger contexts.</li>
- *   <li>Committing a record of which subtriggers are finished to persistent state.</li>
- *   <li>Restoring the record of which subtriggers are finished from persistent state.</li>
- *   <li>Clearing out the persisted finished set when a caller indicates
- *       (via {@link #clearFinished}) that it is no longer needed.</li>
- * </ul>
- *
- * <p>These responsibilities are intertwined: trigger contexts include mutable information about
- * which subtriggers are finished. This class provides the information when building the contexts
- * and commits the information when the method of the {@link ExecutableTrigger} returns.
- *
- * @param <W> The kind of windows being processed.
- */
-public class TriggerRunner<W extends BoundedWindow> {
-  @VisibleForTesting
-  static final StateTag<Object, ValueState<BitSet>> FINISHED_BITS_TAG =
-      StateTags.makeSystemTagInternal(StateTags.value("closed", BitSetCoder.of()));
-
-  private final ExecutableTrigger<W> rootTrigger;
-  private final TriggerContextFactory<W> contextFactory;
-
-  public TriggerRunner(ExecutableTrigger<W> rootTrigger, TriggerContextFactory<W> contextFactory) {
-    Preconditions.checkState(rootTrigger.getTriggerIndex() == 0);
-    this.rootTrigger = rootTrigger;
-    this.contextFactory = contextFactory;
-  }
-
-  /**
-   * Returns the finished-trigger bits stored in {@code state}, or an all-unfinished set with one
-   * slot per trigger in the tree when no stored bits are available.
-   */
-  private FinishedTriggersBitSet readFinishedBits(ValueState<BitSet> state) {
-    // If no trigger in the tree will ever have finished bits, skip the state read altogether and
-    // fall through to the empty (all 0, not finished) set so callers stay agnostic to that fact.
-    BitSet persistedBits = isFinishedSetNeeded() ? state.read() : null;
-    if (persistedBits != null) {
-      return FinishedTriggersBitSet.fromBitSet(persistedBits);
-    }
-    return FinishedTriggersBitSet.emptyWithCapacity(rootTrigger.getFirstIndexAfterSubtree());
-  }
-
-  /** Return true if the trigger is closed in the window corresponding to the specified state. */
-  public boolean isClosed(StateAccessor<?> state) {
-    return readFinishedBits(state.access(FINISHED_BITS_TAG)).isFinished(rootTrigger);
-  }
-
-  public void prefetchForValue(W window, StateAccessor<?> state) {
-    if (isFinishedSetNeeded()) {
-      state.access(FINISHED_BITS_TAG).readLater();
-    }
-    rootTrigger.getSpec().prefetchOnElement(
-        contextFactory.createStateAccessor(window, rootTrigger));
-  }
-
-  public void prefetchOnFire(W window, StateAccessor<?> state) {
-    if (isFinishedSetNeeded()) {
-      state.access(FINISHED_BITS_TAG).readLater();
-    }
-    rootTrigger.getSpec().prefetchOnFire(contextFactory.createStateAccessor(window, rootTrigger));
-  }
-
-  public void prefetchShouldFire(W window, StateAccessor<?> state) {
-    if (isFinishedSetNeeded()) {
-      state.access(FINISHED_BITS_TAG).readLater();
-    }
-    rootTrigger.getSpec().prefetchShouldFire(
-        contextFactory.createStateAccessor(window, rootTrigger));
-  }
-
-  /**
-   * Run the trigger logic to deal with a new value.
-   */
-  public void processValue(W window, Instant timestamp, Timers timers, StateAccessor<?> state)
-      throws Exception {
-    // Clone so that we can detect changes and so that changes here don't pollute merging.
-    FinishedTriggersBitSet finishedSet =
-        readFinishedBits(state.access(FINISHED_BITS_TAG)).copy();
-    Trigger<W>.OnElementContext triggerContext = contextFactory.createOnElementContext(
-        window, timers, timestamp, rootTrigger, finishedSet);
-    rootTrigger.invokeOnElement(triggerContext);
-    persistFinishedSet(state, finishedSet);
-  }
-
-  public void prefetchForMerge(
-      W window, Collection<W> mergingWindows, MergingStateAccessor<?, W> state) {
-    if (isFinishedSetNeeded()) {
-      for (ValueState<?> value : state.accessInEachMergingWindow(FINISHED_BITS_TAG).values()) {
-        value.readLater();
-      }
-    }
-    rootTrigger.getSpec().prefetchOnMerge(contextFactory.createMergingStateAccessor(
-        window, mergingWindows, rootTrigger));
-  }
-
-  /**
-   * Run the trigger merging logic as part of executing the specified merge.
-   */
-  public void onMerge(W window, Timers timers, MergingStateAccessor<?, W> state) throws Exception {
-    // Clone so that we can detect changes and so that changes here don't pollute merging.
-    FinishedTriggersBitSet finishedSet =
-        readFinishedBits(state.access(FINISHED_BITS_TAG)).copy();
-
-    // And read the finished bits in each merging window.
-    ImmutableMap.Builder<W, FinishedTriggers> builder = ImmutableMap.builder();
-    for (Map.Entry<W, ValueState<BitSet>> entry :
-        state.accessInEachMergingWindow(FINISHED_BITS_TAG).entrySet()) {
-      // Don't need to clone these, since the trigger context doesn't allow modification
-      builder.put(entry.getKey(), readFinishedBits(entry.getValue()));
-    }
-    ImmutableMap<W, FinishedTriggers> mergingFinishedSets = builder.build();
-
-    Trigger<W>.OnMergeContext mergeContext = contextFactory.createOnMergeContext(
-        window, timers, rootTrigger, finishedSet, mergingFinishedSets);
-
-    // Run the merge from the trigger
-    rootTrigger.invokeOnMerge(mergeContext);
-
-    persistFinishedSet(state, finishedSet);
-
-    // Clear the finished bits.
-    clearFinished(state);
-  }
-
-  public boolean shouldFire(W window, Timers timers, StateAccessor<?> state) throws Exception {
-    FinishedTriggers finishedSet = readFinishedBits(state.access(FINISHED_BITS_TAG)).copy();
-    Trigger<W>.TriggerContext context = contextFactory.base(window, timers,
-        rootTrigger, finishedSet);
-    return rootTrigger.invokeShouldFire(context);
-  }
-
-  public void onFire(W window, Timers timers, StateAccessor<?> state) throws Exception {
-    FinishedTriggersBitSet finishedSet =
-        readFinishedBits(state.access(FINISHED_BITS_TAG)).copy();
-    Trigger<W>.TriggerContext context = contextFactory.base(window, timers,
-        rootTrigger, finishedSet);
-    rootTrigger.invokeOnFire(context);
-    persistFinishedSet(state, finishedSet);
-  }
-
-  private void persistFinishedSet(
-      StateAccessor<?> state, FinishedTriggersBitSet modifiedFinishedSet) {
-    if (!isFinishedSetNeeded()) {
-      return;
-    }
-
-    ValueState<BitSet> finishedSetState = state.access(FINISHED_BITS_TAG);
-    if (!readFinishedBits(finishedSetState).equals(modifiedFinishedSet)) {
-      if (modifiedFinishedSet.getBitSet().isEmpty()) {
-        finishedSetState.clear();
-      } else {
-        finishedSetState.write(modifiedFinishedSet.getBitSet());
-      }
-    }
-  }
-
-  /**
-   * Clear finished bits.
-   */
-  public void clearFinished(StateAccessor<?> state) {
-    if (isFinishedSetNeeded()) {
-      state.access(FINISHED_BITS_TAG).clear();
-    }
-  }
-
-  /**
-   * Clear the state used for executing triggers, but leave the finished set to indicate
-   * the window is closed.
-   */
-  public void clearState(W window, Timers timers, StateAccessor<?> state) throws Exception {
-    // Don't need to clone, because we'll be clearing the finished bits anyways.
-    FinishedTriggers finishedSet = readFinishedBits(state.access(FINISHED_BITS_TAG));
-    rootTrigger.invokeClear(contextFactory.base(window, timers, rootTrigger, finishedSet));
-  }
-
-  private boolean isFinishedSetNeeded() {
-    // TODO: If we know that no trigger in the tree will ever finish, we don't need to do the
-    // lookup. Right now, we special case this for the DefaultTrigger.
-    return !(rootTrigger.getSpec() instanceof DefaultTrigger);
-  }
-}

[43/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java
deleted file mode 100644
index f618bc9..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/DatastoreIO.java
+++ /dev/null
@@ -1,957 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.api.services.datastore.DatastoreV1.PropertyFilter.Operator.EQUAL;
-import static com.google.api.services.datastore.DatastoreV1.PropertyOrder.Direction.DESCENDING;
-import static com.google.api.services.datastore.DatastoreV1.QueryResultBatch.MoreResultsType.NOT_FINISHED;
-import static com.google.api.services.datastore.client.DatastoreHelper.getPropertyMap;
-import static com.google.api.services.datastore.client.DatastoreHelper.makeFilter;
-import static com.google.api.services.datastore.client.DatastoreHelper.makeOrder;
-import static com.google.api.services.datastore.client.DatastoreHelper.makeValue;
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Verify.verify;
-
-import com.google.api.client.auth.oauth2.Credential;
-import com.google.api.client.util.BackOff;
-import com.google.api.client.util.BackOffUtils;
-import com.google.api.client.util.Sleeper;
-import com.google.api.services.datastore.DatastoreV1.CommitRequest;
-import com.google.api.services.datastore.DatastoreV1.Entity;
-import com.google.api.services.datastore.DatastoreV1.EntityResult;
-import com.google.api.services.datastore.DatastoreV1.Key;
-import com.google.api.services.datastore.DatastoreV1.Key.PathElement;
-import com.google.api.services.datastore.DatastoreV1.PartitionId;
-import com.google.api.services.datastore.DatastoreV1.Query;
-import com.google.api.services.datastore.DatastoreV1.QueryResultBatch;
-import com.google.api.services.datastore.DatastoreV1.RunQueryRequest;
-import com.google.api.services.datastore.DatastoreV1.RunQueryResponse;
-import com.google.api.services.datastore.client.Datastore;
-import com.google.api.services.datastore.client.DatastoreException;
-import com.google.api.services.datastore.client.DatastoreFactory;
-import com.google.api.services.datastore.client.DatastoreHelper;
-import com.google.api.services.datastore.client.DatastoreOptions;
-import com.google.api.services.datastore.client.QuerySplitter;
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.EntityCoder;
-import com.google.cloud.dataflow.sdk.coders.SerializableCoder;
-import com.google.cloud.dataflow.sdk.io.Sink.WriteOperation;
-import com.google.cloud.dataflow.sdk.io.Sink.Writer;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions;
-import com.google.cloud.dataflow.sdk.options.GcpOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.util.AttemptBoundedExponentialBackOff;
-import com.google.cloud.dataflow.sdk.util.RetryHttpRequestInitializer;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-import com.google.common.primitives.Ints;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-import javax.annotation.Nullable;
-
-/**
- * <p>{@link DatastoreIO} provides an API to Read and Write {@link PCollection PCollections} of
- * <a href="https://developers.google.com/datastore/">Google Cloud Datastore</a>
- * {@link Entity} objects.
- *
- * <p>Google Cloud Datastore is a fully managed NoSQL data storage service.
- * An {@code Entity} is an object in Datastore, analogous to a row in a traditional
- * database table.
- *
- * <p>This API currently requires an authentication workaround. To use {@link DatastoreIO}, users
- * must use the {@code gcloud} command line tool to get credentials for Datastore:
- * <pre>
- * $ gcloud auth login
- * </pre>
- *
- * <p>To read a {@link PCollection} from a query to Datastore, use {@link DatastoreIO#source} and
- * its methods {@link DatastoreIO.Source#withDataset} and {@link DatastoreIO.Source#withQuery} to
- * specify the dataset to query and the query to read from. You can optionally provide a namespace
- * to query within using {@link DatastoreIO.Source#withNamespace} or a Datastore host using
- * {@link DatastoreIO.Source#withHost}.
- *
- * <p>For example:
- *
- * <pre> {@code
- * // Read a query from Datastore
- * PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
- * Query query = ...;
- * String datasetId = "...";
- *
- * Pipeline p = Pipeline.create(options);
- * PCollection<Entity> entities = p.apply(
- *     Read.from(DatastoreIO.source()
- *         .withDataset(datasetId)
- *         .withQuery(query)
- *         .withHost(host)));
- * } </pre>
- *
- * <p>or:
- *
- * <pre> {@code
- * // Read a query from Datastore using the default namespace and host
- * PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
- * Query query = ...;
- * String datasetId = "...";
- *
- * Pipeline p = Pipeline.create(options);
- * PCollection<Entity> entities = p.apply(DatastoreIO.readFrom(datasetId, query));
- * p.run();
- * } </pre>
- *
- * <p><b>Note:</b> Normally, a Cloud Dataflow job will read from Cloud Datastore in parallel across
- * many workers. However, when the {@link Query} is configured with a limit using
- * {@link com.google.api.services.datastore.DatastoreV1.Query.Builder#setLimit(int)}, then
- * all returned results will be read by a single Dataflow worker in order to ensure correct data.
- *
- * <p>To write a {@link PCollection} to a Datastore, use {@link DatastoreIO#writeTo},
- * specifying the datastore to write to:
- *
- * <pre> {@code
- * PCollection<Entity> entities = ...;
- * entities.apply(DatastoreIO.writeTo(dataset));
- * p.run();
- * } </pre>
- *
- * <p>To optionally change the host that is used to write to the Datastore, use {@link
- * DatastoreIO#sink} to build a {@link DatastoreIO.Sink} and write to it using the {@link Write}
- * transform:
- *
- * <pre> {@code
- * PCollection<Entity> entities = ...;
- * entities.apply(Write.to(DatastoreIO.sink().withDataset(dataset).withHost(host)));
- * } </pre>
- *
- * <p>{@link Entity Entities} in the {@code PCollection} to be written must have complete
- * {@link Key Keys}. Complete {@code Keys} specify the {@code name} and {@code id} of the
- * {@code Entity}, where incomplete {@code Keys} do not. A {@code namespace} other than the
- * project default may be written to by specifying it in the {@code Entity} {@code Keys}.
- *
- * <pre>{@code
- * Key.Builder keyBuilder = DatastoreHelper.makeKey(...);
- * keyBuilder.getPartitionIdBuilder().setNamespace(namespace);
- * }</pre>
- *
- * <p>{@code Entities} will be committed as upsert (update or insert) mutations. Please read
- * <a href="https://cloud.google.com/datastore/docs/concepts/entities">Entities, Properties, and
- * Keys</a> for more information about {@code Entity} keys.
- *
- * <p><h3>Permissions</h3>
- * Permission requirements depend on the {@code PipelineRunner} that is used to execute the
- * Dataflow job. Please refer to the documentation of corresponding {@code PipelineRunner}s for
- * more details.
- *
- * <p>Please see <a href="https://cloud.google.com/datastore/docs/activate">Cloud Datastore Sign Up
- * </a> for security and permission-related information specific to Datastore.
- *
- * @see com.google.cloud.dataflow.sdk.runners.PipelineRunner
- */
-@Experimental(Experimental.Kind.SOURCE_SINK)
-public class DatastoreIO {
-  public static final String DEFAULT_HOST = "https://www.googleapis.com";
-
-  /**
-   * Datastore has a limit of 500 mutations per batch operation, so we flush
-   * changes to Datastore every 500 entities.
-   */
-  public static final int DATASTORE_BATCH_UPDATE_LIMIT = 500;
-
-  /**
-   * Returns an empty {@link DatastoreIO.Source} builder with the default {@code host}.
-   * Configure the {@code dataset}, {@code query}, and {@code namespace} using
-   * {@link DatastoreIO.Source#withDataset}, {@link DatastoreIO.Source#withQuery},
-   * and {@link DatastoreIO.Source#withNamespace}.
-   *
-   * @deprecated the name and return type do not match. Use {@link #source()}.
-   */
-  @Deprecated
-  public static Source read() {
-    return source();
-  }
-
-  /**
-   * Returns an empty {@link DatastoreIO.Source} builder with the default {@code host}.
-   * Configure the {@code dataset}, {@code query}, and {@code namespace} using
-   * {@link DatastoreIO.Source#withDataset}, {@link DatastoreIO.Source#withQuery},
-   * and {@link DatastoreIO.Source#withNamespace}.
-   *
-   * <p>The resulting {@link Source} object can be passed to {@link Read} to create a
-   * {@code PTransform} that will read from Datastore.
-   */
-  public static Source source() {
-    return new Source(DEFAULT_HOST, null, null, null);
-  }
-
-  /**
-   * Returns a {@code PTransform} that reads Datastore entities from the query
-   * against the given dataset.
-   */
-  public static Read.Bounded<Entity> readFrom(String datasetId, Query query) {
-    return Read.from(new Source(DEFAULT_HOST, datasetId, query, null));
-  }
-
-  /**
-   * Returns a {@code PTransform} that reads Datastore entities from the query
-   * against the given dataset and host.
-   *
-   * @deprecated prefer {@link #source()} with {@link Source#withHost}, {@link Source#withDataset},
-   *    and {@link Source#withQuery}.
-   */
-  @Deprecated
-  public static Read.Bounded<Entity> readFrom(String host, String datasetId, Query query) {
-    return Read.from(new Source(host, datasetId, query, null));
-  }
-
-  /**
-   * A {@link Source} that reads the result rows of a Datastore query as {@code Entity} objects.
-   */
-  public static class Source extends BoundedSource<Entity> {
-    public String getHost() {
-      return host;
-    }
-
-    public String getDataset() {
-      return datasetId;
-    }
-
-    public Query getQuery() {
-      return query;
-    }
-
-    @Nullable
-    public String getNamespace() {
-      return namespace;
-    }
-
-    public Source withDataset(String datasetId) {
-      checkNotNull(datasetId, "datasetId");
-      return new Source(host, datasetId, query, namespace);
-    }
-
-    /**
-     * Returns a new {@link Source} that reads the results of the specified query.
-     *
-     * <p>Does not modify this object.
-     *
-     * <p><b>Note:</b> Normally, a Cloud Dataflow job will read from Cloud Datastore in parallel
-     * across many workers. However, when the {@link Query} is configured with a limit using
-     * {@link com.google.api.services.datastore.DatastoreV1.Query.Builder#setLimit(int)}, then all
-     * returned results will be read by a single Dataflow worker in order to ensure correct data.
-     */
-    public Source withQuery(Query query) {
-      checkNotNull(query, "query");
-      checkArgument(!query.hasLimit() || query.getLimit() > 0,
-          "Invalid query limit %s: must be positive", query.getLimit());
-      return new Source(host, datasetId, query, namespace);
-    }
-
-    public Source withHost(String host) {
-      checkNotNull(host, "host");
-      return new Source(host, datasetId, query, namespace);
-    }
-
-    public Source withNamespace(@Nullable String namespace) {
-      return new Source(host, datasetId, query, namespace);
-    }
-
-    @Override
-    public Coder<Entity> getDefaultOutputCoder() {
-      return EntityCoder.of();
-    }
-
-    @Override
-    public boolean producesSortedKeys(PipelineOptions options) {
-      // TODO: Perhaps this can be implemented by inspecting the query.
-      return false;
-    }
-
-    /**
-     * Splits this source into sub-sources, one per split query returned by
-     * {@code getSplitQueries}.
-     *
-     * <p>Returns this source unsplit when the query has a limit, when at most one split is
-     * computed, or when the query cannot be split.
-     *
-     * @param desiredBundleSizeBytes target size of each split, used with the estimated result
-     *     size to choose the number of splits
-     * @param options pipeline options, used for size estimation and the worker-count fallback
-     */
-    @Override
-    public List<Source> splitIntoBundles(long desiredBundleSizeBytes, PipelineOptions options)
-        throws Exception {
-      // Users may request a limit on the number of results. We can currently support this by
-      // simply disabling parallel reads and using only a single split.
-      if (query.hasLimit()) {
-        return ImmutableList.of(this);
-      }
-
-      long numSplits;
-      try {
-        numSplits = Math.round(((double) getEstimatedSizeBytes(options)) / desiredBundleSizeBytes);
-      } catch (Exception e) {
-        // Fallback in case estimated size is unavailable. TODO: fix this, it's horrible.
-
-        // 1. Try Dataflow's numWorkers, which will be 0 for other workers.
-        DataflowPipelineWorkerPoolOptions poolOptions =
-            options.as(DataflowPipelineWorkerPoolOptions.class);
-        if (poolOptions.getNumWorkers() > 0) {
-          LOG.warn("Estimated size unavailable, using the number of workers {}",
-              poolOptions.getNumWorkers(), e);
-          numSplits = poolOptions.getNumWorkers();
-        } else {
-          // 2. Default to 12 in the unknown case. Log the cause here too, so the failed
-          // estimate is not silently dropped.
-          numSplits = 12;
-          LOG.warn("Estimated size unavailable, using the default of {} splits", numSplits, e);
-        }
-      }
-
-      // If the desiredBundleSize or number of workers results in 1 split, simply return
-      // a source that reads from the original query.
-      if (numSplits <= 1) {
-        return ImmutableList.of(this);
-      }
-
-      List<Query> datastoreSplits;
-      try {
-        datastoreSplits = getSplitQueries(Ints.checkedCast(numSplits), options);
-      } catch (IllegalArgumentException | DatastoreException e) {
-        // Covers both an out-of-int-range split count and a query Datastore refuses to split;
-        // in either case fall back to a single, unsplit read.
-        LOG.warn("Unable to parallelize the given query: {}", query, e);
-        return ImmutableList.of(this);
-      }
-
-      ImmutableList.Builder<Source> splits = ImmutableList.builder();
-      for (Query splitQuery : datastoreSplits) {
-        splits.add(new Source(host, datasetId, splitQuery, namespace));
-      }
-      return splits.build();
-    }
-
-    @Override
-    public BoundedReader<Entity> createReader(PipelineOptions pipelineOptions) throws IOException {
-      return new DatastoreReader(this, getDatastore(pipelineOptions));
-    }
-
-    @Override
-    public void validate() {
-      Preconditions.checkNotNull(host, "host");
-      Preconditions.checkNotNull(query, "query");
-      Preconditions.checkNotNull(datasetId, "datasetId");
-    }
-
-    /**
-     * Estimates the result size in bytes from the statistics of the single entity kind named in
-     * the query.
-     *
-     * @throws UnsupportedOperationException if the query does not specify exactly one kind
-     * @throws NoSuchElementException if no statistics entity is returned for that kind
-     */
-    @Override
-    public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
-      // Datastore provides no way to get a good estimate of how large the result of a query
-      // will be. As a rough approximation, we attempt to fetch the statistics of the whole
-      // entity kind being queried, using the __Stat_Kind__ system table, assuming exactly 1 kind
-      // is specified in the query.
-      //
-      // See https://cloud.google.com/datastore/docs/concepts/stats
-      if (mockEstimateSizeBytes != null) {
-        return mockEstimateSizeBytes;
-      }
-
-      Datastore datastore = getDatastore(options);
-      if (query.getKindCount() != 1) {
-        throw new UnsupportedOperationException(
-            "Can only estimate size for queries specifying exactly 1 kind.");
-      }
-      String ourKind = query.getKind(0).getName();
-      long latestTimestamp = queryLatestStatisticsTimestamp(datastore);
-
-      // Build the statistics lookup under its own name so it does not shadow the user query field.
-      Query.Builder statQuery = Query.newBuilder();
-      String statKindName = (namespace == null) ? "__Stat_Kind__" : "__Ns_Stat_Kind__";
-      statQuery.addKindBuilder().setName(statKindName);
-      statQuery.setFilter(makeFilter(
-          makeFilter("kind_name", EQUAL, makeValue(ourKind)).build(),
-          makeFilter("timestamp", EQUAL, makeValue(latestTimestamp)).build()));
-      RunQueryRequest request = makeRequest(statQuery.build());
-
-      long startMillis = System.currentTimeMillis();
-      RunQueryResponse response = datastore.runQuery(request);
-      LOG.info("Query for per-kind statistics took {}ms", System.currentTimeMillis() - startMillis);
-
-      QueryResultBatch batch = response.getBatch();
-      if (batch.getEntityResultCount() == 0) {
-        throw new NoSuchElementException(
-            "Datastore statistics for kind " + ourKind + " unavailable");
-      }
-      Entity statEntity = batch.getEntityResult(0).getEntity();
-      return getPropertyMap(statEntity).get("entity_bytes").getIntegerValue();
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(getClass())
-          .add("host", host)
-          .add("dataset", datasetId)
-          .add("query", query)
-          .add("namespace", namespace)
-          .toString();
-    }
-
-    ///////////////////////////////////////////////////////////////////////////////////////////
-
-    private static final Logger LOG = LoggerFactory.getLogger(Source.class);
-    private final String host;
-    /** Not really nullable, but it may be {@code null} for in-progress {@code Source}s. */
-    @Nullable
-    private final String datasetId;
-    /** Not really nullable, but it may be {@code null} for in-progress {@code Source}s. */
-    @Nullable
-    private final Query query;
-    @Nullable
-    private final String namespace;
-
-    /** For testing only. TODO: This could be much cleaner with dependency injection. */
-    @Nullable
-    private QuerySplitter mockSplitter;
-    @Nullable
-    private Long mockEstimateSizeBytes;
-
-    /**
-     * Note that only {@code namespace} is really {@code @Nullable}. The other parameters may be
-     * {@code null} as a matter of build order, but if they are {@code null} at instantiation time,
-     * an error will be thrown.
-     *
-     * @param host the Datastore host; the only argument checked for {@code null} here
-     * @param datasetId the dataset to read from, possibly not yet set
-     * @param query the query to run, possibly not yet set
-     * @param namespace the optional namespace to scope requests to
-     */
-    private Source(
-        String host, @Nullable String datasetId, @Nullable Query query,
-        @Nullable String namespace) {
-      // Fail fast on a missing host; datasetId and query are verified later by validate().
-      this.host = checkNotNull(host, "host");
-      this.datasetId = datasetId;
-      this.query = query;
-      this.namespace = namespace;
-    }
-
-    /**
-     * Splits {@code query} into at most {@code numSplits} sub-queries. The optional
-     * {@code namespace} is forwarded as the partition, and the mock splitter (tests only) takes
-     * precedence over the real one.
-     */
-    private List<Query> getSplitQueries(int numSplits, PipelineOptions options)
-        throws DatastoreException {
-      // If namespace is set, include it in the split request so splits are calculated accordingly.
-      PartitionId.Builder partitionBuilder = PartitionId.newBuilder();
-      if (namespace != null) {
-        partitionBuilder.setNamespace(namespace);
-      }
-      PartitionId partition = partitionBuilder.build();
-
-      if (mockSplitter == null) {
-        Datastore datastore = getDatastore(options);
-        return DatastoreHelper.getQuerySplitter().getSplits(query, partition, numSplits, datastore);
-      }
-
-      // For testing: the mock splitter is handed no Datastore client.
-      return mockSplitter.getSplits(query, partition, numSplits, null);
-    }
-
-    /**
-     * Wraps {@code query} in a {@link RunQueryRequest}, scoping the request to this
-     * {@code Source}'s {@code namespace} when one is set.
-     */
-    private RunQueryRequest makeRequest(Query query) {
-      RunQueryRequest.Builder request = RunQueryRequest.newBuilder();
-      request.setQuery(query);
-      if (namespace == null) {
-        return request.build();
-      }
-      request.getPartitionIdBuilder().setNamespace(namespace);
-      return request.build();
-    }
-
-    /**
-     * Datastore system tables with statistics are periodically updated. This method fetches
-     * the latest timestamp of statistics update using the {@code __Stat_Total__} table, or the
-     * {@code __Stat_Ns_Total__} table when this {@code Source} is scoped to a namespace.
-     *
-     * @param datastore the client used to run the statistics query
-     * @return the timestamp of the most recent statistics entity, in microseconds
-     * @throws DatastoreException if the statistics query fails
-     * @throws NoSuchElementException if no statistics entity is returned
-     */
-    private long queryLatestStatisticsTimestamp(Datastore datastore) throws DatastoreException {
-      Query.Builder query = Query.newBuilder();
-      if (namespace == null) {
-        query.addKindBuilder().setName("__Stat_Total__");
-      } else {
-        // makeRequest() scopes the request to the namespace, where the summary statistics
-        // are stored under the namespace-specific kind.
-        query.addKindBuilder().setName("__Stat_Ns_Total__");
-      }
-      // Newest statistics first; only the single latest entity is needed.
-      query.addOrder(makeOrder("timestamp", DESCENDING));
-      query.setLimit(1);
-      RunQueryRequest request = makeRequest(query.build());
-
-      long now = System.currentTimeMillis();
-      RunQueryResponse response = datastore.runQuery(request);
-      LOG.info("Query for latest stats timestamp of dataset {} took {}ms", datasetId,
-          System.currentTimeMillis() - now);
-      QueryResultBatch batch = response.getBatch();
-      if (batch.getEntityResultCount() == 0) {
-        throw new NoSuchElementException(
-            "Datastore total statistics for dataset " + datasetId + " unavailable");
-      }
-      Entity entity = batch.getEntityResult(0).getEntity();
-      return getPropertyMap(entity).get("timestamp").getTimestampMicrosecondsValue();
-    }
-
-    /**
-     * Builds a Datastore client for this source's host and dataset, attaching the GCP credential
-     * from the pipeline options when one is available.
-     */
-    private Datastore getDatastore(PipelineOptions pipelineOptions) {
-      DatastoreOptions.Builder clientOptions = new DatastoreOptions.Builder();
-      clientOptions.host(host);
-      clientOptions.dataset(datasetId);
-      clientOptions.initializer(new RetryHttpRequestInitializer());
-
-      GcpOptions gcpOptions = pipelineOptions.as(GcpOptions.class);
-      Credential credential = gcpOptions.getGcpCredential();
-      if (credential != null) {
-        clientOptions.credential(credential);
-      }
-      return DatastoreFactory.get().create(clientOptions.build());
-    }
-
-    /** For testing only: returns a copy of this source that splits with {@code splitter}. */
-    Source withMockSplitter(QuerySplitter splitter) {
-      Source copy = new Source(host, datasetId, query, namespace);
-      // Carry over the other test hook so the two can be combined in either order.
-      copy.mockEstimateSizeBytes = mockEstimateSizeBytes;
-      copy.mockSplitter = splitter;
-      return copy;
-    }
-
-    /** For testing only: returns a copy of this source that reports the given size estimate. */
-    Source withMockEstimateSizeBytes(Long estimateSizeBytes) {
-      Source copy = new Source(host, datasetId, query, namespace);
-      // Carry over the other test hook so the two can be combined in either order.
-      copy.mockEstimateSizeBytes = estimateSizeBytes;
-      copy.mockSplitter = mockSplitter;
-      return copy;
-    }
-  }
-
-  ///////////////////// Write Class /////////////////////////////////
-
-  /**
-   * Returns a new {@link DatastoreIO.Sink} builder using the default host.
-   * You need to further configure it using {@link DatastoreIO.Sink#withDataset}, and optionally
-   * {@link DatastoreIO.Sink#withHost} before using it in a {@link Write} transform.
-   *
-   * <p>The returned sink has no dataset set yet.
-   *
-   * <p>For example: {@code p.apply(Write.to(DatastoreIO.sink().withDataset(dataset)));}
-   */
-  public static Sink sink() {
-    // The dataset is intentionally left null here; it is supplied later via withDataset().
-    return new Sink(DEFAULT_HOST, null);
-  }
-
-  /**
-   * Returns a new {@link Write} transform that will write to a {@link Sink}.
-   *
-   * <p>For example: {@code p.apply(DatastoreIO.writeTo(dataset));}
-   */
-  public static Write.Bound<Entity> writeTo(String datasetId) {
-    return Write.to(sink().withDataset(datasetId));
-  }
-
-  /**
-   * A {@link com.google.cloud.dataflow.sdk.io.Sink} that writes a {@link PCollection} containing
-   * {@link Entity Entities} to the Datastore.
-   *
-   * <p>The fields are final; {@link #withDataset} and {@link #withHost} return modified copies
-   * rather than changing this instance.
-   */
-  public static class Sink extends com.google.cloud.dataflow.sdk.io.Sink<Entity> {
-    final String host;
-    /** Not really nullable, but it may be {@code null} for in-progress {@code Sink}s. */
-    @Nullable
-    final String datasetId;
-
-    /**
-     * Returns a {@link Sink} that is like this one, but will write to the specified dataset.
-     *
-     * @throws NullPointerException if {@code datasetId} is {@code null}
-     */
-    public Sink withDataset(String datasetId) {
-      checkNotNull(datasetId, "datasetId");
-      return new Sink(host, datasetId);
-    }
-
-    /**
-     * Returns a {@link Sink} that is like this one, but will use the given host.  If not specified,
-     * the {@link DatastoreIO#DEFAULT_HOST default host} will be used.
-     *
-     * @throws NullPointerException if {@code host} is {@code null}
-     */
-    public Sink withHost(String host) {
-      checkNotNull(host, "host");
-      return new Sink(host, datasetId);
-    }
-
-    /**
-     * Constructs a Sink with given host and dataset. The dataset may still be {@code null} at
-     * this point; it is checked by {@link #validate}.
-     */
-    protected Sink(String host, @Nullable String datasetId) {
-      this.host = checkNotNull(host, "host");
-      this.datasetId = datasetId;
-    }
-
-    /**
-     * Ensures the host and dataset are set.
-     *
-     * @throws NullPointerException if either of them is missing
-     */
-    @Override
-    public void validate(PipelineOptions options) {
-      Preconditions.checkNotNull(
-          host, "Host is a required parameter. Please use withHost to set the host.");
-      Preconditions.checkNotNull(
-          datasetId,
-          "Dataset ID is a required parameter. Please use withDataset to set the datasetId.");
-    }
-
-    /** Creates the write operation that manages the write to this sink. */
-    @Override
-    public DatastoreWriteOperation createWriteOperation(PipelineOptions options) {
-      return new DatastoreWriteOperation(this);
-    }
-  }
-
-  /**
-   * The {@link WriteOperation} for a {@link DatastoreIO.Sink}: it hands out one
-   * {@link DatastoreWriter} per bundle and, once all bundles are done, logs how many entities
-   * were written in total.
-   */
-  private static class DatastoreWriteOperation
-      extends WriteOperation<Entity, DatastoreWriteResult> {
-    private static final Logger LOG = LoggerFactory.getLogger(DatastoreWriteOperation.class);
-
-    private final DatastoreIO.Sink sink;
-
-    public DatastoreWriteOperation(DatastoreIO.Sink sink) {
-      this.sink = sink;
-    }
-
-    /** Writer results are encoded with Java serialization. */
-    @Override
-    public Coder<DatastoreWriteResult> getWriterResultCoder() {
-      return SerializableCoder.of(DatastoreWriteResult.class);
-    }
-
-    /** Nothing to set up before writing. */
-    @Override
-    public void initialize(PipelineOptions options) throws Exception {}
-
-    /**
-     * Finalizes the write by summing the per-writer counts and logging the total number of
-     * entities written to the Datastore.
-     */
-    @Override
-    public void finalize(Iterable<DatastoreWriteResult> writerResults, PipelineOptions options)
-        throws Exception {
-      long entityTally = 0;
-      for (DatastoreWriteResult writerResult : writerResults) {
-        entityTally = entityTally + writerResult.entitiesWritten;
-      }
-      LOG.info("Wrote {} elements.", entityTally);
-    }
-
-    /**
-     * Creates a writer with its own Datastore client, configured from the sink's host and
-     * dataset and, when available, the GCP credential in {@code options}.
-     */
-    @Override
-    public DatastoreWriter createWriter(PipelineOptions options) throws Exception {
-      DatastoreOptions.Builder clientOptions = new DatastoreOptions.Builder();
-      clientOptions.host(sink.host);
-      clientOptions.dataset(sink.datasetId);
-      clientOptions.initializer(new RetryHttpRequestInitializer());
-
-      Credential credential = options.as(GcpOptions.class).getGcpCredential();
-      if (credential != null) {
-        clientOptions.credential(credential);
-      }
-
-      Datastore client = DatastoreFactory.get().create(clientOptions.build());
-      return new DatastoreWriter(this, client);
-    }
-
-    @Override
-    public DatastoreIO.Sink getSink() {
-      return sink;
-    }
-  }
-
-  /**
-   * {@link Writer} that writes entities to a Datastore Sink.  Entities are written in batches,
-   * where the maximum batch size is {@link DatastoreIO#DATASTORE_BATCH_UPDATE_LIMIT}.  Entities
-   * are committed as upsert mutations (either update if the key already exists, or insert if it is
-   * a new key).  If an entity does not have a complete key (i.e., it has no name or id), the bundle
-   * will fail.
-   *
-   * <p>See <a
-   * href="https://cloud.google.com/datastore/docs/concepts/entities#Datastore_Creating_an_entity">
-   * Datastore: Entities, Properties, and Keys</a> for information about entity keys and upsert
-   * mutations.
-   *
-   * <p>Commits are non-transactional.  If a commit fails because of a conflict over an entity
-   * group, the commit will be retried (up to {@link DatastoreWriter#MAX_RETRIES}
-   * times).
-   *
-   * <p>Visible for testing purposes.
-   */
-  static class DatastoreWriter extends Writer<Entity, DatastoreWriteResult> {
-    private static final Logger LOG = LoggerFactory.getLogger(DatastoreWriter.class);
-    private final DatastoreWriteOperation writeOp;
-    private final Datastore datastore;
-    private long totalWritten = 0;
-
-    // Visible for testing.
-    final List<Entity> entities = new ArrayList<>();
-
-    /**
-     * Since a bundle is written in batches, we should retry the commit of a batch in order to
-     * prevent transient errors from causing the bundle to fail.
-     */
-    private static final int MAX_RETRIES = 5;
-
-    /**
-     * Initial backoff time for exponential backoff for retry attempts.
-     */
-    private static final int INITIAL_BACKOFF_MILLIS = 5000;
-
-    /**
-     * Tells whether a Datastore key is complete, i.e. whether it has at least one path element
-     * and the last of them carries either an id or a name.
-     */
-    static boolean isValidKey(Key key) {
-      List<PathElement> path = key.getPathElementList();
-      if (!path.isEmpty()) {
-        PathElement leaf = path.get(path.size() - 1);
-        return leaf.hasId() || leaf.hasName();
-      }
-      return false;
-    }
-
-    // Visible for testing
-    DatastoreWriter(DatastoreWriteOperation writeOp, Datastore datastore) {
-      this.writeOp = writeOp;
-      this.datastore = datastore;
-    }
-
-    @Override
-    public void open(String uId) throws Exception {}
-
-    /**
-     * Writes an entity to the Datastore.  Writes are batched, up to {@link
-     * DatastoreIO#DATASTORE_BATCH_UPDATE_LIMIT}. If an entity does not have a complete key, an
-     * {@link IllegalArgumentException} will be thrown.
-     */
-    @Override
-    public void write(Entity value) throws Exception {
-      // Verify that the entity to write has a complete key.
-      if (!isValidKey(value.getKey())) {
-        throw new IllegalArgumentException(
-            "Entities to be written to the Datastore must have complete keys");
-      }
-
-      entities.add(value);
-
-      if (entities.size() >= DatastoreIO.DATASTORE_BATCH_UPDATE_LIMIT) {
-        flushBatch();
-      }
-    }
-
-    /**
-     * Flushes any pending batch writes and returns a DatastoreWriteResult.
-     */
-    @Override
-    public DatastoreWriteResult close() throws Exception {
-      if (entities.size() > 0) {
-        flushBatch();
-      }
-      return new DatastoreWriteResult(totalWritten);
-    }
-
-    @Override
-    public DatastoreWriteOperation getWriteOperation() {
-      return writeOp;
-    }
-
-    /**
-     * Commits the buffered entities to the Datastore as one non-transactional batch of upserts,
-     * then empties the buffer.
-     *
-     * <p>A failed commit is retried with exponential backoff (bounded by
-     * {@link DatastoreWriter#MAX_RETRIES}). Every retry resends the whole batch, even if the
-     * failed commit was partially successful. Once the backoff is exhausted, the last exception
-     * from the Datastore is rethrown.
-     *
-     * @throws DatastoreException if the commit fails or IOException or InterruptedException if
-     * backing off between retries fails.
-     */
-    private void flushBatch() throws DatastoreException, IOException, InterruptedException {
-      LOG.debug("Writing batch of {} entities", entities.size());
-      Sleeper sleeper = Sleeper.DEFAULT;
-      BackOff backoff = new AttemptBoundedExponentialBackOff(MAX_RETRIES, INITIAL_BACKOFF_MILLIS);
-
-      boolean committed = false;
-      while (!committed) {
-        try {
-          // Batch upsert entities; the request is rebuilt for every attempt.
-          CommitRequest.Builder commitRequest = CommitRequest.newBuilder();
-          commitRequest.getMutationBuilder().addAllUpsert(entities);
-          commitRequest.setMode(CommitRequest.Mode.NON_TRANSACTIONAL);
-          datastore.commit(commitRequest.build());
-
-          // Reached only if the commit threw no exception.
-          committed = true;
-        } catch (DatastoreException exception) {
-          // Only log the code and message for potentially-transient errors. The entire exception
-          // will be propagated upon the last retry.
-          LOG.error("Error writing to the Datastore ({}): {}", exception.getCode(),
-              exception.getMessage());
-          boolean retryAllowed = BackOffUtils.next(sleeper, backoff);
-          if (!retryAllowed) {
-            LOG.error("Aborting after {} retries.", MAX_RETRIES);
-            throw exception;
-          }
-        }
-      }
-
-      int batchSize = entities.size();
-      totalWritten += batchSize;
-      LOG.debug("Successfully wrote {} entities", batchSize);
-      entities.clear();
-    }
-  }
-
-  /**
-   * Serializable result of a single {@link DatastoreWriter}, carrying the number of entities
-   * that writer reported as written.
-   */
-  private static class DatastoreWriteResult implements Serializable {
-    // Count of entities written, as passed to the constructor.
-    final long entitiesWritten;
-
-    public DatastoreWriteResult(long recordsWritten) {
-      this.entitiesWritten = recordsWritten;
-    }
-  }
-
-  /**
-   * A {@link Source.Reader} over the records from a query of the datastore.
-   *
-   * <p>Timestamped records are currently not supported.
-   * All records implicitly have the timestamp of {@code BoundedWindow.TIMESTAMP_MIN_VALUE}.
-   */
-  public static class DatastoreReader extends BoundedSource.BoundedReader<Entity> {
-    private final Source source;
-
-    /**
-     * Datastore to read from.
-     */
-    private final Datastore datastore;
-
-    /**
-     * True if more results may be available.
-     */
-    private boolean moreResults;
-
-    /**
-     * Iterator over records.
-     */
-    private java.util.Iterator<EntityResult> entities;
-
-    /**
-     * Current batch of query results.
-     */
-    private QueryResultBatch currentBatch;
-
-    /**
-     * Maximum number of results to request per query.
-     *
-     * <p>Must be set, or it may result in an I/O error when querying
-     * Cloud Datastore.
-     */
-    private static final int QUERY_BATCH_LIMIT = 500;
-
-    /**
-     * Remaining user-requested limit on the number of results to return. If the user did not set
-     * a limit, then this variable will always have the value {@link Integer#MAX_VALUE} and will
-     * never be decremented.
-     */
-    private int userLimit;
-
-    private Entity currentEntity;
-
-    /**
-     * Creates a reader over {@code source}'s query.
-     *
-     * @param source the source being read; its query supplies the optional user limit.
-     * @param datastore a datastore connection to use.
-     */
-    public DatastoreReader(Source source, Datastore datastore) {
-      this.source = source;
-      this.datastore = datastore;
-      // If the user set a limit on the query, remember it. Otherwise pin to MAX_VALUE.
-      if (source.query.hasLimit()) {
-        userLimit = source.query.getLimit();
-      } else {
-        userLimit = Integer.MAX_VALUE;
-      }
-    }
-
-    /**
-     * Returns the entity read by the most recent successful {@link #advance}, or {@code null}
-     * if nothing has been read yet or the last {@link #advance} returned {@code false}.
-     */
-    // NOTE(review): the reader contract may expect NoSuchElementException here instead of a
-    // null return; confirm against BoundedSource.BoundedReader before relying on null.
-    @Override
-    public Entity getCurrent() {
-      return currentEntity;
-    }
-
-    @Override
-    public boolean start() throws IOException {
-      return advance();
-    }
-
-    @Override
-    public boolean advance() throws IOException {
-      if (entities == null || (!entities.hasNext() && moreResults)) {
-        try {
-          entities = getIteratorAndMoveCursor();
-        } catch (DatastoreException e) {
-          throw new IOException(e);
-        }
-      }
-
-      if (entities == null || !entities.hasNext()) {
-        currentEntity = null;
-        return false;
-      }
-
-      currentEntity = entities.next().getEntity();
-      return true;
-    }
-
-    @Override
-    public void close() throws IOException {
-      // Nothing
-    }
-
-    @Override
-    public DatastoreIO.Source getCurrentSource() {
-      return source;
-    }
-
-    /** Dynamic splitting is not supported by this reader; always returns {@code null}. */
-    @Override
-    public DatastoreIO.Source splitAtFraction(double fraction) {
-      // Not supported.
-      return null;
-    }
-
-    /** Progress reporting is not supported by this reader; always returns {@code null}. */
-    @Override
-    public Double getFractionConsumed() {
-      // Not supported.
-      return null;
-    }
-
-    /**
-     * Runs the query for the next batch of records and returns an iterator over it, or
-     * {@code null} if the batch came back empty.
-     *
-     * <p>Each request is capped at the smaller of the remaining user limit and
-     * {@link #QUERY_BATCH_LIMIT}, and resumes from the end cursor of the previous batch when
-     * there is one. As a side effect this updates {@code currentBatch}, {@code userLimit}, and
-     * {@code moreResults}.
-     */
-    private Iterator<EntityResult> getIteratorAndMoveCursor() throws DatastoreException {
-      // Work on a copy so the source's query is left untouched.
-      Query.Builder batchQuery = source.query.toBuilder().clone();
-      batchQuery.setLimit(Math.min(userLimit, QUERY_BATCH_LIMIT));
-      boolean resuming = currentBatch != null && currentBatch.hasEndCursor();
-      if (resuming) {
-        batchQuery.setStartCursor(currentBatch.getEndCursor());
-      }
-
-      RunQueryRequest request = source.makeRequest(batchQuery.build());
-      currentBatch = datastore.runQuery(request).getBatch();
-
-      // MORE_RESULTS_AFTER_LIMIT is not implemented yet:
-      // https://groups.google.com/forum/#!topic/gcd-discuss/iNs6M1jA2Vw, so
-      // use result count to determine if more results might exist.
-      int numFetch = currentBatch.getEntityResultCount();
-      if (source.query.hasLimit()) {
-        verify(userLimit >= numFetch,
-            "Expected userLimit %s >= numFetch %s, because query limit %s should be <= userLimit",
-            userLimit, numFetch, batchQuery.getLimit());
-        userLimit -= numFetch;
-      }
-
-      // User-limit does not exist (so userLimit == MAX_VALUE) and/or has not been satisfied.
-      boolean limitUnmet = userLimit > 0;
-      // All indications from the API are that there are/may be more results.
-      moreResults = limitUnmet
-          && ((numFetch == QUERY_BATCH_LIMIT) || (currentBatch.getMoreResults() == NOT_FINISHED));
-
-      // May receive a batch of 0 results if the number of records is a multiple
-      // of the request limit.
-      return (numFetch == 0) ? null : currentBatch.getEntityResultList().iterator();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSink.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSink.java
deleted file mode 100644
index dda500c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSink.java
+++ /dev/null
@@ -1,864 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.api.client.googleapis.batch.BatchRequest;
-import com.google.api.client.googleapis.batch.json.JsonBatchCallback;
-import com.google.api.client.googleapis.json.GoogleJsonError;
-import com.google.api.client.http.HttpHeaders;
-import com.google.api.client.http.HttpRequestInitializer;
-import com.google.api.services.storage.Storage;
-import com.google.api.services.storage.StorageRequest;
-import com.google.api.services.storage.model.StorageObject;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.SerializableCoder;
-import com.google.cloud.dataflow.sdk.options.GcsOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.DefaultTrigger;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.util.FileIOChannelFactory;
-import com.google.cloud.dataflow.sdk.util.GcsIOChannelFactory;
-import com.google.cloud.dataflow.sdk.util.IOChannelFactory;
-import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
-import com.google.cloud.dataflow.sdk.util.MimeTypes;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.hadoop.util.ApiErrorExtractor;
-import com.google.common.base.Preconditions;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.channels.WritableByteChannel;
-import java.nio.file.Files;
-import java.nio.file.NoSuchFileException;
-import java.nio.file.Paths;
-import java.nio.file.StandardCopyOption;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.LinkedList;
-import java.util.List;
-
-import javax.annotation.concurrent.NotThreadSafe;
-
-/**
- * Abstract {@link Sink} for file-based output. An implementation of FileBasedSink writes file-based
- * output and defines the format of output files (how values are written, headers/footers, MIME
- * type, etc.).
- *
- * <p>At pipeline construction time, the methods of FileBasedSink are called to validate the sink
- * and to create a {@link Sink.WriteOperation} that manages the process of writing to the sink.
- *
- * <p>The process of writing to file-based sink is as follows:
- * <ol>
- * <li>An optional subclass-defined initialization,
- * <li>a parallel write of bundles to temporary files, and finally,
- * <li>these temporary files are renamed with final output filenames.
- * </ol>
- *
- * <p>Supported file systems are those registered with {@link IOChannelUtils}.
- *
- * @param <T> the type of values written to the sink.
- */
-public abstract class FileBasedSink<T> extends Sink<T> {
-  /**
-   * Base filename for final output files.
-   */
-  protected final String baseOutputFilename;
-
-  /**
-   * The extension to be used for the final output files.
-   */
-  protected final String extension;
-
-  /**
-   * Naming template for output files. See {@link ShardNameTemplate} for a description of
-   * possible naming templates.  Default is {@link ShardNameTemplate#INDEX_OF_MAX}.
-   */
-  protected final String fileNamingTemplate;
-
-  /**
-   * Construct a FileBasedSink with the given base output filename and extension.
-   */
-  public FileBasedSink(String baseOutputFilename, String extension) {
-    this(baseOutputFilename, extension, ShardNameTemplate.INDEX_OF_MAX);
-  }
-
-  /**
-   * Construct a FileBasedSink from its three settings: the base output filename, the extension
-   * of the final output files, and the file naming template.
-   *
-   * <p>See {@link ShardNameTemplate} for a description of file naming templates.
-   */
-  public FileBasedSink(String baseOutputFilename, String extension, String fileNamingTemplate) {
-    this.fileNamingTemplate = fileNamingTemplate;
-    this.extension = extension;
-    this.baseOutputFilename = baseOutputFilename;
-  }
-
-  /**
-   * Returns the base output filename for this file based sink.
-   */
-  public String getBaseOutputFilename() {
-    return baseOutputFilename;
-  }
-
-  /**
-   * Perform pipeline-construction-time validation. The default implementation is a no-op.
-   * Subclasses should override to ensure the sink is valid and can be written to. It is recommended
-   * to use {@link Preconditions} in the implementation of this method.
-   */
-  @Override
-  public void validate(PipelineOptions options) {}
-
-  /**
-   * Return a subclass of {@link FileBasedSink.FileBasedWriteOperation} that will manage the write
-   * to the sink.
-   */
-  @Override
-  public abstract FileBasedWriteOperation<T> createWriteOperation(PipelineOptions options);
-
-  /**
-   * Abstract {@link Sink.WriteOperation} that manages the process of writing to a
-   * {@link FileBasedSink}.
-   *
-   * <p>The primary responsibility of the FileBasedWriteOperation is the management of output
-   * files. During a write, {@link FileBasedSink.FileBasedWriter}s write bundles to temporary file
-   * locations. After the bundles have been written,
-   * <ol>
-   * <li>{@link FileBasedSink.FileBasedWriteOperation#finalize} is given a list of the temporary
-   * files containing the output bundles.
-   * <li>During finalize, these temporary files are copied to final output locations and named
-   * according to a file naming template.
-   * <li>Finally, any temporary files that were created during the write are removed.
-   * </ol>
-   *
-   * <p>Subclass implementations of FileBasedWriteOperation must implement
-   * {@link FileBasedSink.FileBasedWriteOperation#createWriter} to return a concrete
-   * {@link FileBasedSink.FileBasedWriter}.
-   *
-   * <h2>Temporary and Output File Naming:</h2> During the write, bundles are written to temporary
-   * files using the baseTemporaryFilename that can be provided via the constructor of
-   * FileBasedWriteOperation. These temporary files will be named
-   * {@code {baseTemporaryFilename}-temp-{bundleId}}, where bundleId is the unique id of the bundle.
-   * For example, if baseTemporaryFilename is "gs://my-bucket/my_temp_output", the output for a
-   * bundle with bundle id 15723 will be "gs://my-bucket/my_temp_output-temp-15723".
-   *
-   * <p>Final output files are written to baseOutputFilename with the format
-   * {@code {baseOutputFilename}-0000i-of-0000n.{extension}} where n is the total number of bundles
-   * written and extension is the file extension. Both baseOutputFilename and extension are required
-   * constructor arguments.
-   *
-   * <p>Subclass implementations can change the file naming template by supplying a value for
-   * {@link FileBasedSink#fileNamingTemplate}.
-   *
-   * <h2>Temporary Bundle File Handling:</h2>
-   * <p>{@link FileBasedSink.FileBasedWriteOperation#temporaryFileRetention} controls the behavior
-   * for managing temporary files. By default, temporary files will be removed. Subclasses can
-   * provide a different value to the constructor.
-   *
-   * <p>Note that in the case of permanent failure of a bundle's write, no clean up of temporary
-   * files will occur.
-   *
-   * <p>If there are no elements in the PCollection being written, no output will be generated.
-   *
-   * @param <T> the type of values written to the sink.
-   */
-  public abstract static class FileBasedWriteOperation<T> extends WriteOperation<T, FileResult> {
-    private static final Logger LOG = LoggerFactory.getLogger(FileBasedWriteOperation.class);
-
-    /**
-     * Options for handling of temporary output files.
-     */
-    public enum TemporaryFileRetention {
-      KEEP,
-      REMOVE;
-    }
-
-    /**
-     * The Sink that this WriteOperation will write to.
-     */
-    protected final FileBasedSink<T> sink;
-
-    /**
-     * Option to keep or remove temporary output files.
-     */
-    protected final TemporaryFileRetention temporaryFileRetention;
-
-    /**
-     * Base filename used for temporary output files. Default is the baseOutputFilename.
-     */
-    protected final String baseTemporaryFilename;
-
-    /**
-     * Name separator for temporary files. Temporary files will be named
-     * {@code {baseTemporaryFilename}-temp-{bundleId}}.
-     */
-    protected static final String TEMPORARY_FILENAME_SEPARATOR = "-temp-";
-
-    /**
-     * Build a temporary filename using the temporary filename separator with the given prefix and
-     * suffix.
-     */
-    protected static final String buildTemporaryFilename(String prefix, String suffix) {
-      return prefix + FileBasedWriteOperation.TEMPORARY_FILENAME_SEPARATOR + suffix;
-    }
-
-    /**
-     * Construct a FileBasedWriteOperation using the same base filename for both temporary and
-     * output files.
-     *
-     * @param sink the FileBasedSink that will be used to configure this write operation.
-     */
-    public FileBasedWriteOperation(FileBasedSink<T> sink) {
-      this(sink, sink.baseOutputFilename);
-    }
-
-    /**
-     * Creates a write operation that uses {@link TemporaryFileRetention#REMOVE} retention.
-     *
-     * @param sink the FileBasedSink that will be used to configure this write operation.
-     * @param baseTemporaryFilename the base filename to be used for temporary output files.
-     */
-    public FileBasedWriteOperation(FileBasedSink<T> sink, String baseTemporaryFilename) {
-      this(sink, baseTemporaryFilename, TemporaryFileRetention.REMOVE);
-    }
-
-    /**
-     * Creates a write operation from a sink, a temporary base filename and a retention policy.
-     *
-     * @param sink the FileBasedSink that will be used to configure this write operation.
-     * @param baseTemporaryFilename the prefix used when naming temporary output files.
-     * @param temporaryFileRetention whether finalize() removes temporary files afterwards.
-     */
-    public FileBasedWriteOperation(FileBasedSink<T> sink, String baseTemporaryFilename,
-        TemporaryFileRetention temporaryFileRetention) {
-      this.sink = sink;
-      this.baseTemporaryFilename = baseTemporaryFilename;
-      this.temporaryFileRetention = temporaryFileRetention;
-    }
-
-    /**
-     * Creates the {@link FileBasedSink.FileBasedWriter} that writes one bundle for this operation.
-     * Implementations must follow the restrictions of {@link Sink.WriteOperation#createWriter};
-     * in particular, they must not mutate the state of this object.
-     */
-    @Override
-    public abstract FileBasedWriter<T> createWriter(PipelineOptions options) throws Exception;
-
-    /**
-     * Initialization of the sink. Default implementation is a no-op. May be overridden by subclass
-     * implementations to perform initialization of the sink at pipeline runtime. This method must
-     * be idempotent and is subject to the same implementation restrictions as
-     * {@link Sink.WriteOperation#initialize}.
-     */
-    @Override
-    public void initialize(PipelineOptions options) throws Exception {}
-
-    /**
-     * Completes the write: copies each bundle's temporary file to its final output name and
-     * then, when the retention policy is {@code REMOVE}, deletes the temporary files.
-     *
-     * <p>Subclasses may override this to customize finalization (for example, to merge or
-     * post-process output bundles). An override must be idempotent, because finalization may run
-     * more than once after a failure or for redundancy; making it atomic is a best practice.
-     *
-     * @param writerResults one {@link FileResult} per written bundle, naming its temporary file.
-     * @param options the pipeline options passed on to the copy and remove steps.
-     * @throws Exception if copying or removing files fails.
-     */
-    @Override
-    public void finalize(Iterable<FileResult> writerResults, PipelineOptions options)
-        throws Exception {
-      // Gather the temporary filename reported by every bundle.
-      List<String> temporaryFilenames = new ArrayList<>();
-      for (FileResult bundleResult : writerResults) {
-        String temporaryFilename = bundleResult.getFilename();
-        LOG.debug("Temporary bundle output file {} will be copied.", temporaryFilename);
-        temporaryFilenames.add(temporaryFilename);
-      }
-      copyToOutputFiles(temporaryFilenames, options);
-
-      // Temporary files are deleted only under the REMOVE policy.
-      if (temporaryFileRetention != TemporaryFileRetention.REMOVE) {
-        return;
-      }
-      removeTemporaryFiles(options);
-    }
-
-    /**
-     * Copies temporary files to their final output names, which are derived from the sink's
-     * file naming template.
-     *
-     * <p>Intended for subclasses that override {@link FileBasedWriteOperation#finalize}.
-     *
-     * <p>Temporary filenames are sorted, and the i-th of them is copied to the i-th output name.
-     * For example, with input filenames ["C", "A", "B"], a baseOutputFilename of "file", the
-     * extension ".txt" and the fileNamingTemplate "-SSS-of-NNN", A is copied to
-     * file-000-of-003.txt, B to file-001-of-003.txt and C to file-002-of-003.txt.
-     *
-     * @param filenames the filenames of temporary files; the list itself is not modified.
-     * @param options pipeline options used to select the file system implementation.
-     * @return the final output filenames (empty when {@code filenames} is empty).
-     * @throws IOException if the file system is not recognized or the copy fails.
-     */
-    protected final List<String> copyToOutputFiles(List<String> filenames, PipelineOptions options)
-        throws IOException {
-      int numFiles = filenames.size();
-      List<String> destinations = generateDestinationFilenames(numFiles);
-
-      // Shard order follows the sorted order of the temporary filenames; sort a private copy.
-      List<String> sortedSources = new ArrayList<>(filenames);
-      Collections.sort(sortedSources);
-
-      if (numFiles == 0) {
-        LOG.info("No output files to write.");
-        return destinations;
-      }
-
-      LOG.debug("Copying {} files.", numFiles);
-      FileOperationsFactory.getFileOperations(destinations.get(0), options)
-          .copy(sortedSources, destinations);
-      return destinations;
-    }
-
-    /**
-     * Builds {@code numFiles} output filenames from the sink's base name, template and extension.
-     */
-    protected final List<String> generateDestinationFilenames(int numFiles) {
-      String dottedExtension = getFileExtension(getSink().extension);
-      String base = getSink().baseOutputFilename;
-      String template = getSink().fileNamingTemplate;
-
-      List<String> outputNames = new ArrayList<>();
-      for (int shard = 0; shard < numFiles; shard++) {
-        String outputName =
-            IOChannelUtils.constructName(base, template, dottedExtension, shard, numFiles);
-        outputNames.add(outputName);
-      }
-      return outputNames;
-    }
-
-    /**
-     * Normalizes the user-supplied extension into a filename suffix.
-     *
-     * @param usersExtension the extension requested by the user; may be null or empty.
-     * @return the empty string when no extension was requested; otherwise the extension itself,
-     *     with a {@code "."} prepended if it does not already start with one.
-     */
-    private String getFileExtension(String usersExtension) {
-      boolean absent = usersExtension == null || usersExtension.isEmpty();
-      if (absent) {
-        return "";
-      }
-      return usersExtension.startsWith(".") ? usersExtension : "." + usersExtension;
-    }
-
-    /**
-     * Removes temporary output files. Uses the temporary filename to find files to remove.
-     *
-     * <p>Can be called from subclasses that override {@link FileBasedWriteOperation#finalize}.
-     * <b>Note:</b>If finalize is overridden and does <b>not</b> rename or otherwise finalize
-     * temporary files, this method will remove them.
-     */
-    protected final void removeTemporaryFiles(PipelineOptions options) throws IOException {
-      String pattern = buildTemporaryFilename(baseTemporaryFilename, "*");
-      LOG.debug("Finding temporary bundle output files matching {}.", pattern);
-      FileOperations fileOperations = FileOperationsFactory.getFileOperations(pattern, options);
-      IOChannelFactory factory = IOChannelUtils.getFactory(pattern);
-      Collection<String> matches = factory.match(pattern);
-      LOG.debug("{} temporary files matched {}", matches.size(), pattern);
-      LOG.debug("Removing {} files.", matches.size());
-      fileOperations.remove(matches);
-    }
-
-    /**
-     * Returns a {@link SerializableCoder} for {@link FileBasedSink.FileResult}.
-     */
-    @Override
-    public Coder<FileResult> getWriterResultCoder() {
-      return SerializableCoder.of(FileResult.class);
-    }
-
-    /**
-     * Returns the {@link FileBasedSink} that this write operation writes to.
-     */
-    @Override
-    public FileBasedSink<T> getSink() {
-      return sink;
-    }
-  }
-
-  /**
-   * Abstract {@link Sink.Writer} that writes a bundle to a {@link FileBasedSink}. Subclass
-   * implementations provide a method that can write a single value to a {@link WritableByteChannel}
-   * ({@link Sink.Writer#write}).
-   *
-   * <p>Subclass implementations may also override methods that write headers and footers before and
-   * after the values in a bundle, respectively, as well as provide a MIME type for the output
-   * channel.
-   *
-   * <p>Multiple FileBasedWriter instances may be created on the same worker, and therefore any
-   * access to static members or methods should be thread safe.
-   *
-   * @param <T> the type of values to write.
-   */
-  public abstract static class FileBasedWriter<T> extends Writer<T, FileResult> {
-    private static final Logger LOG = LoggerFactory.getLogger(FileBasedWriter.class);
-
-    final FileBasedWriteOperation<T> writeOperation;
-
-    /**
-     * Unique id for this output bundle.
-     */
-    private String id;
-
-    /**
-     * The filename of the output bundle. Equal to the
-     * {@link FileBasedSink.FileBasedWriteOperation#TEMPORARY_FILENAME_SEPARATOR} and id appended to
-     * the baseName.
-     */
-    private String filename;
-
-    /**
-     * The channel to write to.
-     */
-    private WritableByteChannel channel;
-
-    /**
-     * The MIME type used in the creation of the output channel (if the file system supports it).
-     *
-     * <p>GCS, for example, supports writing files with Content-Type metadata.
-     *
-     * <p>May be overridden. Default is {@link MimeTypes#TEXT}. See {@link MimeTypes} for other
-     * options.
-     */
-    protected String mimeType = MimeTypes.TEXT;
-
-    /**
-     * Creates a writer that belongs to {@code writeOperation}, which must not be null.
-     */
-    public FileBasedWriter(FileBasedWriteOperation<T> writeOperation) {
-      // checkNotNull hands back its argument, so the check and the assignment can be combined.
-      this.writeOperation = Preconditions.checkNotNull(writeOperation);
-    }
-
-    /**
-     * Receives the channel to which the header, values and footer of this bundle are written.
-     * Subclasses should retain either the channel itself or an object wrapping it for later use.
-     *
-     * <p>Invoked by {@code open} before {@code writeHeader}; {@code write} and
-     * {@code writeFooter} calls come afterwards.
-     */
-    protected abstract void prepareWrite(WritableByteChannel channel) throws Exception;
-
-    /** Writes a header at the start of the output file; a no-op unless a subclass overrides. */
-    protected void writeHeader() throws Exception {
-      // No header by default.
-    }
-
-    /** Writes a footer at the end of the output file; a no-op unless a subclass overrides. */
-    protected void writeFooter() throws Exception {
-      // No footer by default.
-    }
-
-    /**
-     * Opens the temporary file for bundle {@code uId}, then calls prepareWrite and writeHeader.
-     */
-    @Override
-    public final void open(String uId) throws Exception {
-      this.id = uId;
-      filename = FileBasedWriteOperation.buildTemporaryFilename(
-          getWriteOperation().baseTemporaryFilename, uId);
-      LOG.debug("Opening {}.", filename);
-      channel = IOChannelUtils.create(filename, mimeType);
-      try {
-        prepareWrite(channel);
-        LOG.debug("Writing header to {}.", filename);
-        writeHeader();
-      } catch (Exception e) {
-        // The caller shouldn't have to close() a Writer that failed to open(), so release the
-        // channel here. The trailing throwable argument makes the logger record the stack trace.
-        LOG.error("Writing header to {} failed, closing channel.", filename, e);
-        try {
-          channel.close();
-        } catch (IOException closeException) {
-          // Keep the original failure primary; attach the close failure instead of dropping it.
-          LOG.error("Closing channel for {} failed: {}", filename, closeException.getMessage());
-          e.addSuppressed(closeException);
-        }
-        throw e;
-      }
-      LOG.debug("Starting write of bundle {} to {}.", this.id, filename);
-    }
-
-    /**
-     * Writes the footer, closes the channel and reports the temporary file written for the bundle.
-     */
-    @Override
-    public final FileResult close() throws Exception {
-      // try-with-resources closes the channel even if writeFooter() throws.
-      try (WritableByteChannel closing = channel) {
-        LOG.debug("Writing footer to {}.", filename);
-        writeFooter();
-      }
-      LOG.debug("Result for bundle {}: {}", this.id, filename);
-      return new FileResult(filename);
-    }
-
-    /**
-     * Returns the FileBasedWriteOperation that this writer was constructed with.
-     */
-    @Override
-    public FileBasedWriteOperation<T> getWriteOperation() {
-      return writeOperation;
-    }
-  }
-
-  /**
-   * Result of a single bundle write: an immutable, serializable holder for the bundle's filename.
-   */
-  public static final class FileResult implements Serializable {
-    private final String filename;
-    /** Creates a result for the bundle written to {@code filename}. */
-    public FileResult(String filename) {
-      this.filename = filename;
-    }
-    /** Returns the filename of the written bundle. */
-    public String getFilename() {
-      return filename;
-    }
-  }
-
-  // File system operations
-  // Warning: These classes are purposefully private and will be replaced by more robust file I/O
-  // utilities. Not for use outside FileBasedSink.
-
-  /**
-   * Chooses the {@link FileOperations} implementation matching a location's IOChannelFactory.
-   */
-  private static class FileOperationsFactory {
-    /**
-     * Returns the FileOperations for the file system that IOChannelUtils associates with
-     * {@code spec}, a location specification that may contain wildcards.
-     *
-     * <p>GCS and local file locations are supported; any other location is rejected.
-     * @throws IOException if the factory for {@code spec} is neither GCS nor local file.
-     */
-    public static FileOperations getFileOperations(String spec, PipelineOptions options)
-        throws IOException {
-      IOChannelFactory channelFactory = IOChannelUtils.getFactory(spec);
-      if (channelFactory instanceof GcsIOChannelFactory) {
-        return new GcsOperations(options);
-      }
-      if (channelFactory instanceof FileIOChannelFactory) {
-        return new LocalFileOperations();
-      }
-      throw new IOException("Unrecognized file system.");
-    }
-  }
-
-  /**
-   * File copy and remove operations with retry-friendly semantics: a missing file is skipped
-   * rather than reported as an error, so that an operation interrupted by a transient failure
-   * can simply be run again.
-   */
-  private interface FileOperations {
-    /**
-     * Copies {@code srcFilenames.get(i)} to {@code destFilenames.get(i)} for every index.
-     *
-     * <p>The number of source filenames must equal the number of destination filenames.
-     *
-     * @param srcFilenames the source filenames.
-     * @param destFilenames the destination filenames.
-     */
-    void copy(List<String> srcFilenames, List<String> destFilenames) throws IOException;
-
-    /**
-     * Removes each of the given files.
-     */
-    void remove(Collection<String> filenames) throws IOException;
-  }
-
-  /**
-   * GCS file system operations.
-   */
-  private static class GcsOperations implements FileOperations {
-    private static final Logger LOG = LoggerFactory.getLogger(GcsOperations.class);
-
-    /**
-     * Maximum number of requests permitted in a GCS batch request.
-     */
-    private static final int MAX_REQUESTS_PER_BATCH = 1000;
-
-    private ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
-    private GcsOptions gcsOptions;
-    private Storage gcs;
-    private BatchHelper batchHelper;
-
-    public GcsOperations(PipelineOptions options) {
-      this.gcsOptions = options.as(GcsOptions.class); // GCS-specific view of the options
-      this.gcs = Transport.newStorageClient(this.gcsOptions).build();
-      this.batchHelper = new BatchHelper(
-          this.gcs.getRequestFactory().getInitializer(), this.gcs, MAX_REQUESTS_PER_BATCH);
-    }
-
-    @Override
-    public void copy(List<String> srcFilenames, List<String> destFilenames) throws IOException {
-      // Preconditions formats %s placeholders, and only when the check actually fails.
-      Preconditions.checkArgument(srcFilenames.size() == destFilenames.size(),
-          "Number of source files %s must equal number of destination files %s",
-          srcFilenames.size(), destFilenames.size());
-      for (int i = 0; i < srcFilenames.size(); i++) {
-        final GcsPath sourcePath = GcsPath.fromUri(srcFilenames.get(i));
-        final GcsPath destPath = GcsPath.fromUri(destFilenames.get(i));
-        LOG.debug("Copying {} to {}", sourcePath, destPath);
-        Storage.Objects.Copy copyObject = gcs.objects().copy(sourcePath.getBucket(),
-            sourcePath.getObject(), destPath.getBucket(), destPath.getObject(), null);
-        batchHelper.queue(copyObject, new JsonBatchCallback<StorageObject>() {
-          @Override
-          public void onSuccess(StorageObject obj, HttpHeaders responseHeaders) {
-            LOG.debug("Successfully copied {} to {}", sourcePath, destPath);
-          }
-
-          @Override
-          public void onFailure(GoogleJsonError e, HttpHeaders responseHeaders) throws IOException {
-            // Do nothing on item not found.
-            if (!errorExtractor.itemNotFound(e)) {
-              throw new IOException(e.toString());
-            }
-            LOG.debug("{} does not exist.", sourcePath);
-          }
-        });
-      }
-      batchHelper.flush();
-    }
-
-    /** Queues a batched delete per file, then flushes; objects that no longer exist are skipped. */
-    @Override
-    public void remove(Collection<String> filenames) throws IOException {
-      for (String filename : filenames) {
-        final GcsPath path = GcsPath.fromUri(filename);
-        LOG.debug("Removing: {}", path);
-        Storage.Objects.Delete delete = gcs.objects().delete(path.getBucket(), path.getObject());
-        batchHelper.queue(delete, new JsonBatchCallback<Void>() {
-          @Override
-          public void onSuccess(Void obj, HttpHeaders responseHeaders) throws IOException {
-            LOG.debug("Successfully removed {}", path);
-          }
-
-          @Override
-          public void onFailure(GoogleJsonError e, HttpHeaders responseHeaders) throws IOException {
-            // Do nothing on item not found.
-            if (!errorExtractor.itemNotFound(e)) {
-              throw new IOException(e.toString());
-            }
-            LOG.debug("{} does not exist.", path);
-          }
-        });
-      }
-      batchHelper.flush();
-    }
-  }
-
-  /**
-   * File systems supported by {@link Files}.
-   */
-  private static class LocalFileOperations implements FileOperations {
-    private static final Logger LOG = LoggerFactory.getLogger(LocalFileOperations.class);
-
-    @Override
-    public void copy(List<String> srcFilenames, List<String> destFilenames) throws IOException {
-      // Preconditions formats %s placeholders, and only when the check actually fails.
-      Preconditions.checkArgument(srcFilenames.size() == destFilenames.size(),
-          "Number of source files %s must equal number of destination files %s",
-          srcFilenames.size(), destFilenames.size());
-      int numFiles = srcFilenames.size();
-      for (int i = 0; i < numFiles; i++) {
-        String src = srcFilenames.get(i);
-        String dst = destFilenames.get(i);
-        LOG.debug("Copying {} to {}", src, dst);
-        copyOne(src, dst);
-      }
-    }
-
-    /** Copies one file over any existing destination; a missing file is logged, not rethrown. */
-    private void copyOne(String source, String destination) throws IOException {
-      try {
-        Files.copy(Paths.get(source), Paths.get(destination), StandardCopyOption.REPLACE_EXISTING);
-      } catch (NoSuchFileException missing) {
-        // Deliberately swallowed so that a retried copy does not fail on an absent file.
-        LOG.debug("{} does not exist.", source);
-      }
-    }
-
-    @Override
-    public void remove(Collection<String> filenames) throws IOException {
-      for (String target : filenames) { // removeOne tolerates files that are already gone
-        LOG.debug("Removing file {}", target);
-        removeOne(target);
-      }
-    }
-
-    /** Deletes {@code filename}; an already-missing file is only logged at debug level. */
-    private void removeOne(String filename) throws IOException {
-      if (!Files.deleteIfExists(Paths.get(filename))) {
-        // deleteIfExists returns false, rather than throwing, when there was nothing to delete.
-        LOG.debug("{} does not exist.", filename);
-      }
-    }
-  }
-
-  /**
-   * BatchHelper abstracts out the logic for the maximum requests per batch for GCS.
-   *
-   * <p>Copy of
-   * https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/src/main/java/com/google/cloud/hadoop/gcsio/BatchHelper.java
-   *
-   * <p>Copied to prevent Dataflow from depending on the Hadoop-related dependencies that are not
-   * used in Dataflow.  Hadoop-related dependencies will be removed from the Google Cloud Storage
-   * Connector (https://cloud.google.com/hadoop/google-cloud-storage-connector) so that this project
-   * and others may use the connector without introducing unnecessary dependencies.
-   *
-   * <p>This class is not thread-safe; create a new BatchHelper instance per single-threaded logical
-   * grouping of requests.
-   */
-  @NotThreadSafe
-  private static class BatchHelper {
-    /**
-     * Deferred action that adds one StorageRequest to the BatchRequest when invoked.
-     */
-    protected interface QueueRequestCallback {
-      void enqueue() throws IOException;
-    }
-
-    private final List<QueueRequestCallback> pendingBatchEntries;
-    private final BatchRequest batch;
-
-    // Number of requests that can be queued into a single actual HTTP request
-    // before a sub-batch is sent.
-    private final long maxRequestsPerBatch;
-
-    // Flag that indicates whether there is an in-progress flush.
-    private boolean flushing = false;
-
-    /**
-     * Creates a helper that sends at most {@code maxRequestsPerBatch} requests per batch execution.
-     */
-    public BatchHelper(
-        HttpRequestInitializer requestInitializer, Storage gcs, long maxRequestsPerBatch) {
-      this.maxRequestsPerBatch = maxRequestsPerBatch;
-      this.batch = gcs.batch(requestInitializer);
-      this.pendingBatchEntries = new LinkedList<>();
-    }
-
-    /**
-     * Registers {@code req} for batched execution with {@code callback} receiving its outcome, and
-     * triggers a flush once more than {@code maxRequestsPerBatch} requests are pending.
-     */
-    public <T> void queue(final StorageRequest<T> req, final JsonBatchCallback<T> callback)
-        throws IOException {
-      // The request is attached to the batch lazily, when the pending entry is drained.
-      pendingBatchEntries.add(new QueueRequestCallback() {
-        @Override
-        public void enqueue() throws IOException {
-          req.queue(batch, callback);
-        }
-      });
-
-      flushIfPossibleAndRequired();
-    }
-
-    // Flushes only once the pending entries outnumber maxRequestsPerBatch.
-    private void flushIfPossibleAndRequired() throws IOException {
-      if (maxRequestsPerBatch < pendingBatchEntries.size()) {
-        flushIfPossible();
-      }
-    }
-
-    // Drains pending entries into the batch (up to maxRequestsPerBatch) and executes it, unless a
-    // flush is already in progress or nothing is pending.
-    private void flushIfPossible() throws IOException {
-      if (flushing || pendingBatchEntries.isEmpty()) {
-        return;
-      }
-      flushing = true;
-      try {
-        while (batch.size() < maxRequestsPerBatch && !pendingBatchEntries.isEmpty()) {
-          pendingBatchEntries.remove(0).enqueue();
-        }
-        batch.execute();
-      } finally {
-        flushing = false;
-      }
-    }
-
-
-    /**
-     * Flushes pending requests; call this after queueing a series of requests so that the tail of
-     * the series is sent. A single call executes at most one batch (see flushIfPossible).
-     */
-    public void flush() throws IOException {
-      flushIfPossible();
-    }
-  }
-
-  static class ReshardForWrite<T> extends PTransform<PCollection<T>, PCollection<T>> {
-    @Override
-    public PCollection<T> apply(PCollection<T> input) {
-      // TODO: This would need to be adapted to write per-window shards.
-      PCollection<T> globallyWindowed = input.apply(Window.<T>into(new GlobalWindows())
-          .triggering(DefaultTrigger.of())
-          .discardingFiredPanes());
-      // Pair each element with a key that starts at a random value and advances by an odd stride.
-      PCollection<KV<Long, T>> keyed = globallyWindowed.apply("RandomKey", ParDo.of(
-          new DoFn<T, KV<Long, T>>() {
-            transient long counter, step;
-            @Override
-            public void startBundle(Context c) {
-              counter = (long) (Math.random() * Long.MAX_VALUE);
-              step = 1 + 2 * (long) (Math.random() * Long.MAX_VALUE);
-            }
-            @Override
-            public void processElement(ProcessContext c) {
-              counter += step;
-              c.output(KV.of(counter, c.element()));
-            }
-          }));
-      // Group by the random key, then emit the grouped values individually again.
-      return keyed.apply(GroupByKey.<Long, T>create()).apply("Ungroup", ParDo.of(
-          new DoFn<KV<Long, Iterable<T>>, T>() {
-            @Override
-            public void processElement(ProcessContext c) {
-              for (T item : c.element().getValue()) {
-                c.output(item);
-              }
-            }
-          }));
-    }
-  }
-}

[59/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java
new file mode 100644
index 0000000..4dfdd85
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java
@@ -0,0 +1,485 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.common;
+
+import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
+import com.google.api.client.util.BackOff;
+import com.google.api.client.util.BackOffUtils;
+import com.google.api.client.util.Sleeper;
+import com.google.api.services.bigquery.Bigquery;
+import com.google.api.services.bigquery.Bigquery.Datasets;
+import com.google.api.services.bigquery.Bigquery.Tables;
+import com.google.api.services.bigquery.model.Dataset;
+import com.google.api.services.bigquery.model.DatasetReference;
+import com.google.api.services.bigquery.model.Table;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.api.services.dataflow.Dataflow;
+import com.google.api.services.pubsub.Pubsub;
+import com.google.api.services.pubsub.model.Subscription;
+import com.google.api.services.pubsub.model.Topic;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.BigQueryOptions;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineJob;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
+import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
+import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.util.AttemptBoundedExponentialBackOff;
+import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
+import com.google.cloud.dataflow.sdk.util.Transport;
+import com.google.cloud.dataflow.sdk.values.PBegin;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.common.base.Strings;
+import com.google.common.base.Throwables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import javax.servlet.http.HttpServletResponse;
+
+/**
+ * The utility class that sets up and tears down external resources, starts the Google Cloud Pub/Sub
+ * injector, and cancels the streaming and the injector pipelines once the program terminates.
+ *
+ * <p>It is used to run Dataflow examples, such as TrafficMaxLaneFlow and TrafficRoutes.
+ */
+public class DataflowExampleUtils {
+
+  private final DataflowPipelineOptions options;
+  private Bigquery bigQueryClient = null;
+  private Pubsub pubsubClient = null;
+  private Dataflow dataflowClient = null;
+  private Set<DataflowPipelineJob> jobsToCancel = Sets.newHashSet();
+  private List<String> pendingMessages = Lists.newArrayList();
+
+  public DataflowExampleUtils(DataflowPipelineOptions options) {
+    this.options = options; // stores the options only; no external resources are set up here
+  }
+
+  /**
+   * Stores the options, then sets up the example's external resources and configures the runner.
+   */
+  public DataflowExampleUtils(DataflowPipelineOptions options, boolean isUnbounded)
+      throws IOException {
+    this.options = options;
+    setupResourcesAndRunner(isUnbounded);
+  }
+
+  /**
+   * Sets up external resources that are required by the example, such as Pub/Sub topics and
+   * BigQuery tables, retrying with backoff when a {@link GoogleJsonResponseException} occurs.
+   * @throws IOException if there is a problem setting up the resources
+   * @throws RuntimeException wrapping the last GoogleJsonResponseException when retrying ends
+   */
+  public void setup() throws IOException {
+    Sleeper sleeper = Sleeper.DEFAULT;
+    BackOff backOff = new AttemptBoundedExponentialBackOff(3, 200);
+    Throwable lastException = null;
+    try {
+      do {
+        try {
+          setupPubsub();
+          setupBigQueryTable();
+          return;
+        } catch (GoogleJsonResponseException e) {
+          lastException = e;
+        }
+      } while (BackOffUtils.next(sleeper, backOff));
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt(); // restore the interrupt status; retrying stops here
+    }
+    Throwables.propagate(lastException);
+  }
+
+  /**
+   * Turns on streaming when {@code isUnbounded}, then calls setup() and setupRunner().
+   */
+  public void setupResourcesAndRunner(boolean isUnbounded) throws IOException {
+    if (isUnbounded) {
+      options.setStreaming(true);
+    }
+    setup();
+    setupRunner();
+  }
+
+  /**
+   * Sets up the Pub/Sub topic named in the options (a new topic with the given name will be
+   * created if it doesn't exist) and, when a subscription name is also given, the subscription.
+   * Nothing is done when the topic option is empty.
+   *
+   * @throws IOException if there is a problem setting up the Pub/Sub topic
+   */
+  public void setupPubsub() throws IOException {
+    ExamplePubsubTopicAndSubscriptionOptions pubsubOptions =
+        options.as(ExamplePubsubTopicAndSubscriptionOptions.class);
+    String topic = pubsubOptions.getPubsubTopic();
+    if (topic.isEmpty()) {
+      return;
+    }
+    pendingMessages.add("**********************Set Up Pubsub************************");
+    setupPubsubTopic(topic);
+    pendingMessages.add("The Pub/Sub topic has been set up for this example: " + topic);
+    String subscription = pubsubOptions.getPubsubSubscription();
+    if (!subscription.isEmpty()) {
+      setupPubsubSubscription(topic, subscription);
+      pendingMessages.add(
+          "The Pub/Sub subscription has been set up for this example: " + subscription);
+    }
+  }
+
+  /**
+   * Sets up the BigQuery table described by the options. Nothing is done unless the dataset,
+   * table and schema options are all set.
+   *
+   * <p>If the table already exists, the schema has to match the given one. Otherwise, the example
+   * will throw a RuntimeException. If the table doesn't exist, a new table with the given schema
+   * will be created.
+   *
+   * @throws IOException if there is a problem setting up the BigQuery table
+   */
+  public void setupBigQueryTable() throws IOException {
+    ExampleBigQueryTableOptions tableOptions =
+        options.as(ExampleBigQueryTableOptions.class);
+    if (tableOptions.getBigQueryDataset() == null
+        || tableOptions.getBigQueryTable() == null
+        || tableOptions.getBigQuerySchema() == null) {
+      return;
+    }
+    pendingMessages.add("******************Set Up Big Query Table*******************");
+    setupBigQueryTable(tableOptions.getProject(), tableOptions.getBigQueryDataset(),
+        tableOptions.getBigQueryTable(), tableOptions.getBigQuerySchema());
+    pendingMessages.add("The BigQuery table has been set up for this example: "
+        + tableOptions.getProject()
+        + ":" + tableOptions.getBigQueryDataset()
+        + "." + tableOptions.getBigQueryTable());
+  }
+
+  /**
+   * Tears down external resources that can be deleted upon the example's completion.
+   *
+   * <p>When the configured Pub/Sub topic name is non-empty, the topic is deleted and then, when
+   * the subscription name is non-empty too, the subscription is deleted. Each outcome is queued
+   * as a pending message. A failed deletion is reported together with the underlying error
+   * instead of being thrown, so that the remaining clean-up still runs.
+   *
+   * <p>The BigQuery table is never deleted here; when one is configured, a reminder to delete
+   * it manually is queued instead.
+   */
+  private void tearDown() {
+    pendingMessages.add("*************************Tear Down*************************");
+    ExamplePubsubTopicAndSubscriptionOptions pubsubOptions =
+        options.as(ExamplePubsubTopicAndSubscriptionOptions.class);
+    if (!pubsubOptions.getPubsubTopic().isEmpty()) {
+      try {
+        deletePubsubTopic(pubsubOptions.getPubsubTopic());
+        pendingMessages.add("The Pub/Sub topic has been deleted: "
+            + pubsubOptions.getPubsubTopic());
+      } catch (IOException e) {
+        // Report the cause as well; without it the user cannot tell why the clean-up failed.
+        pendingMessages.add("Failed to delete the Pub/Sub topic : "
+            + pubsubOptions.getPubsubTopic() + " (" + e + ")");
+      }
+      if (!pubsubOptions.getPubsubSubscription().isEmpty()) {
+        try {
+          deletePubsubSubscription(pubsubOptions.getPubsubSubscription());
+          pendingMessages.add("The Pub/Sub subscription has been deleted: "
+              + pubsubOptions.getPubsubSubscription());
+        } catch (IOException e) {
+          // Same as above: keep the failure reason visible in the final report.
+          pendingMessages.add("Failed to delete the Pub/Sub subscription : "
+              + pubsubOptions.getPubsubSubscription() + " (" + e + ")");
+        }
+      }
+    }
+
+    ExampleBigQueryTableOptions bigQueryTableOptions =
+        options.as(ExampleBigQueryTableOptions.class);
+    if (bigQueryTableOptions.getBigQueryDataset() != null
+        && bigQueryTableOptions.getBigQueryTable() != null
+        && bigQueryTableOptions.getBigQuerySchema() != null) {
+      // The table may hold the example's output, so it is deliberately left in place.
+      pendingMessages.add("The BigQuery table might contain the example's output, "
+          + "and it is not deleted automatically: "
+          + bigQueryTableOptions.getProject()
+          + ":" + bigQueryTableOptions.getBigQueryDataset()
+          + "." + bigQueryTableOptions.getBigQueryTable());
+      pendingMessages.add("Please go to the Developers Console to delete it manually."
+          + " Otherwise, you may be charged for its usage.");
+    }
+  }
+
+  /**
+   * Makes sure the given BigQuery dataset and table exist.
+   *
+   * <p>A dataset or table that is not found is created. A table that is found must already
+   * have a schema equal to {@code schema}.
+   *
+   * @throws IOException if a BigQuery request fails for a reason other than "not found"
+   * @throws RuntimeException if the table exists with a different schema
+   */
+  private void setupBigQueryTable(String projectId, String datasetId, String tableId,
+      TableSchema schema) throws IOException {
+    if (bigQueryClient == null) {
+      // The client is built on first use and then kept in the field.
+      bigQueryClient = Transport.newBigQueryClient(options.as(BigQueryOptions.class)).build();
+    }
+
+    // Step 1: create the dataset when the lookup reports that it does not exist.
+    Datasets datasets = bigQueryClient.datasets();
+    if (executeNullIfNotFound(datasets.get(projectId, datasetId)) == null) {
+      DatasetReference datasetRef =
+          new DatasetReference().setProjectId(projectId).setDatasetId(datasetId);
+      datasets.insert(projectId, new Dataset().setDatasetReference(datasetRef)).execute();
+    }
+
+    // Step 2: create the table, or verify the schema of the existing one.
+    Tables tables = bigQueryClient.tables();
+    Table existingTable = executeNullIfNotFound(tables.get(projectId, datasetId, tableId));
+    if (existingTable == null) {
+      TableReference tableRef = new TableReference()
+          .setProjectId(projectId)
+          .setDatasetId(datasetId)
+          .setTableId(tableId);
+      Table tableToCreate = new Table().setSchema(schema).setTableReference(tableRef);
+      tables.insert(projectId, datasetId, tableToCreate).execute();
+      return;
+    }
+
+    if (!existingTable.getSchema().equals(schema)) {
+      throw new RuntimeException(
+          "Table exists and schemas do not match, expecting: " + schema.toPrettyString()
+          + ", actual: " + existingTable.getSchema().toPrettyString());
+    }
+  }
+
+  /**
+   * Creates the Pub/Sub topic with the given name when a lookup reports that it is not found.
+   *
+   * @throws IOException if the lookup fails for another reason or the creation fails
+   */
+  private void setupPubsubTopic(String topic) throws IOException {
+    if (pubsubClient == null) {
+      // The client is built on first use and then kept in the field.
+      pubsubClient = Transport.newPubsubClient(options).build();
+    }
+    boolean topicMissing =
+        executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) == null;
+    if (topicMissing) {
+      Topic newTopic = new Topic().setName(topic);
+      pubsubClient.projects().topics().create(topic, newTopic).execute();
+    }
+  }
+
+  /**
+   * Creates the given Pub/Sub subscription on {@code topic} when a lookup reports that the
+   * subscription is not found. New subscriptions get an ack deadline of 60 seconds.
+   *
+   * @throws IOException if the lookup fails for another reason or the creation fails
+   */
+  private void setupPubsubSubscription(String topic, String subscription) throws IOException {
+    if (pubsubClient == null) {
+      // The client is built on first use and then kept in the field.
+      pubsubClient = Transport.newPubsubClient(options).build();
+    }
+    boolean subscriptionMissing =
+        executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) == null;
+    if (subscriptionMissing) {
+      Subscription newSubscription = new Subscription();
+      newSubscription.setAckDeadlineSeconds(60);
+      newSubscription.setTopic(topic);
+      pubsubClient.projects().subscriptions().create(subscription, newSubscription).execute();
+    }
+  }
+
+  /**
+   * Deletes the Google Cloud Pub/Sub topic with the given name.
+   *
+   * <p>The topic is looked up first; when the lookup reports "not found" nothing is deleted.
+   *
+   * @throws IOException if there is a problem deleting the Pub/Sub topic
+   */
+  private void deletePubsubTopic(String topic) throws IOException {
+    if (pubsubClient == null) {
+      // The client is built on first use and then kept in the field.
+      pubsubClient = Transport.newPubsubClient(options).build();
+    }
+    if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) == null) {
+      return;
+    }
+    pubsubClient.projects().topics().delete(topic).execute();
+  }
+
+  /**
+   * Deletes the Google Cloud Pub/Sub subscription with the given name.
+   *
+   * <p>The subscription is looked up first; when the lookup reports "not found" nothing is
+   * deleted.
+   *
+   * @throws IOException if there is a problem deleting the Pub/Sub subscription
+   */
+  private void deletePubsubSubscription(String subscription) throws IOException {
+    if (pubsubClient == null) {
+      // The client is built on first use and then kept in the field.
+      pubsubClient = Transport.newPubsubClient(options).build();
+    }
+    if (executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription)) == null) {
+      return;
+    }
+    pubsubClient.projects().subscriptions().delete(subscription).execute();
+  }
+
+  /**
+   * Starts an 'injector' pipeline that publishes the contents of {@code inputFile} to the
+   * configured Pub/Sub topic.
+   *
+   * <p>The injector is only started when this is a streaming pipeline and both
+   * {@code inputFile} and the configured topic are neither null nor empty; otherwise this
+   * method does nothing.
+   */
+  public void startInjectorIfNeeded(String inputFile) {
+    ExamplePubsubTopicOptions topicOptions = options.as(ExamplePubsubTopicOptions.class);
+    if (!topicOptions.isStreaming() || Strings.isNullOrEmpty(inputFile)) {
+      return;
+    }
+    String topic = topicOptions.getPubsubTopic();
+    if (Strings.isNullOrEmpty(topic)) {
+      return;
+    }
+    runInjectorPipeline(inputFile, topic);
+  }
+
+  /**
+   * Adjusts the configured runner for streaming execution.
+   *
+   * <p>When streaming is enabled and the configured runner is anything other than
+   * {@code DirectPipelineRunner}, the runner is replaced with {@code DataflowPipelineRunner}.
+   * In every other case the options are left untouched. This method returns nothing.
+   */
+  public void setupRunner() {
+    if (options.isStreaming() && options.getRunner() != DirectPipelineRunner.class) {
+      // In order to cancel the pipelines automatically,
+      // DataflowPipelineRunner is forced to be used.
+      options.setRunner(DataflowPipelineRunner.class);
+    }
+  }
+
+  /**
+   * Runs a batch pipeline that injects the lines of a text file into a Pub/Sub topic.
+   *
+   * <p>This is a convenience overload: it reads {@code inputFile} with {@code TextIO} and
+   * delegates to the source-based overload without a timestamp label key.
+   */
+  public void runInjectorPipeline(String inputFile, String topic) {
+    runInjectorPipeline(
+        TextIO.Read.from(inputFile),
+        topic,
+        null);
+  }
+
+  /**
+   * Runs a batch pipeline to inject data into the PubSubIO input topic.
+   *
+   * <p>The injector pipeline reads from the given source and publishes every element to the
+   * Google Cloud Pub/Sub topic. It runs on a copy of this example's options with streaming
+   * disabled, the injector worker count applied and {@code "-injector"} appended to the job
+   * name.
+   *
+   * @param readSource the transform producing the lines to publish
+   * @param topic the Pub/Sub topic to publish to
+   * @param pubsubTimestampLabelKey the attribute key under which each message's timestamp is
+   *     published; when null or empty no timestamp attribute is attached
+   */
+  public void runInjectorPipeline(PTransform<? super PBegin, PCollection<String>> readSource,
+                                  String topic,
+                                  String pubsubTimestampLabelKey) {
+    PubsubFileInjector.Bound injector;
+    if (Strings.isNullOrEmpty(pubsubTimestampLabelKey)) {
+      injector = PubsubFileInjector.publish(topic);
+    } else {
+      injector = PubsubFileInjector.withTimestampLabelKey(pubsubTimestampLabelKey).publish(topic);
+    }
+
+    DataflowPipelineOptions copiedOptions = options.cloneAs(DataflowPipelineOptions.class);
+    // NOTE(review): the service account settings are re-applied explicitly, presumably because
+    // cloneAs() does not carry them over -- confirm.
+    if (options.getServiceAccountName() != null) {
+      copiedOptions.setServiceAccountName(options.getServiceAccountName());
+    }
+    if (options.getServiceAccountKeyfile() != null) {
+      copiedOptions.setServiceAccountKeyfile(options.getServiceAccountKeyfile());
+    }
+    // The injector is always a batch job, even though the main pipeline is streaming.
+    copiedOptions.setStreaming(false);
+    copiedOptions.setNumWorkers(options.as(DataflowExampleOptions.class).getInjectorNumWorkers());
+    copiedOptions.setJobName(options.getJobName() + "-injector");
+
+    Pipeline injectorPipeline = Pipeline.create(copiedOptions);
+    injectorPipeline.apply(readSource)
+                    .apply(IntraBundleParallelization
+                        .of(injector)
+                        .withMaxParallelism(20));
+    // Running the pipeline and registering the job for cancellation is shared with the
+    // Pipeline-based overload.
+    runInjectorPipeline(injectorPipeline);
+  }
+
+  /**
+   * Runs the provided injector pipeline.
+   *
+   * <p>When the run yields a {@code DataflowPipelineJob}, the job is remembered in
+   * {@code jobsToCancel}; any other kind of result is ignored.
+   */
+  public void runInjectorPipeline(Pipeline injectorPipeline) {
+    PipelineResult injectorResult = injectorPipeline.run();
+    if (!(injectorResult instanceof DataflowPipelineJob)) {
+      return;
+    }
+    DataflowPipelineJob injectorJob = (DataflowPipelineJob) injectorResult;
+    jobsToCancel.add(injectorJob);
+  }
+
+  /**
+   * Simulates an unbounded source: starts the auxiliary injector pipeline for
+   * {@code inputFile} (if one is needed) and then waits for the pipeline behind
+   * {@code result} to finish.
+   */
+  public void mockUnboundedSource(String inputFile, PipelineResult result) {
+    // Order matters: the injector has to be started before blocking on the main pipeline.
+    startInjectorIfNeeded(inputFile);
+    waitToFinish(result);
+  }
+
+  /**
+   * Waits for the given pipeline to finish.
+   *
+   * <p>If the result is a {@code DataflowPipelineJob}, the job is registered for cancellation,
+   * a shutdown hook that cancels the registered jobs is installed (unless the
+   * {@code keepJobsRunning} option is set) and the call blocks until the job finishes, printing
+   * its messages to {@code System.out}.
+   *
+   * <p>For any other kind of result there is nothing to wait for, so the external resources
+   * are torn down and the pending messages are printed immediately.
+   *
+   * @throws RuntimeException if waiting for the job fails; the original failure is attached as
+   *     the cause
+   */
+  public void waitToFinish(PipelineResult result) {
+    if (result instanceof DataflowPipelineJob) {
+      final DataflowPipelineJob job = (DataflowPipelineJob) result;
+      jobsToCancel.add(job);
+      if (!options.as(DataflowExampleOptions.class).getKeepJobsRunning()) {
+        addShutdownHook(jobsToCancel);
+      }
+      try {
+        job.waitToFinish(-1, TimeUnit.SECONDS, new MonitoringUtil.PrintHandler(System.out));
+      } catch (Exception e) {
+        if (e instanceof InterruptedException) {
+          // Restore the interrupt flag so that callers can still observe the interruption.
+          Thread.currentThread().interrupt();
+        }
+        // Chain the original exception; otherwise the reason for the failure is lost.
+        throw new RuntimeException("Failed to wait for job to finish: " + job.getJobId(), e);
+      }
+    } else {
+      // The given PipelineResult doesn't support waitToFinish() (such as EvaluationResults
+      // returned by DirectPipelineRunner), so clean up and report right away.
+      tearDown();
+      printPendingMessages();
+    }
+  }
+
+  /**
+   * Registers a JVM shutdown hook that cleans up after the example.
+   *
+   * <p>When the hook runs it tears down the external resources, prints the pending messages,
+   * requests cancellation of every job in {@code jobs} and then checks each job's state up to
+   * six times, sleeping ten seconds after every check that finds the job not yet terminal.
+   * Jobs that cannot be cancelled or verified are reported with their monitoring page URL.
+   */
+  private void addShutdownHook(final Collection<DataflowPipelineJob> jobs) {
+    if (dataflowClient == null) {
+      dataflowClient = options.getDataflowClient();
+    }
+
+    Thread cancelJobsOnExit = new Thread() {
+      @Override
+      public void run() {
+        tearDown();
+        printPendingMessages();
+
+        // Pass 1: ask the service to cancel every job.
+        for (DataflowPipelineJob job : jobs) {
+          System.out.println("Canceling example pipeline: " + job.getJobId());
+          try {
+            job.cancel();
+          } catch (IOException e) {
+            System.out.println("Failed to cancel the job,"
+                + " please go to the Developers Console to cancel it manually");
+            System.out.println(
+                MonitoringUtil.getJobMonitoringPageURL(job.getProjectId(), job.getJobId()));
+          }
+        }
+
+        // Pass 2: poll each job until it reaches a terminal state or the attempts run out.
+        for (DataflowPipelineJob job : jobs) {
+          boolean cancellationVerified = false;
+          int attemptsLeft = 6;
+          while (attemptsLeft > 0 && !cancellationVerified) {
+            attemptsLeft--;
+            if (job.getState().isTerminal()) {
+              cancellationVerified = true;
+              System.out.println("Canceled example pipeline: " + job.getJobId());
+            } else {
+              System.out.println(
+                  "The example pipeline is still running. Verifying the cancellation.");
+              try {
+                Thread.sleep(10000);
+              } catch (InterruptedException e) {
+                // Ignore
+              }
+            }
+          }
+          if (!cancellationVerified) {
+            System.out.println("Failed to verify the cancellation for job: " + job.getJobId());
+            System.out.println("Please go to the Developers Console to verify manually:");
+            System.out.println(
+                MonitoringUtil.getJobMonitoringPageURL(job.getProjectId(), job.getJobId()));
+          }
+        }
+      }
+    };
+    Runtime.getRuntime().addShutdownHook(cancelJobsOnExit);
+  }
+
+  private void printPendingMessages() {
+    System.out.println();
+    System.out.println("***********************************************************");
+    System.out.println("***********************************************************");
+    for (String message : pendingMessages) {
+      System.out.println(message);
+    }
+    System.out.println("***********************************************************");
+    System.out.println("***********************************************************");
+  }
+
+  /**
+   * Executes the request and returns its result, mapping an HTTP 404 response to {@code null}.
+   *
+   * @return the response of the request, or {@code null} if the service answered "not found"
+   * @throws IOException if the request fails in any other way, including a
+   *     {@code GoogleJsonResponseException} with a status code other than 404
+   */
+  private static <T> T executeNullIfNotFound(
+      AbstractGoogleClientRequest<T> request) throws IOException {
+    try {
+      return request.execute();
+    } catch (GoogleJsonResponseException e) {
+      if (e.getStatusCode() != HttpServletResponse.SC_NOT_FOUND) {
+        throw e;
+      }
+      return null;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExampleBigQueryTableOptions.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExampleBigQueryTableOptions.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExampleBigQueryTableOptions.java
new file mode 100644
index 0000000..7c213b5
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExampleBigQueryTableOptions.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.common;
+
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+
+/**
+ * Options that can be used to configure BigQuery tables in Dataflow examples.
+ * The project defaults to the project being used to run the example.
+ */
+public interface ExampleBigQueryTableOptions extends DataflowPipelineOptions {
+  @Description("BigQuery dataset name")
+  @Default.String("dataflow_examples")
+  String getBigQueryDataset();
+  void setBigQueryDataset(String dataset);
+
+  @Description("BigQuery table name")
+  @Default.InstanceFactory(BigQueryTableFactory.class)
+  String getBigQueryTable();
+  void setBigQueryTable(String table);
+
+  @Description("BigQuery table schema")
+  TableSchema getBigQuerySchema();
+  void setBigQuerySchema(TableSchema schema);
+
+  /**
+   * Returns the job name as the default BigQuery table name.
+   */
+  static class BigQueryTableFactory implements DefaultValueFactory<String> {
+    @Override
+    public String create(PipelineOptions options) {
+      return options.as(DataflowPipelineOptions.class).getJobName()
+          .replace('-', '_');
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicAndSubscriptionOptions.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicAndSubscriptionOptions.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicAndSubscriptionOptions.java
new file mode 100644
index 0000000..d7bd4b8
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicAndSubscriptionOptions.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.common;
+
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+
+/**
+ * Pipeline options that add a Pub/Sub subscription to the Pub/Sub topic options of the
+ * Dataflow examples.
+ */
+public interface ExamplePubsubTopicAndSubscriptionOptions extends ExamplePubsubTopicOptions {
+  @Description("Pub/Sub subscription")
+  @Default.InstanceFactory(PubsubSubscriptionFactory.class)
+  String getPubsubSubscription();
+  void setPubsubSubscription(String subscription);
+
+  /**
+   * Builds the default subscription path
+   * {@code projects/<project>/subscriptions/<job name>} from the pipeline options.
+   */
+  static class PubsubSubscriptionFactory implements DefaultValueFactory<String> {
+    @Override
+    public String create(PipelineOptions options) {
+      DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
+      String project = dataflowOptions.getProject();
+      String jobName = dataflowOptions.getJobName();
+      return "projects/" + project + "/subscriptions/" + jobName;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicOptions.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicOptions.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicOptions.java
new file mode 100644
index 0000000..4bedf31
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicOptions.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.common;
+
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+
+/**
+ * Pipeline options describing the Pub/Sub topic used by the Dataflow examples.
+ */
+public interface ExamplePubsubTopicOptions extends DataflowPipelineOptions {
+  @Description("Pub/Sub topic")
+  @Default.InstanceFactory(PubsubTopicFactory.class)
+  String getPubsubTopic();
+  void setPubsubTopic(String topic);
+
+  /**
+   * Builds the default topic path {@code projects/<project>/topics/<job name>} from the
+   * pipeline options.
+   */
+  static class PubsubTopicFactory implements DefaultValueFactory<String> {
+    @Override
+    public String create(PipelineOptions options) {
+      DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
+      String project = dataflowOptions.getProject();
+      String jobName = dataflowOptions.getJobName();
+      return "projects/" + project + "/topics/" + jobName;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/PubsubFileInjector.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/PubsubFileInjector.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/PubsubFileInjector.java
new file mode 100644
index 0000000..4a82ae6
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/PubsubFileInjector.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.common;
+
+import com.google.api.services.pubsub.Pubsub;
+import com.google.api.services.pubsub.model.PublishRequest;
+import com.google.api.services.pubsub.model.PubsubMessage;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
+import com.google.cloud.dataflow.sdk.util.Transport;
+import com.google.common.collect.ImmutableMap;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+/**
+ * A batch Dataflow pipeline for injecting a set of GCS files into
+ * a PubSub topic line by line. Empty lines are skipped.
+ *
+ * <p>This is useful for testing streaming
+ * pipelines. Note that since batch pipelines might retry chunks, this
+ * does _not_ guarantee exactly-once injection of file data. Some lines may
+ * be published multiple times.
+ * </p>
+ */
+public class PubsubFileInjector {
+
+  /**
+   * An incomplete {@code PubsubFileInjector} transform with unbound output topic.
+   *
+   * <p>It only carries the optional timestamp label key; calling {@link #publish} supplies the
+   * topic and yields the usable {@link Bound} function.
+   */
+  public static class Unbound {
+    /** Attribute key for the element timestamp, or null when no timestamp is published. */
+    private final String timestampLabelKey;
+
+    Unbound() {
+      this.timestampLabelKey = null;
+    }
+
+    Unbound(String timestampLabelKey) {
+      this.timestampLabelKey = timestampLabelKey;
+    }
+
+    /** Returns a new {@code Unbound} that uses the given timestamp label key. */
+    Unbound withTimestampLabelKey(String timestampLabelKey) {
+      return new Unbound(timestampLabelKey);
+    }
+
+    /** Returns a {@link Bound} that publishes to the given topic with this timestamp key. */
+    public Bound publish(String outputTopic) {
+      return new Bound(outputTopic, timestampLabelKey);
+    }
+  }
+
+  /** A DoFn that publishes non-empty lines to Google Cloud PubSub. */
+  public static class Bound extends DoFn<String, Void> {
+    private final String outputTopic;
+    private final String timestampLabelKey;
+    /** Pub/Sub client; transient, so it is rebuilt in {@link #startBundle} for every bundle. */
+    public transient Pubsub pubsub;
+
+    public Bound(String outputTopic, String timestampLabelKey) {
+      this.outputTopic = outputTopic;
+      this.timestampLabelKey = timestampLabelKey;
+    }
+
+    /** Builds the Pub/Sub client from the pipeline options of the running bundle. */
+    @Override
+    public void startBundle(Context context) {
+      this.pubsub =
+          Transport.newPubsubClient(context.getPipelineOptions().as(DataflowPipelineOptions.class))
+              .build();
+    }
+
+    /**
+     * Publishes the element as a single Pub/Sub message; empty elements are dropped.
+     *
+     * <p>When a timestamp label key is configured, the element's timestamp (in milliseconds)
+     * is attached as a message attribute under that key.
+     *
+     * @throws IOException if the publish request fails
+     */
+    @Override
+    public void processElement(ProcessContext c) throws IOException {
+      if (c.element().isEmpty()) {
+        return;
+      }
+      PubsubMessage pubsubMessage = new PubsubMessage();
+      // Encode with an explicit charset: the no-argument getBytes() depends on the platform
+      // default, which may differ between the machines that run the pipeline.
+      pubsubMessage.encodeData(c.element().getBytes(StandardCharsets.UTF_8));
+      if (timestampLabelKey != null) {
+        pubsubMessage.setAttributes(
+            ImmutableMap.of(timestampLabelKey, Long.toString(c.timestamp().getMillis())));
+      }
+      PublishRequest publishRequest = new PublishRequest();
+      publishRequest.setMessages(Arrays.asList(pubsubMessage));
+      this.pubsub.projects().topics().publish(outputTopic, publishRequest).execute();
+    }
+  }
+
+  /**
+   * Creates a {@code PubsubFileInjector} transform with the given timestamp label key.
+   */
+  public static Unbound withTimestampLabelKey(String timestampLabelKey) {
+    return new Unbound(timestampLabelKey);
+  }
+
+  /**
+   * Creates a {@code PubsubFileInjector} transform that publishes to the given output topic.
+   */
+  public static Bound publish(String outputTopic) {
+    return new Unbound().publish(outputTopic);
+  }
+
+  /**
+   * Command line parameter options.
+   */
+  private interface PubsubFileInjectorOptions extends PipelineOptions {
+    @Description("GCS location of files.")
+    @Validation.Required
+    String getInput();
+    void setInput(String value);
+
+    @Description("Topic to publish on.")
+    @Validation.Required
+    String getOutputTopic();
+    void setOutputTopic(String value);
+  }
+
+  /**
+   * Sets up and runs the injector pipeline: reads the input files as text and publishes every
+   * non-empty line to the output topic.
+   */
+  public static void main(String[] args) {
+    PubsubFileInjectorOptions options = PipelineOptionsFactory.fromArgs(args)
+        .withValidation()
+        .as(PubsubFileInjectorOptions.class);
+
+    Pipeline pipeline = Pipeline.create(options);
+
+    pipeline
+        .apply(TextIO.Read.from(options.getInput()))
+        .apply(IntraBundleParallelization.of(PubsubFileInjector.publish(options.getOutputTopic()))
+            .withMaxParallelism(20));
+
+    pipeline.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java
new file mode 100644
index 0000000..f897338
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java
@@ -0,0 +1,516 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.api.services.datastore.DatastoreV1.Entity;
+import com.google.api.services.datastore.DatastoreV1.Key;
+import com.google.api.services.datastore.DatastoreV1.Value;
+import com.google.api.services.datastore.client.DatastoreHelper;
+import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
+import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
+import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicOptions;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.coders.AvroCoder;
+import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.DatastoreIO;
+import com.google.cloud.dataflow.sdk.io.PubsubIO;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.Filter;
+import com.google.cloud.dataflow.sdk.transforms.Flatten;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.Partition;
+import com.google.cloud.dataflow.sdk.transforms.Partition.PartitionFn;
+import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
+import com.google.cloud.dataflow.sdk.transforms.Top;
+import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PBegin;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.PCollectionList;
+import com.google.common.base.MoreObjects;
+import com.google.common.base.Preconditions;
+
+import org.joda.time.Duration;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * An example that computes the most popular hash tags
+ * for every prefix, which can be used for auto-completion.
+ *
+ * <p>Concepts: Using the same pipeline in both streaming and batch, combiners,
+ *              composite transforms.
+ *
+ * <p>To execute this pipeline using the Dataflow service in batch mode,
+ * specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=DataflowPipelineRunner
+ *   --inputFile=gs://path/to/input*.txt
+ * }</pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service in streaming mode,
+ * specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=DataflowPipelineRunner
+ *   --inputFile=gs://YOUR_INPUT_DIRECTORY/*.txt
+ *   --streaming
+ * }</pre>
+ *
+ * <p>In streaming mode, results are recomputed every 5 seconds over a sliding window
+ * covering the last 30 minutes of data received.
+ */
+public class AutoComplete {
+
+  /**
+   * A PTransform that takes as input a list of tokens and returns
+   * the most common tokens per prefix.
+   */
+  public static class ComputeTopCompletions
+      extends PTransform<PCollection<String>, PCollection<KV<String, List<CompletionCandidate>>>> {
+    private final int candidatesPerPrefix;
+    private final boolean recursive;
+
+    protected ComputeTopCompletions(int candidatesPerPrefix, boolean recursive) {
+      this.candidatesPerPrefix = candidatesPerPrefix;
+      this.recursive = recursive;
+    }
+
+    public static ComputeTopCompletions top(int candidatesPerPrefix, boolean recursive) {
+      return new ComputeTopCompletions(candidatesPerPrefix, recursive);
+    }
+
+    @Override
+    public PCollection<KV<String, List<CompletionCandidate>>> apply(PCollection<String> input) {
+      PCollection<CompletionCandidate> candidates = input
+        // First count how often each token appears.
+        .apply(new Count.PerElement<String>())
+
+        // Map the KV outputs of Count into our own CompletionCandiate class.
+        .apply(ParDo.named("CreateCompletionCandidates").of(
+            new DoFn<KV<String, Long>, CompletionCandidate>() {
+              @Override
+              public void processElement(ProcessContext c) {
+                c.output(new CompletionCandidate(c.element().getKey(), c.element().getValue()));
+              }
+            }));
+
+      // Compute the top via either a flat or recursive algorithm.
+      if (recursive) {
+        return candidates
+          .apply(new ComputeTopRecursive(candidatesPerPrefix, 1))
+          .apply(Flatten.<KV<String, List<CompletionCandidate>>>pCollections());
+      } else {
+        return candidates
+          .apply(new ComputeTopFlat(candidatesPerPrefix, 1));
+      }
+    }
+  }
+
+  /**
+   * Lower latency, but more expensive.
+   */
+  private static class ComputeTopFlat
+      extends PTransform<PCollection<CompletionCandidate>,
+                         PCollection<KV<String, List<CompletionCandidate>>>> {
+    private final int candidatesPerPrefix;
+    private final int minPrefix;
+
+    public ComputeTopFlat(int candidatesPerPrefix, int minPrefix) {
+      this.candidatesPerPrefix = candidatesPerPrefix;
+      this.minPrefix = minPrefix;
+    }
+
+    @Override
+    public PCollection<KV<String, List<CompletionCandidate>>> apply(
+        PCollection<CompletionCandidate> input) {
+      return input
+        // For each completion candidate, map it to all prefixes.
+        .apply(ParDo.of(new AllPrefixes(minPrefix)))
+
+        // Find and return the top candiates for each prefix.
+        .apply(Top.<String, CompletionCandidate>largestPerKey(candidatesPerPrefix)
+               .withHotKeyFanout(new HotKeyFanout()));
+    }
+
+    private static class HotKeyFanout implements SerializableFunction<String, Integer> {
+      @Override
+      public Integer apply(String input) {
+        return (int) Math.pow(4, 5 - input.length());
+      }
+    }
+  }
+
+  /**
+   * Cheaper but higher latency.
+   *
+   * <p>Returns two PCollections, the first is top prefixes of size greater
+   * than minPrefix, and the second is top prefixes of size exactly
+   * minPrefix.
+   *
+   * <p>The transform applies itself recursively with {@code minPrefix + 1} until
+   * {@code minPrefix} exceeds 10, at which point it falls back to {@code ComputeTopFlat}.
+   */
+  private static class ComputeTopRecursive
+      extends PTransform<PCollection<CompletionCandidate>,
+                         PCollectionList<KV<String, List<CompletionCandidate>>>> {
+    private final int candidatesPerPrefix;
+    private final int minPrefix;
+
+    public ComputeTopRecursive(int candidatesPerPrefix, int minPrefix) {
+      this.candidatesPerPrefix = candidatesPerPrefix;
+      this.minPrefix = minPrefix;
+    }
+
+    /**
+     * Sends keys longer than {@code minPrefix} to partition 0 and all other keys to
+     * partition 1. Non-static because it reads {@code minPrefix} from the enclosing transform.
+     */
+    private class KeySizePartitionFn implements PartitionFn<KV<String, List<CompletionCandidate>>> {
+      @Override
+      public int partitionFor(KV<String, List<CompletionCandidate>> elem, int numPartitions) {
+        return elem.getKey().length() > minPrefix ? 0 : 1;
+      }
+    }
+
+    /** Emits every candidate of each per-prefix list as an individual element. */
+    private static class FlattenTops
+        extends DoFn<KV<String, List<CompletionCandidate>>, CompletionCandidate> {
+      @Override
+      public void processElement(ProcessContext c) {
+        for (CompletionCandidate cc : c.element().getValue()) {
+          c.output(cc);
+        }
+      }
+    }
+
+    @Override
+    public PCollectionList<KV<String, List<CompletionCandidate>>> apply(
+          PCollection<CompletionCandidate> input) {
+        // Recursion stops once minPrefix exceeds 10; longer prefixes are handled flat.
+        if (minPrefix > 10) {
+          // Base case, partitioning to return the output in the expected format.
+          return input
+            .apply(new ComputeTopFlat(candidatesPerPrefix, minPrefix))
+            .apply(Partition.of(2, new KeySizePartitionFn()));
+        } else {
+          // If a candidate is in the top N for prefix a...b, it must also be in the top
+          // N for a...bX for every X, which is typically a much smaller set to consider.
+          // First, compute the top candidate for prefixes of size at least minPrefix + 1.
+          PCollectionList<KV<String, List<CompletionCandidate>>> larger = input
+            .apply(new ComputeTopRecursive(candidatesPerPrefix, minPrefix + 1));
+          // Consider the top candidates for each prefix of length minPrefix + 1...
+          // (larger.get(1) is the "exactly minPrefix + 1" partition of the recursive result.)
+          PCollection<KV<String, List<CompletionCandidate>>> small =
+            PCollectionList
+            .of(larger.get(1).apply(ParDo.of(new FlattenTops())))
+            // ...together with those (previously excluded) candidates of length
+            // exactly minPrefix...
+            .and(input.apply(Filter.byPredicate(
+                new SerializableFunction<CompletionCandidate, Boolean>() {
+                  @Override
+                  public Boolean apply(CompletionCandidate c) {
+                    return c.getValue().length() == minPrefix;
+                  }
+                })))
+            .apply("FlattenSmall", Flatten.<CompletionCandidate>pCollections())
+            // ...set the key to be the minPrefix-length prefix...
+            .apply(ParDo.of(new AllPrefixes(minPrefix, minPrefix)))
+            // ...and (re)apply the Top operator to all of them together.
+            .apply(Top.<String, CompletionCandidate>largestPerKey(candidatesPerPrefix));
+
+          // Merge both partitions of the recursive result: everything longer than minPrefix.
+          PCollection<KV<String, List<CompletionCandidate>>> flattenLarger = larger
+              .apply("FlattenLarge", Flatten.<KV<String, List<CompletionCandidate>>>pCollections());
+
+          // Index 0: prefixes longer than minPrefix; index 1: prefixes of exactly minPrefix.
+          return PCollectionList.of(flattenLarger).and(small);
+        }
+    }
+  }
+
+  /**
+   * A DoFn that keys each candidate by all its prefixes.
+   */
+  private static class AllPrefixes
+      extends DoFn<CompletionCandidate, KV<String, CompletionCandidate>> {
+    private final int minPrefix;
+    private final int maxPrefix;
+    public AllPrefixes(int minPrefix) {
+      this(minPrefix, Integer.MAX_VALUE);
+    }
+    public AllPrefixes(int minPrefix, int maxPrefix) {
+      this.minPrefix = minPrefix;
+      this.maxPrefix = maxPrefix;
+    }
+    @Override
+      public void processElement(ProcessContext c) {
+      String word = c.element().value;
+      for (int i = minPrefix; i <= Math.min(word.length(), maxPrefix); i++) {
+        c.output(KV.of(word.substring(0, i), c.element()));
+      }
+    }
+  }
+
+  /**
+   * Class used to store tag-count pairs.
+   */
+  @DefaultCoder(AvroCoder.class)
+  static class CompletionCandidate implements Comparable<CompletionCandidate> {
+    private long count;
+    private String value;
+
+    public CompletionCandidate(String value, long count) {
+      this.value = value;
+      this.count = count;
+    }
+
+    public long getCount() {
+      return count;
+    }
+
+    public String getValue() {
+      return value;
+    }
+
+    // Empty constructor required for Avro decoding.
+    public CompletionCandidate() {}
+
+    @Override
+    public int compareTo(CompletionCandidate o) {
+      if (this.count < o.count) {
+        return -1;
+      } else if (this.count == o.count) {
+        return this.value.compareTo(o.value);
+      } else {
+        return 1;
+      }
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      if (other instanceof CompletionCandidate) {
+        CompletionCandidate that = (CompletionCandidate) other;
+        return this.count == that.count && this.value.equals(that.value);
+      } else {
+        return false;
+      }
+    }
+
+    @Override
+    public int hashCode() {
+      return Long.valueOf(count).hashCode() ^ value.hashCode();
+    }
+
+    @Override
+    public String toString() {
+      return "CompletionCandidate[" + value + ", " + count + "]";
+    }
+  }
+
+  /**
+   * Takes as input a set of strings, and emits each #hashtag found therein.
+   *
+   * <p>A hashtag is a {@code #} followed by one or more non-whitespace characters; the
+   * leading {@code #} is stripped from the emitted value.
+   */
+  static class ExtractHashtags extends DoFn<String, String> {
+    // Compiled once instead of once per element. A Pattern is immutable and safe to share;
+    // every call still creates its own Matcher.
+    private static final Pattern HASHTAG = Pattern.compile("#\\S+");
+
+    @Override
+    public void processElement(ProcessContext c) {
+      Matcher m = HASHTAG.matcher(c.element());
+      while (m.find()) {
+        // Drop the leading '#'.
+        c.output(m.group().substring(1));
+      }
+    }
+  }
+
+  /**
+   * Converts a prefix and its top candidates into a BigQuery row with a {@code prefix} field
+   * and a repeated {@code tags} record holding each candidate's {@code count} and {@code tag}.
+   */
+  static class FormatForBigquery extends DoFn<KV<String, List<CompletionCandidate>>, TableRow> {
+    @Override
+    public void processElement(ProcessContext c) {
+      KV<String, List<CompletionCandidate>> prefixAndTops = c.element();
+
+      // One nested row per candidate.
+      List<TableRow> tagRows = new ArrayList<>();
+      for (CompletionCandidate candidate : prefixAndTops.getValue()) {
+        TableRow tagRow = new TableRow();
+        tagRow.set("count", candidate.getCount());
+        tagRow.set("tag", candidate.getValue());
+        tagRows.add(tagRow);
+      }
+
+      c.output(new TableRow()
+          .set("prefix", prefixAndTops.getKey())
+          .set("tags", tagRows));
+    }
+
+    /**
+     * Defines the BigQuery schema used for the output.
+     */
+    static TableSchema getSchema() {
+      // Fields of the nested "tags" record.
+      TableFieldSchema countField = new TableFieldSchema().setName("count").setType("INTEGER");
+      TableFieldSchema tagField = new TableFieldSchema().setName("tag").setType("STRING");
+      List<TableFieldSchema> tagFields = new ArrayList<>();
+      tagFields.add(countField);
+      tagFields.add(tagField);
+
+      // Top-level fields of the row.
+      TableFieldSchema prefixField = new TableFieldSchema().setName("prefix").setType("STRING");
+      TableFieldSchema tagsField = new TableFieldSchema()
+          .setName("tags").setType("RECORD").setMode("REPEATED").setFields(tagFields);
+      List<TableFieldSchema> fields = new ArrayList<>();
+      fields.add(prefixField);
+      fields.add(tagsField);
+
+      return new TableSchema().setFields(fields);
+    }
+  }
+
+  /**
+   * Takes as input the top candidates per prefix, and emits an entity
+   * suitable for writing to Datastore.
+   *
+   * <p>The entity is keyed by the configured kind and the prefix, and carries a single
+   * {@code candidates} property holding one unindexed nested entity per candidate.
+   */
+  static class FormatForDatastore extends DoFn<KV<String, List<CompletionCandidate>>, Entity> {
+    private final String kind;
+
+    public FormatForDatastore(String kind) {
+      this.kind = kind;
+    }
+
+    @Override
+    public void processElement(ProcessContext c) {
+      KV<String, List<CompletionCandidate>> prefixAndTops = c.element();
+
+      // Build one nested (tag, count) entity value per candidate.
+      List<Value> candidates = new ArrayList<>();
+      for (CompletionCandidate tag : prefixAndTops.getValue()) {
+        Entity.Builder tagEntity = Entity.newBuilder();
+        tagEntity.addProperty(
+            DatastoreHelper.makeProperty("tag", DatastoreHelper.makeValue(tag.getValue())));
+        tagEntity.addProperty(
+            DatastoreHelper.makeProperty("count", DatastoreHelper.makeValue(tag.getCount())));
+        candidates.add(DatastoreHelper.makeValue(tagEntity).setIndexed(false).build());
+      }
+
+      Key key = DatastoreHelper.makeKey(kind, prefixAndTops.getKey()).build();
+      Entity.Builder entityBuilder = Entity.newBuilder();
+      entityBuilder.setKey(key);
+      entityBuilder.addProperty(
+          DatastoreHelper.makeProperty("candidates", DatastoreHelper.makeValue(candidates)));
+      c.output(entityBuilder.build());
+    }
+  }
+
+  /**
+   * Options supported by this class.
+   *
+   * <p>Inherits standard Dataflow configuration options.
+   */
+  private static interface Options extends ExamplePubsubTopicOptions, ExampleBigQueryTableOptions {
+    // Read by main(): the TextIO source in batch mode, and the file injected into Pub/Sub in
+    // streaming mode when non-empty. No default is declared here.
+    @Description("Input text file")
+    String getInputFile();
+    void setInputFile(String value);
+
+    // Passed by main() to ComputeTopCompletions.top to choose the recursive or flat algorithm.
+    @Description("Whether to use the recursive algorithm")
+    @Default.Boolean(true)
+    Boolean getRecursive();
+    void setRecursive(Boolean value);
+
+    // Passed by main() to FormatForDatastore as the entity kind.
+    @Description("Dataset entity kind")
+    @Default.String("autocomplete-demo")
+    String getKind();
+    void setKind(String value);
+
+    // When true, main() formats the results as TableRows and writes them with BigQueryIO.
+    @Description("Whether output to BigQuery")
+    @Default.Boolean(true)
+    Boolean getOutputToBigQuery();
+    void setOutputToBigQuery(Boolean value);
+
+    // When true, main() writes the results with DatastoreIO; main() rejects this in streaming.
+    @Description("Whether output to Datastore")
+    @Default.Boolean(false)
+    Boolean getOutputToDatastore();
+    void setOutputToDatastore(Boolean value);
+
+    // main() falls back to getProject() when this is null.
+    @Description("Datastore output dataset ID, defaults to project ID")
+    String getOutputDataset();
+    void setOutputDataset(String value);
+  }
+
+  /**
+   * Builds and runs the auto-complete pipeline in either batch or windowed streaming mode.
+   *
+   * <p>In streaming mode the input is read from the Pub/Sub topic and windowed into 30-minute
+   * sliding windows that advance every 5 seconds; if {@code --inputFile} is set and non-empty,
+   * its contents are injected into the topic by a separate pipeline. In batch mode
+   * {@code --inputFile} is required and is read with TextIO in the global window.
+   *
+   * @param args command-line arguments parsed into {@code Options}
+   * @throws IllegalArgumentException if Datastore output is requested in streaming mode, or
+   *     {@code --inputFile} is missing in batch mode
+   * @throws IOException propagated from the example utility calls
+   */
+  public static void main(String[] args) throws IOException {
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+
+    // --inputFile declares no default, so this is null when the flag is omitted.
+    String inputFile = options.getInputFile();
+
+    if (options.isStreaming()) {
+      // In order to cancel the pipelines automatically,
+      // {@literal DataflowPipelineRunner} is forced to be used.
+      options.setRunner(DataflowPipelineRunner.class);
+    }
+
+    options.setBigQuerySchema(FormatForBigquery.getSchema());
+    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
+
+    // We support running the same pipeline in either
+    // batch or windowed streaming mode.
+    PTransform<? super PBegin, PCollection<String>> readSource;
+    WindowFn<Object, ?> windowFn;
+    if (options.isStreaming()) {
+      Preconditions.checkArgument(
+          !options.getOutputToDatastore(), "DatastoreIO is not supported in streaming.");
+      dataflowUtils.setupPubsub();
+
+      readSource = PubsubIO.Read.topic(options.getPubsubTopic());
+      windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
+    } else {
+      // Fail with a clear message instead of handing a null file pattern to TextIO.
+      Preconditions.checkArgument(inputFile != null, "--inputFile is required in batch mode.");
+      readSource = TextIO.Read.from(inputFile);
+      windowFn = new GlobalWindows();
+    }
+
+    // Create the pipeline.
+    Pipeline p = Pipeline.create(options);
+    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p
+      .apply(readSource)
+      .apply(ParDo.of(new ExtractHashtags()))
+      .apply(Window.<String>into(windowFn))
+      .apply(ComputeTopCompletions.top(10, options.getRecursive()));
+
+    if (options.getOutputToDatastore()) {
+      toWrite
+      .apply(ParDo.named("FormatForDatastore").of(new FormatForDatastore(options.getKind())))
+      .apply(DatastoreIO.writeTo(MoreObjects.firstNonNull(
+          options.getOutputDataset(), options.getProject())));
+    }
+    if (options.getOutputToBigQuery()) {
+      dataflowUtils.setupBigQueryTable();
+
+      TableReference tableRef = new TableReference();
+      tableRef.setProjectId(options.getProject());
+      tableRef.setDatasetId(options.getBigQueryDataset());
+      tableRef.setTableId(options.getBigQueryTable());
+
+      toWrite
+        .apply(ParDo.of(new FormatForBigquery()))
+        .apply(BigQueryIO.Write
+               .to(tableRef)
+               .withSchema(FormatForBigquery.getSchema())
+               .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
+               .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
+    }
+
+    // Run the pipeline.
+    PipelineResult result = p.run();
+
+    // The null check matters: the streaming pipeline is already running at this point, so a
+    // NullPointerException here would skip waitToFinish and its cancellation handling.
+    if (options.isStreaming() && inputFile != null && !inputFile.isEmpty()) {
+      // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
+      dataflowUtils.runInjectorPipeline(inputFile, options.getPubsubTopic());
+    }
+
+    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
+    dataflowUtils.waitToFinish(result);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/README.md
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/README.md b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/README.md
new file mode 100644
index 0000000..5fba154
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/README.md
@@ -0,0 +1,44 @@
+
+# "Complete" Examples
+
+This directory contains end-to-end example pipelines that perform complex data processing tasks. They include:
+
+<ul>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java">AutoComplete</a>
+  &mdash; An example that computes the most popular hash tags for every
+  prefix, which can be used for auto-completion. Demonstrates how to use the
+  same pipeline in both streaming and batch, combiners, and composite
+  transforms.</li>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/StreamingWordExtract.java">StreamingWordExtract</a>
+  &mdash; A streaming pipeline example that inputs lines of text from a Cloud
+  Pub/Sub topic, splits each line into individual words, capitalizes those
+  words, and writes the output to a BigQuery table.
+  </li>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TfIdf.java">TfIdf</a>
+  &mdash; An example that computes a basic TF-IDF search table for a directory or
+  Cloud Storage prefix. Demonstrates joining data, side inputs, and logging.
+  </li>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java">TopWikipediaSessions</a>
+  &mdash; An example that reads Wikipedia edit data from Cloud Storage and
+  computes the user with the longest string of edits separated by no more than
+  an hour within each month. Demonstrates using Cloud Dataflow
+  <code>Windowing</code> to perform time-based aggregations of data.
+  </li>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficMaxLaneFlow.java">TrafficMaxLaneFlow</a>
+  &mdash; A streaming Cloud Dataflow example using BigQuery output in the
+  <code>traffic sensor</code> domain. Demonstrates the Cloud Dataflow streaming
+  runner, sliding windows, Cloud Pub/Sub topic ingestion, the use of the
+  <code>AvroCoder</code> to encode a custom class, and custom
+  <code>Combine</code> transforms.
+  </li>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficRoutes.java">TrafficRoutes</a>
+  &mdash; A streaming Cloud Dataflow example using BigQuery output in the
+  <code>traffic sensor</code> domain. Demonstrates the Cloud Dataflow streaming
+  runner, <code>GroupByKey</code>, keyed state, sliding windows, and Cloud
+  Pub/Sub topic ingestion.
+  </li>
+  </ul>
+
+See the [documentation](https://cloud.google.com/dataflow/getting-started) and the [Examples
+README](../../../../../../../../../README.md) for
+information about how to run these examples.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/StreamingWordExtract.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/StreamingWordExtract.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/StreamingWordExtract.java
new file mode 100644
index 0000000..99c5249
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/StreamingWordExtract.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
+import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
+import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicOptions;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.PubsubIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * A streaming Dataflow Example using BigQuery output.
+ *
+ * <p>This pipeline example reads lines of text from a PubSub topic, splits each line
+ * into individual words, capitalizes those words, and writes the output to
+ * a BigQuery table.
+ *
+ * <p>By default, the example will run a separate pipeline to inject the data from the default
+ * {@literal --inputFile} to the Pub/Sub {@literal --pubsubTopic}. It will make it available for
+ * the streaming pipeline to process. You may override the default {@literal --inputFile} with the
+ * file of your choosing. You may also set {@literal --inputFile} to an empty string, which will
+ * disable the automatic Pub/Sub injection, and allow you to use a separate tool to control the
+ * input to this example.
+ *
+ * <p>The example is configured to use the default Pub/Sub topic and the default BigQuery table
+ * from the example common package (there are no defaults for a general Dataflow pipeline).
+ * You can override them by using the {@literal --pubsubTopic}, {@literal --bigQueryDataset}, and
+ * {@literal --bigQueryTable} options. If the Pub/Sub topic or the BigQuery table do not exist,
+ * the example will try to create them.
+ *
+ * <p>The example will try to cancel the pipelines on the signal to terminate the process (CTRL-C)
+ * and then exit.
+ */
+public class StreamingWordExtract {
+
+  /**
+   * A DoFn that tokenizes lines of text into individual words.
+   *
+   * <p>Lines are split on runs of characters other than ASCII letters and apostrophes; empty
+   * pieces are dropped.
+   */
+  static class ExtractWords extends DoFn<String, String> {
+    @Override
+    public void processElement(ProcessContext c) {
+      for (String token : c.element().split("[^a-zA-Z']+")) {
+        if (token.isEmpty()) {
+          continue;
+        }
+        c.output(token);
+      }
+    }
+  }
+
+  /**
+   * A DoFn that uppercases a word.
+   *
+   * <p>Uses {@code Locale.ROOT} so the result does not depend on the JVM's default locale
+   * (under a Turkish default locale, for instance, {@code "i"} would become a dotted capital
+   * I rather than {@code "I"}).
+   */
+  static class Uppercase extends DoFn<String, String> {
+    @Override
+    public void processElement(ProcessContext c) {
+      // Fully qualified so the block needs no additional import.
+      c.output(c.element().toUpperCase(java.util.Locale.ROOT));
+    }
+  }
+
+  /**
+   * Converts strings into BigQuery rows.
+   */
+  static class StringToRowConverter extends DoFn<String, TableRow> {
+    /**
+     * In this example, put the whole string into single BigQuery field.
+     */
+    @Override
+    public void processElement(ProcessContext c) {
+      c.output(new TableRow().set("string_field", c.element()));
+    }
+
+    /**
+     * Returns the schema matching the rows emitted by {@link #processElement}: a single
+     * {@code STRING} field named {@code string_field}.
+     */
+    static TableSchema getSchema() {
+      // A plain ArrayList rather than an anonymous ArrayList subclass with an instance
+      // initializer ("double-brace initialization"), which adds an extra class for no benefit.
+      ArrayList<TableFieldSchema> fields = new ArrayList<TableFieldSchema>();
+      fields.add(new TableFieldSchema().setName("string_field").setType("STRING"));
+      return new TableSchema().setFields(fields);
+    }
+  }
+
+  /**
+   * Options supported by {@link StreamingWordExtract}.
+   *
+   * <p>Inherits standard configuration options.
+   */
+  private interface StreamingWordExtractOptions
+      extends ExamplePubsubTopicOptions, ExampleBigQueryTableOptions {
+    // main() injects this file into the Pub/Sub topic unless the value is an empty string.
+    @Description("Input file to inject to Pub/Sub topic")
+    @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt")
+    String getInputFile();
+    void setInputFile(String value);
+  }
+
+  /**
+   * Sets up and starts the streaming pipeline.
+   *
+   * <p>Streaming mode and the {@code DataflowPipelineRunner} are always forced. The pipeline
+   * reads from the Pub/Sub topic, extracts and uppercases words, and writes them to the
+   * BigQuery table {@code project:dataset.table}. Unless {@code --inputFile} is empty, the
+   * file is injected into the topic after the pipeline has been started.
+   *
+   * @throws IOException if there is a problem setting up resources
+   */
+  public static void main(String[] args) throws IOException {
+    StreamingWordExtractOptions options =
+        PipelineOptionsFactory.fromArgs(args)
+            .withValidation()
+            .as(StreamingWordExtractOptions.class);
+    options.setStreaming(true);
+    // In order to cancel the pipelines automatically,
+    // {@literal DataflowPipelineRunner} is forced to be used.
+    options.setRunner(DataflowPipelineRunner.class);
+    options.setBigQuerySchema(StringToRowConverter.getSchema());
+
+    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
+    dataflowUtils.setup();
+
+    Pipeline pipeline = Pipeline.create(options);
+
+    // BigQuery table spec of the form "project:dataset.table".
+    String tableSpec = options.getProject()
+        + ":" + options.getBigQueryDataset()
+        + "." + options.getBigQueryTable();
+
+    pipeline
+        .apply(PubsubIO.Read.topic(options.getPubsubTopic()))
+        .apply(ParDo.of(new ExtractWords()))
+        .apply(ParDo.of(new Uppercase()))
+        .apply(ParDo.of(new StringToRowConverter()))
+        .apply(BigQueryIO.Write.to(tableSpec)
+            .withSchema(StringToRowConverter.getSchema()));
+
+    PipelineResult result = pipeline.run();
+
+    String inputFile = options.getInputFile();
+    if (!inputFile.isEmpty()) {
+      // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
+      dataflowUtils.runInjectorPipeline(inputFile, options.getPubsubTopic());
+    }
+
+    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
+    dataflowUtils.waitToFinish(result);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TfIdf.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TfIdf.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TfIdf.java
new file mode 100644
index 0000000..65ac753
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TfIdf.java
@@ -0,0 +1,431 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.Coder;
+import com.google.cloud.dataflow.sdk.coders.KvCoder;
+import com.google.cloud.dataflow.sdk.coders.StringDelegateCoder;
+import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.GcsOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.Flatten;
+import com.google.cloud.dataflow.sdk.transforms.Keys;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates;
+import com.google.cloud.dataflow.sdk.transforms.Values;
+import com.google.cloud.dataflow.sdk.transforms.View;
+import com.google.cloud.dataflow.sdk.transforms.WithKeys;
+import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult;
+import com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey;
+import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple;
+import com.google.cloud.dataflow.sdk.util.GcsUtil;
+import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.PCollectionList;
+import com.google.cloud.dataflow.sdk.values.PCollectionView;
+import com.google.cloud.dataflow.sdk.values.PDone;
+import com.google.cloud.dataflow.sdk.values.PInput;
+import com.google.cloud.dataflow.sdk.values.TupleTag;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * An example that computes a basic TF-IDF search table for a directory or GCS prefix.
+ *
+ * <p>Concepts: joining data; side inputs; logging
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }</pre>
+ * and a local output file or output prefix on GCS:
+ * <pre>{@code
+ *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
+ * }</pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * }</pre>
+ * and an output prefix on GCS:
+ * <pre>{@code
+ *   --output=gs://YOUR_OUTPUT_PREFIX
+ * }</pre>
+ *
+ * <p>The default input is {@code gs://dataflow-samples/shakespeare/} and can be overridden with
+ * {@code --input}.
+ */
+public class TfIdf {
+  /**
+   * Options supported by {@link TfIdf}.
+   *
+   * <p>Inherits standard configuration options and adds the input location to read
+   * documents from and the (required) output prefix to write results to.
+   */
+  private static interface Options extends PipelineOptions {
+    // Input location; when not supplied, the declared default below is used.
+    @Description("Path to the directory or GCS prefix containing files to read from")
+    @Default.String("gs://dataflow-samples/shakespeare/")
+    String getInput();
+    void setInput(String value);
+
+    // Output prefix; marked required, so it has no default value.
+    @Description("Prefix of output URI to write to")
+    @Validation.Required
+    String getOutput();
+    void setOutput(String value);
+  }
+
+  /**
+   * Lists documents contained beneath the {@code options.input} prefix/directory.
+   */
+  public static Set<URI> listInputDocuments(Options options)
+      throws URISyntaxException, IOException {
+    URI baseUri = new URI(options.getInput());
+
+    // List all documents in the directory or GCS prefix.
+    URI absoluteUri;
+    if (baseUri.getScheme() != null) {
+      absoluteUri = baseUri;
+    } else {
+      absoluteUri = new URI(
+          "file",
+          baseUri.getAuthority(),
+          baseUri.getPath(),
+          baseUri.getQuery(),
+          baseUri.getFragment());
+    }
+
+    Set<URI> uris = new HashSet<>();
+    if (absoluteUri.getScheme().equals("file")) {
+      File directory = new File(absoluteUri);
+      for (String entry : directory.list()) {
+        File path = new File(directory, entry);
+        uris.add(path.toURI());
+      }
+    } else if (absoluteUri.getScheme().equals("gs")) {
+      GcsUtil gcsUtil = options.as(GcsOptions.class).getGcsUtil();
+      URI gcsUriGlob = new URI(
+          absoluteUri.getScheme(),
+          absoluteUri.getAuthority(),
+          absoluteUri.getPath() + "*",
+          absoluteUri.getQuery(),
+          absoluteUri.getFragment());
+      for (GcsPath entry : gcsUtil.expand(GcsPath.fromUri(gcsUriGlob))) {
+        uris.add(entry.toUri());
+      }
+    }
+
+    return uris;
+  }
+
+  /**
+   * Reads every document named by the supplied URIs and emits each line of text
+   * paired with the URI of the document it was read from.
+   */
+  public static class ReadDocuments
+      extends PTransform<PInput, PCollection<KV<URI, String>>> {
+    private Iterable<URI> uris;
+
+    public ReadDocuments(Iterable<URI> uris) {
+      this.uris = uris;
+    }
+
+    @Override
+    public Coder<?> getDefaultOutputCoder() {
+      Coder<URI> uriCoder = StringDelegateCoder.of(URI.class);
+      return KvCoder.of(uriCoder, StringUtf8Coder.of());
+    }
+
+    @Override
+    public PCollection<KV<URI, String>> apply(PInput input) {
+      Pipeline p = input.getPipeline();
+
+      // One keyed PCollection is produced per document; they are collected
+      // in this list and flattened into a single PCollection at the end.
+      PCollectionList<KV<URI, String>> perDocument = PCollectionList.empty(p);
+
+      for (final URI documentUri : uris) {
+        // TextIO.Read supports:
+        //  - file: URIs and paths locally
+        //  - gs: URIs on the service
+        // so file: URIs are converted to plain paths and anything else is passed as-is.
+        String location = documentUri.getScheme().equals("file")
+            ? new File(documentUri).getPath()
+            : documentUri.toString();
+
+        PCollection<String> lines = p.apply(
+            TextIO.Read.from(location).named("TextIO.Read(" + location + ")"));
+        PCollection<KV<URI, String>> keyedLines = lines.apply(
+            "WithKeys(" + location + ")", WithKeys.<URI, String>of(documentUri));
+
+        perDocument = perDocument.and(keyedLines);
+      }
+
+      return perDocument.apply(Flatten.<KV<URI, String>>pCollections());
+    }
+  }
+
+  /**
+   * A transform containing a basic TF-IDF pipeline. The input consists of KV objects
+   * where the key is the document's URI and the value is a piece
+   * of the document's content. The output is a mapping from terms to
+   * scores for each document URI.
+   */
+  public static class ComputeTfIdf
+      extends PTransform<PCollection<KV<URI, String>>, PCollection<KV<String, KV<URI, Double>>>> {
+    public ComputeTfIdf() { }
+
+    @Override
+    public PCollection<KV<String, KV<URI, Double>>> apply(
+      PCollection<KV<URI, String>> uriToContent) {
+
+      // Compute the total number of documents, and
+      // prepare this singleton PCollectionView for
+      // use as a side input.
+      final PCollectionView<Long> totalDocuments =
+          uriToContent
+          .apply("GetURIs", Keys.<URI>create())
+          .apply("RemoveDuplicateDocs", RemoveDuplicates.<URI>create())
+          .apply(Count.<URI>globally())
+          .apply(View.<Long>asSingleton());
+
+      // Create a collection of pairs mapping a URI to each
+      // of the words in the document associated with that URI.
+      // Words are split on runs of non-word characters and lower-cased.
+      PCollection<KV<URI, String>> uriToWords = uriToContent
+          .apply(ParDo.named("SplitWords").of(
+              new DoFn<KV<URI, String>, KV<URI, String>>() {
+                @Override
+                public void processElement(ProcessContext c) {
+                  URI uri = c.element().getKey();
+                  String line = c.element().getValue();
+                  for (String word : line.split("\\W+")) {
+                    // Log INFO messages when the word "love" is found.
+                    if (word.toLowerCase().equals("love")) {
+                      LOG.info("Found {}", word.toLowerCase());
+                    }
+
+                    // Skip the empty string that split() can produce.
+                    if (!word.isEmpty()) {
+                      c.output(KV.of(uri, word.toLowerCase()));
+                    }
+                  }
+                }
+              }));
+
+      // Compute a mapping from each word to the total
+      // number of documents in which it appears.
+      PCollection<KV<String, Long>> wordToDocCount = uriToWords
+          .apply("RemoveDuplicateWords", RemoveDuplicates.<KV<URI, String>>create())
+          .apply(Values.<String>create())
+          .apply("CountDocs", Count.<String>perElement());
+
+      // Compute a mapping from each URI to the total
+      // number of words in the document associated with that URI.
+      PCollection<KV<URI, Long>> uriToWordTotal = uriToWords
+          .apply("GetURIs2", Keys.<URI>create())
+          .apply("CountWords", Count.<URI>perElement());
+
+      // Count, for each (URI, word) pair, the number of
+      // occurrences of that word in the document associated
+      // with the URI.
+      PCollection<KV<KV<URI, String>, Long>> uriAndWordToCount = uriToWords
+          .apply("CountWordDocPairs", Count.<KV<URI, String>>perElement());
+
+      // Convert the above collection, a mapping from
+      // (URI, word) pairs to counts, into an isomorphic mapping
+      // from URI to (word, count) pairs, to prepare for a join
+      // by the URI key.
+      PCollection<KV<URI, KV<String, Long>>> uriToWordAndCount = uriAndWordToCount
+          .apply(ParDo.named("ShiftKeys").of(
+              new DoFn<KV<KV<URI, String>, Long>, KV<URI, KV<String, Long>>>() {
+                @Override
+                public void processElement(ProcessContext c) {
+                  URI uri = c.element().getKey().getKey();
+                  String word = c.element().getKey().getValue();
+                  Long occurrences = c.element().getValue();
+                  c.output(KV.of(uri, KV.of(word, occurrences)));
+                }
+              }));
+
+      // Prepare to join the mapping of URI to (word, count) pairs with
+      // the mapping of URI to total word counts, by associating
+      // each of the input PCollection<KV<URI, ...>> with
+      // a tuple tag. Each input must have the same key type, URI
+      // in this case. The type parameter of the tuple tag matches
+      // the types of the values for each collection.
+      final TupleTag<Long> wordTotalsTag = new TupleTag<Long>();
+      final TupleTag<KV<String, Long>> wordCountsTag = new TupleTag<KV<String, Long>>();
+      KeyedPCollectionTuple<URI> coGbkInput = KeyedPCollectionTuple
+          .of(wordTotalsTag, uriToWordTotal)
+          .and(wordCountsTag, uriToWordAndCount);
+
+      // Perform a CoGroupByKey (a sort of pre-join) on the prepared
+      // inputs. This yields a mapping from URI to a CoGbkResult
+      // (CoGroupByKey Result). The CoGbkResult is a mapping
+      // from the above tuple tags to the values in each input
+      // associated with a particular URI. In this case, each
+      // KV<URI, CoGbkResult> groups a URI with the total number of
+      // words in that document as well as all the (word, count)
+      // pairs for particular words.
+      PCollection<KV<URI, CoGbkResult>> uriToWordAndCountAndTotal = coGbkInput
+          .apply("CoGroupByUri", CoGroupByKey.<URI>create());
+
+      // Compute a mapping from each word to a (URI, term frequency)
+      // pair for each URI. A word's term frequency for a document
+      // is simply the number of times that word occurs in the document
+      // divided by the total number of words in the document.
+      PCollection<KV<String, KV<URI, Double>>> wordToUriAndTf = uriToWordAndCountAndTotal
+          .apply(ParDo.named("ComputeTermFrequencies").of(
+              new DoFn<KV<URI, CoGbkResult>, KV<String, KV<URI, Double>>>() {
+                @Override
+                public void processElement(ProcessContext c) {
+                  URI uri = c.element().getKey();
+                  Long wordTotal = c.element().getValue().getOnly(wordTotalsTag);
+
+                  for (KV<String, Long> wordAndCount
+                           : c.element().getValue().getAll(wordCountsTag)) {
+                    String word = wordAndCount.getKey();
+                    Long wordCount = wordAndCount.getValue();
+                    Double termFrequency = wordCount.doubleValue() / wordTotal.doubleValue();
+                    c.output(KV.of(word, KV.of(uri, termFrequency)));
+                  }
+                }
+              }));
+
+      // Compute a mapping from each word to its document frequency.
+      // A word's document frequency in a corpus is the number of
+      // documents in which the word appears divided by the total
+      // number of documents in the corpus. Note how the total number of
+      // documents is passed as a side input; the same value is
+      // presented to each invocation of the DoFn.
+      PCollection<KV<String, Double>> wordToDf = wordToDocCount
+          .apply(ParDo
+              .named("ComputeDocFrequencies")
+              .withSideInputs(totalDocuments)
+              .of(new DoFn<KV<String, Long>, KV<String, Double>>() {
+                @Override
+                public void processElement(ProcessContext c) {
+                  String word = c.element().getKey();
+                  Long documentCount = c.element().getValue();
+                  Long documentTotal = c.sideInput(totalDocuments);
+                  Double documentFrequency = documentCount.doubleValue()
+                      / documentTotal.doubleValue();
+
+                  c.output(KV.of(word, documentFrequency));
+                }
+              }));
+
+      // Join the term frequency and document frequency
+      // collections, each keyed on the word.
+      final TupleTag<KV<URI, Double>> tfTag = new TupleTag<KV<URI, Double>>();
+      final TupleTag<Double> dfTag = new TupleTag<Double>();
+      PCollection<KV<String, CoGbkResult>> wordToUriAndTfAndDf = KeyedPCollectionTuple
+          .of(tfTag, wordToUriAndTf)
+          .and(dfTag, wordToDf)
+          .apply(CoGroupByKey.<String>create());
+
+      // Compute a mapping from each word to a (URI, TF-IDF) score
+      // for each URI. There are a variety of definitions of TF-IDF
+      // ("term frequency - inverse document frequency") score;
+      // here we use a basic version that is the term frequency
+      // multiplied by the log of the inverse document frequency,
+      // i.e. tf * log(1 / df).
+      PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf = wordToUriAndTfAndDf
+          .apply(ParDo.named("ComputeTfIdf").of(
+              new DoFn<KV<String, CoGbkResult>, KV<String, KV<URI, Double>>>() {
+                @Override
+                public void processElement(ProcessContext c) {
+                  String word = c.element().getKey();
+                  Double df = c.element().getValue().getOnly(dfTag);
+
+                  for (KV<URI, Double> uriAndTf : c.element().getValue().getAll(tfTag)) {
+                    URI uri = uriAndTf.getKey();
+                    Double tf = uriAndTf.getValue();
+                    Double tfIdf = tf * Math.log(1 / df);
+                    c.output(KV.of(word, KV.of(uri, tfIdf)));
+                  }
+                }
+              }));
+
+      return wordToUriAndTfIdf;
+    }
+
+    // Instantiate Logger.
+    // It is suggested that the user specify the class name of the containing class
+    // (in this case ComputeTfIdf).
+    private static final Logger LOG = LoggerFactory.getLogger(ComputeTfIdf.class);
+  }
+
+  /**
+   * A {@link PTransform} that formats each (term, (URI, score)) element as a line of
+   * comma-and-tab separated text and writes the lines to files with a {@code .csv}
+   * suffix under the configured output prefix.
+   */
+  public static class WriteTfIdf
+      extends PTransform<PCollection<KV<String, KV<URI, Double>>>, PDone> {
+    private String output;
+
+    public WriteTfIdf(String output) {
+      this.output = output;
+    }
+
+    @Override
+    public PDone apply(PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf) {
+      // Render every element as "term,<TAB>uri,<TAB>score".
+      PCollection<String> csvLines = wordToUriAndTfIdf.apply(
+          ParDo.named("Format").of(new DoFn<KV<String, KV<URI, Double>>, String>() {
+            @Override
+            public void processElement(ProcessContext c) {
+              String term = c.element().getKey();
+              KV<URI, Double> uriAndScore = c.element().getValue();
+              c.output(String.format(
+                  "%s,\t%s,\t%f", term, uriAndScore.getKey(), uriAndScore.getValue()));
+            }
+          }));
+
+      return csvLines.apply(TextIO.Write.to(output).withSuffix(".csv"));
+    }
+  }
+
+  /**
+   * Builds and runs the pipeline: list the input documents, read them, compute the
+   * TF-IDF scores and write the result under the configured output prefix.
+   */
+  public static void main(String[] args) throws Exception {
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    Pipeline pipeline = Pipeline.create(options);
+    // URI has no default coder, so register one that encodes it via its string form.
+    pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));
+
+    Set<URI> documents = listInputDocuments(options);
+    PCollection<KV<URI, String>> lines = pipeline.apply(new ReadDocuments(documents));
+    PCollection<KV<String, KV<URI, Double>>> scores = lines.apply(new ComputeTfIdf());
+    scores.apply(new WriteTfIdf(options.getOutput()));
+
+    pipeline.run();
+  }
+}

[62/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java
new file mode 100644
index 0000000..55982df
--- /dev/null
+++ b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game.injector;
+
+
+import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
+import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.api.client.googleapis.util.Utils;
+import com.google.api.client.http.HttpRequestInitializer;
+import com.google.api.client.http.HttpStatusCodes;
+import com.google.api.client.http.HttpTransport;
+import com.google.api.client.json.JsonFactory;
+import com.google.api.services.pubsub.Pubsub;
+import com.google.api.services.pubsub.PubsubScopes;
+import com.google.api.services.pubsub.model.Topic;
+
+import com.google.common.base.Preconditions;
+
+import java.io.IOException;
+
+class InjectorUtils {
+
+  private static final String APP_NAME = "injector";
+
+  /**
+   * Builds a new Pubsub client and returns it.
+   */
+  public static Pubsub getClient(final HttpTransport httpTransport,
+                                 final JsonFactory jsonFactory)
+           throws IOException {
+      Preconditions.checkNotNull(httpTransport);
+      Preconditions.checkNotNull(jsonFactory);
+      GoogleCredential credential =
+          GoogleCredential.getApplicationDefault(httpTransport, jsonFactory);
+      if (credential.createScopedRequired()) {
+          credential = credential.createScoped(PubsubScopes.all());
+      }
+      if (credential.getClientAuthentication() != null) {
+        System.out.println("\n***Warning! You are not using service account credentials to "
+          + "authenticate.\nYou need to use service account credentials for this example,"
+          + "\nsince user-level credentials do not have enough pubsub quota,\nand so you will run "
+          + "out of PubSub quota very quickly.\nSee "
+          + "https://developers.google.com/identity/protocols/application-default-credentials.");
+        System.exit(1);
+      }
+      HttpRequestInitializer initializer =
+          new RetryHttpInitializerWrapper(credential);
+      return new Pubsub.Builder(httpTransport, jsonFactory, initializer)
+              .setApplicationName(APP_NAME)
+              .build();
+  }
+
+  /**
+   * Builds a new Pubsub client with default HttpTransport and
+   * JsonFactory and returns it.
+   */
+  public static Pubsub getClient() throws IOException {
+      return getClient(Utils.getDefaultTransport(),
+                       Utils.getDefaultJsonFactory());
+  }
+
+
+  /**
+   * Returns the fully qualified topic name for Pub/Sub.
+   */
+  public static String getFullyQualifiedTopicName(
+          final String project, final String topic) {
+      return String.format("projects/%s/topics/%s", project, topic);
+  }
+
+  /**
+   * Create a topic if it doesn't exist.
+   */
+  public static void createTopic(Pubsub client, String fullTopicName)
+      throws IOException {
+    try {
+        client.projects().topics().get(fullTopicName).execute();
+    } catch (GoogleJsonResponseException e) {
+      if (e.getStatusCode() == HttpStatusCodes.STATUS_CODE_NOT_FOUND) {
+        Topic topic = client.projects().topics()
+                .create(fullTopicName, new Topic())
+                .execute();
+        System.out.printf("Topic %s was created.\n", topic.getName());
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java
new file mode 100644
index 0000000..1437534
--- /dev/null
+++ b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game.injector;
+
+import com.google.api.client.auth.oauth2.Credential;
+import com.google.api.client.http.HttpBackOffIOExceptionHandler;
+import com.google.api.client.http.HttpBackOffUnsuccessfulResponseHandler;
+import com.google.api.client.http.HttpRequest;
+import com.google.api.client.http.HttpRequestInitializer;
+import com.google.api.client.http.HttpResponse;
+import com.google.api.client.http.HttpUnsuccessfulResponseHandler;
+import com.google.api.client.util.ExponentialBackOff;
+import com.google.api.client.util.Sleeper;
+import com.google.common.base.Preconditions;
+
+import java.io.IOException;
+import java.util.logging.Logger;
+
+/**
+ * RetryHttpInitializerWrapper will automatically retry upon RPC
+ * failures, preserving the auto-refresh behavior of the Google
+ * Credentials.
+ */
+public class RetryHttpInitializerWrapper implements HttpRequestInitializer {
+
+    /**
+     * A private logger.
+     */
+    private static final Logger LOG =
+            Logger.getLogger(RetryHttpInitializerWrapper.class.getName());
+
+    /**
+     * One minute in milliseconds.
+     */
+    private static final int ONE_MINUTE_MILLIS = 60000;
+
+    /**
+     * Intercepts the request for filling in the "Authorization"
+     * header field, as well as recovering from certain unsuccessful
+     * error codes wherein the Credential must refresh its token for a
+     * retry.
+     */
+    private final Credential wrappedCredential;
+
+    /**
+     * A sleeper; you can replace it with a mock in your test.
+     */
+    private final Sleeper sleeper;
+
+    /**
+     * A constructor.
+     *
+     * @param wrappedCredential Credential which will be wrapped and
+     * used for providing auth header.
+     */
+    public RetryHttpInitializerWrapper(final Credential wrappedCredential) {
+        this(wrappedCredential, Sleeper.DEFAULT);
+    }
+
+    /**
+     * A package-private constructor only for testing.
+     *
+     * @param wrappedCredential Credential which will be wrapped and
+     * used for providing auth header; must not be null.
+     * @param sleeper Sleeper for easy testing; must not be null.
+     */
+    RetryHttpInitializerWrapper(
+            final Credential wrappedCredential, final Sleeper sleeper) {
+        this.wrappedCredential = Preconditions.checkNotNull(wrappedCredential);
+        // Fail fast here rather than later, when initialize() hands the
+        // sleeper to the back-off handlers.
+        this.sleeper = Preconditions.checkNotNull(sleeper);
+    }
+
+    /**
+     * Initializes the given request: sets a two minute read timeout,
+     * installs the wrapped credential as the request interceptor, and
+     * registers handlers that retry with exponential back-off on
+     * unsuccessful responses and I/O exceptions.
+     */
+    @Override
+    public final void initialize(final HttpRequest request) {
+        request.setReadTimeout(2 * ONE_MINUTE_MILLIS); // 2 minutes read timeout
+        final HttpUnsuccessfulResponseHandler backoffHandler =
+                new HttpBackOffUnsuccessfulResponseHandler(
+                        new ExponentialBackOff())
+                        .setSleeper(sleeper);
+        request.setInterceptor(wrappedCredential);
+        request.setUnsuccessfulResponseHandler(
+                new HttpUnsuccessfulResponseHandler() {
+                    @Override
+                    public boolean handleResponse(
+                            final HttpRequest request,
+                            final HttpResponse response,
+                            final boolean supportsRetry) throws IOException {
+                        if (wrappedCredential.handleResponse(
+                                request, response, supportsRetry)) {
+                            // If credential decides it can handle it,
+                            // the return code or message indicated
+                            // something specific to authentication,
+                            // and no backoff is desired.
+                            return true;
+                        } else if (backoffHandler.handleResponse(
+                                request, response, supportsRetry)) {
+                            // Otherwise, we defer to the judgement of
+                            // our internal backoff handler.
+                            LOG.info("Retrying "
+                                    + request.getUrl().toString());
+                            return true;
+                        } else {
+                            return false;
+                        }
+                    }
+                });
+        request.setIOExceptionHandler(
+                new HttpBackOffIOExceptionHandler(new ExponentialBackOff())
+                        .setSleeper(sleeper));
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteToBigQuery.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteToBigQuery.java b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteToBigQuery.java
new file mode 100644
index 0000000..2cf719a
--- /dev/null
+++ b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteToBigQuery.java
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game.utils;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.examples.complete.game.UserScore;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.WriteDisposition;
+import com.google.cloud.dataflow.sdk.options.GcpOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.PDone;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A {@link PTransform} that turns each input element into a BigQuery {@link TableRow} and appends
+ * the rows to a table. The column names, their BigQuery types, and the functions that compute
+ * each column value are supplied by the caller as a map of {@link FieldInfo}.
+ */
+public class WriteToBigQuery<T>
+    extends PTransform<PCollection<T>, PDone> {
+
+  protected String tableName;
+  protected Map<String, FieldInfo<T>> fieldInfo;
+
+  /** Creates a transform whose table name and field map are left unset. */
+  public WriteToBigQuery() {}
+
+  /** Creates a transform writing to {@code tableName} with the given column definitions. */
+  public WriteToBigQuery(String tableName, Map<String, FieldInfo<T>> fieldInfo) {
+    this.tableName = tableName;
+    this.fieldInfo = fieldInfo;
+  }
+
+  /** Holds the BigQuery type of one output column and the function that computes its value. */
+  public static class FieldInfo<T> implements Serializable {
+    // The BigQuery 'type' of the field
+    private final String fieldType;
+    // A lambda function to generate the field value
+    private final SerializableFunction<DoFn<T, TableRow>.ProcessContext, Object> fieldFn;
+
+    public FieldInfo(String fieldType,
+        SerializableFunction<DoFn<T, TableRow>.ProcessContext, Object> fieldFn) {
+      this.fieldType = fieldType;
+      this.fieldFn = fieldFn;
+    }
+
+    String getFieldType() {
+      return this.fieldType;
+    }
+
+    SerializableFunction<DoFn<T, TableRow>.ProcessContext, Object> getFieldFn() {
+      return this.fieldFn;
+    }
+  }
+
+  /** Convert each element into a BigQuery TableRow, one column per {@code fieldInfo} entry. */
+  protected class BuildRowFn extends DoFn<T, TableRow> {
+
+    @Override
+    public void processElement(ProcessContext c) {
+      TableRow row = new TableRow();
+      for (Map.Entry<String, FieldInfo<T>> entry : fieldInfo.entrySet()) {
+        String key = entry.getKey();
+        FieldInfo<T> fcnInfo = entry.getValue();
+        SerializableFunction<DoFn<T, TableRow>.ProcessContext, Object> fcn =
+            fcnInfo.getFieldFn();
+        row.set(key, fcn.apply(c));
+      }
+      c.output(row);
+    }
+  }
+
+  /** Build the output table schema: one field per {@code fieldInfo} entry, in map order. */
+  protected TableSchema getSchema() {
+    List<TableFieldSchema> fields = new ArrayList<>();
+    for (Map.Entry<String, FieldInfo<T>> entry : fieldInfo.entrySet()) {
+      String key = entry.getKey();
+      FieldInfo<T> fcnInfo = entry.getValue();
+      String bqType = fcnInfo.getFieldType();
+      fields.add(new TableFieldSchema().setName(key).setType(bqType));
+    }
+    return new TableSchema().setFields(fields);
+  }
+
+  @Override
+  public PDone apply(PCollection<T> teamAndScore) {
+    // Create the table if it is missing; otherwise append to the rows already there.
+    return teamAndScore
+        .apply(ParDo.named("ConvertToRow").of(new BuildRowFn()))
+        .apply(BigQueryIO.Write
+            .to(getTable(teamAndScore.getPipeline(), tableName))
+            .withSchema(getSchema())
+            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
+            .withWriteDisposition(WriteDisposition.WRITE_APPEND));
+  }
+
+  /** Utility to construct an output table reference from the pipeline options and a table id. */
+  static TableReference getTable(Pipeline pipeline, String tableName) {
+    PipelineOptions options = pipeline.getOptions();
+    TableReference table = new TableReference();
+    table.setDatasetId(options.as(UserScore.Options.class).getDataset());
+    table.setProjectId(options.as(GcpOptions.class).getProject());
+    table.setTableId(tableName);
+    return table;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteWindowedToBigQuery.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteWindowedToBigQuery.java b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteWindowedToBigQuery.java
new file mode 100644
index 0000000..8433021
--- /dev/null
+++ b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteWindowedToBigQuery.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game.utils;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.WriteDisposition;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.PDone;
+
+import java.util.Map;
+
+/**
+ * Variant of {@link WriteToBigQuery} whose row-building {@link DoFn} is marked with
+ * {@link RequiresWindowAccess}, so it may be used for writes that require access to the
+ * context's window information.
+ */
+public class WriteWindowedToBigQuery<T> extends WriteToBigQuery<T> {
+
+  /**
+   * Creates the transform by handing both arguments to {@link WriteToBigQuery}.
+   * @param tableName name of the destination table
+   * @param fieldInfo column definitions, keyed by column name
+   */
+  public WriteWindowedToBigQuery(String tableName, Map<String, FieldInfo<T>> fieldInfo) {
+    super(tableName, fieldInfo);
+  }
+
+  /** Builds one BigQuery TableRow per element; declared to need window access. */
+  protected class BuildRowFn extends DoFn<T, TableRow> implements RequiresWindowAccess {
+
+    @Override
+    public void processElement(ProcessContext c) {
+      TableRow tableRow = new TableRow();
+      for (Map.Entry<String, FieldInfo<T>> column : fieldInfo.entrySet()) {
+        String columnName = column.getKey();
+        SerializableFunction<DoFn<T, TableRow>.ProcessContext, Object> valueFn =
+            column.getValue().getFieldFn();
+        tableRow.set(columnName, valueFn.apply(c));
+      }
+      c.output(tableRow);
+    }
+  }
+
+  @Override
+  public PDone apply(PCollection<T> teamAndScore) {
+    // Convert the elements to rows with the window-aware DoFn declared above.
+    PCollection<TableRow> rows =
+        teamAndScore.apply(ParDo.named("ConvertToRow").of(new BuildRowFn()));
+    // Then write them with the same table, schema and dispositions as the superclass.
+    return rows.apply(BigQueryIO.Write
+        .to(getTable(teamAndScore.getPipeline(), tableName))
+        .withSchema(getSchema())
+        .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
+        .withWriteDisposition(WriteDisposition.WRITE_APPEND));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/test/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8Test.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/test/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8Test.java b/examples/java8/src/test/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8Test.java
new file mode 100644
index 0000000..fcae41c
--- /dev/null
+++ b/examples/java8/src/test/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8Test.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.GcsOptions;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.Filter;
+import com.google.cloud.dataflow.sdk.transforms.FlatMapElements;
+import com.google.cloud.dataflow.sdk.transforms.MapElements;
+import com.google.cloud.dataflow.sdk.util.GcsUtil;
+import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+import com.google.common.collect.ImmutableList;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.channels.FileChannel;
+import java.nio.channels.SeekableByteChannel;
+import java.nio.file.Files;
+import java.nio.file.StandardOpenOption;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Smoke test for {@link MinimalWordCountJava8}. That example is kept simple rather than
+ * factored for testing, so this file must be maintained with its own copy of the pipeline code.
+ */
+@RunWith(JUnit4.class)
+public class MinimalWordCountJava8Test implements Serializable {
+
+  /**
+   * Constructs the word-count pipeline against a mocked GCS layer; it is built but never run.
+   */
+  @Test
+  public void testMinimalWordCountJava8() throws Exception {
+    Pipeline pipeline = TestPipeline.create();
+    GcsOptions gcsOptions = pipeline.getOptions().as(GcsOptions.class);
+    gcsOptions.setGcsUtil(buildMockGcsUtil());
+
+    pipeline
+        .apply(TextIO.Read.from("gs://dataflow-samples/shakespeare/*"))
+        .apply(FlatMapElements.via((String line) -> Arrays.asList(line.split("[^a-zA-Z']+")))
+            .withOutputType(new TypeDescriptor<String>() {}))
+        .apply(Filter.byPredicate((String token) -> !token.isEmpty()))
+        .apply(Count.<String>perElement())
+        .apply(MapElements
+            .via((KV<String, Long> tally) -> tally.getKey() + ": " + tally.getValue())
+            .withOutputType(new TypeDescriptor<String>() {}))
+        .apply(TextIO.Write.to("gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX"));
+  }
+
+  /** Returns a {@link GcsUtil} mock with stubbed {@code open} and {@code expand} calls. */
+  private GcsUtil buildMockGcsUtil() throws IOException {
+    // Any request to open gets a new bogus channel, backed by a self-deleting temp file.
+    Answer<SeekableByteChannel> bogusChannel = new Answer<SeekableByteChannel>() {
+      @Override
+      public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable {
+        return FileChannel.open(
+            Files.createTempFile("channel-", ".tmp"),
+            StandardOpenOption.CREATE, StandardOpenOption.DELETE_ON_CLOSE);
+      }
+    };
+
+    // Any request for expansion returns a list containing the original GcsPath.
+    // This is required to pass validation that occurs in TextIO during apply().
+    Answer<List<GcsPath>> samePath = new Answer<List<GcsPath>>() {
+      @Override
+      public List<GcsPath> answer(InvocationOnMock invocation) throws Throwable {
+        return ImmutableList.of((GcsPath) invocation.getArguments()[0]);
+      }
+    };
+
+    GcsUtil gcsUtil = Mockito.mock(GcsUtil.class);
+    Mockito.when(gcsUtil.open(Mockito.any(GcsPath.class))).then(bogusChannel);
+    Mockito.when(gcsUtil.expand(Mockito.any(GcsPath.class))).then(samePath);
+    return gcsUtil;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java b/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java
new file mode 100644
index 0000000..f77d146
--- /dev/null
+++ b/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game;
+
+import com.google.cloud.dataflow.examples.complete.game.GameStats.CalculateSpammyUsers;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Tests of GameStats.
+ * The pipeline under test favors readability and ease of explanation over modularity, which
+ * limits how it can be tested. For better approaches, see the testing documentation:
+ * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline.
+ */
+@RunWith(JUnit4.class)
+public class GameStatsTest implements Serializable {
+
+  // Input user/score pairs; only the two "Robot" entries reappear in SPAMMERS below.
+  static final List<KV<String, Integer>> USER_SCORES = Arrays.asList(
+      KV.of("Robot-2", 66), KV.of("Robot-1", 116),
+      KV.of("user7_AndroidGreenKookaburra", 23), KV.of("user7_AndroidGreenKookaburra", 1),
+      KV.of("user19_BisqueBilby", 14), KV.of("user13_ApricotQuokka", 15),
+      KV.of("user18_BananaEmu", 25), KV.of("user6_AmberEchidna", 8),
+      KV.of("user2_AmberQuokka", 6), KV.of("user0_MagentaKangaroo", 4),
+      KV.of("user0_MagentaKangaroo", 3), KV.of("user2_AmberCockatoo", 13),
+      KV.of("user7_AlmondWallaby", 15), KV.of("user6_AmberNumbat", 11),
+      KV.of("user6_AmberQuokka", 4));
+
+  // The expected list of 'spammers'.
+  static final List<KV<String, Integer>> SPAMMERS = Arrays.asList(
+      KV.of("Robot-2", 66), KV.of("Robot-1", 116));
+
+  /** Runs CalculateSpammyUsers over USER_SCORES and expects exactly the SPAMMERS entries. */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testCalculateSpammyUsers() throws Exception {
+    Pipeline pipeline = TestPipeline.create();
+
+    PCollection<KV<String, Integer>> spammyUsers = pipeline
+        .apply(Create.of(USER_SCORES))
+        .apply(new CalculateSpammyUsers());
+
+    // The output must hold the spammers and nothing else, in any order.
+    DataflowAssert.that(spammyUsers).containsInAnyOrder(SPAMMERS);
+
+    pipeline.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java b/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java
new file mode 100644
index 0000000..f77a5d4
--- /dev/null
+++ b/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game;
+
+import com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo;
+import com.google.cloud.dataflow.examples.complete.game.UserScore.ParseEventFn;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.Filter;
+import com.google.cloud.dataflow.sdk.transforms.MapElements;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+
+import org.joda.time.Instant;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Tests of HourlyTeamScore.
+ * Because the pipeline was designed for easy readability and explanations, it lacks good
+ * modularity for testing. See our testing documentation for better ideas:
+ * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline.
+ */
+@RunWith(JUnit4.class)
+public class HourlyTeamScoreTest implements Serializable {
+
+  static final String[] GAME_EVENTS_ARRAY = new String[] {
+    "user0_MagentaKangaroo,MagentaKangaroo,3,1447955630000,2015-11-19 09:53:53.444",
+    "user13_ApricotQuokka,ApricotQuokka,15,1447955630000,2015-11-19 09:53:53.444",
+    "user6_AmberNumbat,AmberNumbat,11,1447955630000,2015-11-19 09:53:53.444",
+    "user7_AlmondWallaby,AlmondWallaby,15,1447955630000,2015-11-19 09:53:53.444",
+    "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,12,1447955630000,2015-11-19 09:53:53.444",
+    "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,11,1447955630000,2015-11-19 09:53:53.444",
+    "user19_BisqueBilby,BisqueBilby,6,1447955630000,2015-11-19 09:53:53.444",
+    "user19_BisqueBilby,BisqueBilby,8,1447955630000,2015-11-19 09:53:53.444",
+    // time gap...
+    "user0_AndroidGreenEchidna,AndroidGreenEchidna,0,1447965690000,2015-11-19 12:41:31.053",
+    "user0_MagentaKangaroo,MagentaKangaroo,4,1447965690000,2015-11-19 12:41:31.053",
+    "user2_AmberCockatoo,AmberCockatoo,13,1447965690000,2015-11-19 12:41:31.053",
+    "user18_BananaEmu,BananaEmu,7,1447965690000,2015-11-19 12:41:31.053",
+    "user3_BananaEmu,BananaEmu,17,1447965690000,2015-11-19 12:41:31.053",
+    "user18_BananaEmu,BananaEmu,1,1447965690000,2015-11-19 12:41:31.053",
+    "user18_ApricotCaneToad,ApricotCaneToad,14,1447965690000,2015-11-19 12:41:31.053"
+  };
+
+  static final List<String> GAME_EVENTS = Arrays.asList(GAME_EVENTS_ARRAY);
+
+  // Used to check the filtering: the user/score pairs of the events after the time gap.
+  // Declared as a typed List (not a raw KV[]) so the assertion below is type-checked.
+  static final List<KV<String, Integer>> FILTERED_EVENTS = Arrays.asList(
+      KV.of("user0_AndroidGreenEchidna", 0), KV.of("user0_MagentaKangaroo", 4),
+      KV.of("user2_AmberCockatoo", 13),
+      KV.of("user18_BananaEmu", 7), KV.of("user3_BananaEmu", 17),
+      KV.of("user18_BananaEmu", 1), KV.of("user18_ApricotCaneToad", 14));
+
+  /**
+   * Parses GAME_EVENTS, drops events at or before the cutoff timestamp, and checks that the
+   * remaining user/score pairs match FILTERED_EVENTS.
+   */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testUserScoresFilter() throws Exception {
+    Pipeline p = TestPipeline.create();
+
+    // Cutoff lies between the two batches of events (1447955630000 and 1447965690000).
+    final Instant startMinTimestamp = new Instant(1447965680000L);
+
+    PCollection<String> input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));
+
+    PCollection<KV<String, Integer>> output = input
+        .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
+        // keep only the events strictly after the cutoff.
+        .apply("FilterStartTime", Filter.byPredicate(
+            (GameActionInfo gInfo)
+                -> gInfo.getTimestamp() > startMinTimestamp.getMillis()))
+        // run a map to access the fields in the result.
+        .apply(MapElements
+            .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
+            .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));
+
+    DataflowAssert.that(output).containsInAnyOrder(FILTERED_EVENTS);
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java b/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java
new file mode 100644
index 0000000..641e2c3
--- /dev/null
+++ b/examples/java8/src/test/java/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game;
+
+import com.google.cloud.dataflow.examples.complete.game.UserScore.ExtractAndSumScore;
+import com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo;
+import com.google.cloud.dataflow.examples.complete.game.UserScore.ParseEventFn;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+import com.google.cloud.dataflow.sdk.transforms.MapElements;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Tests of UserScore: event parsing, per-user and per-team score sums, and bad-input handling.
+ */
+@RunWith(JUnit4.class)
+public class UserScoreTest implements Serializable {
+
+  static final String[] GAME_EVENTS_ARRAY = new String[] {
+    "user0_MagentaKangaroo,MagentaKangaroo,3,1447955630000,2015-11-19 09:53:53.444",
+    "user13_ApricotQuokka,ApricotQuokka,15,1447955630000,2015-11-19 09:53:53.444",
+    "user6_AmberNumbat,AmberNumbat,11,1447955630000,2015-11-19 09:53:53.444",
+    "user7_AlmondWallaby,AlmondWallaby,15,1447955630000,2015-11-19 09:53:53.444",
+    "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,12,1447955630000,2015-11-19 09:53:53.444",
+    "user6_AliceBlueDingo,AliceBlueDingo,4,xxxxxxx,2015-11-19 09:53:53.444",
+    "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,11,1447955630000,2015-11-19 09:53:53.444",
+    "THIS IS A PARSE ERROR,2015-11-19 09:53:53.444",
+    "user19_BisqueBilby,BisqueBilby,6,1447955630000,2015-11-19 09:53:53.444",
+    "user19_BisqueBilby,BisqueBilby,8,1447955630000,2015-11-19 09:53:53.444"
+  };
+
+  static final String[] GAME_EVENTS_ARRAY2 = new String[] {
+    "user6_AliceBlueDingo,AliceBlueDingo,4,xxxxxxx,2015-11-19 09:53:53.444",
+    "THIS IS A PARSE ERROR,2015-11-19 09:53:53.444",
+    "user13_BisqueBilby,BisqueBilby,xxx,1447955630000,2015-11-19 09:53:53.444"
+  };
+
+  static final List<String> GAME_EVENTS = Arrays.asList(GAME_EVENTS_ARRAY);
+  static final List<String> GAME_EVENTS2 = Arrays.asList(GAME_EVENTS_ARRAY2);
+
+  static final List<KV<String, Integer>> USER_SUMS = Arrays.asList(
+      KV.of("user0_MagentaKangaroo", 3), KV.of("user13_ApricotQuokka", 15),
+      KV.of("user6_AmberNumbat", 11), KV.of("user7_AlmondWallaby", 15),
+      KV.of("user7_AndroidGreenKookaburra", 23),
+      KV.of("user19_BisqueBilby", 14));
+
+  static final List<KV<String, Integer>> TEAM_SUMS = Arrays.asList(
+      KV.of("MagentaKangaroo", 3), KV.of("ApricotQuokka", 15),
+      KV.of("AmberNumbat", 11), KV.of("AlmondWallaby", 15),
+      KV.of("AndroidGreenKookaburra", 23),
+      KV.of("BisqueBilby", 14));
+
+  /** Test the ParseEventFn DoFn. */
+  @Test
+  public void testParseEventFn() {
+    DoFnTester<String, GameActionInfo> parseEventFn =
+        DoFnTester.of(new ParseEventFn());
+    // GAME_EVENTS_ARRAY holds ten lines, two of them malformed, so eight results are expected.
+    List<GameActionInfo> results = parseEventFn.processBatch(GAME_EVENTS_ARRAY);
+    Assert.assertEquals(8, results.size());
+    Assert.assertEquals("user0_MagentaKangaroo", results.get(0).getUser());
+    Assert.assertEquals("MagentaKangaroo", results.get(0).getTeam());
+    Assert.assertEquals(Integer.valueOf(3), results.get(0).getScore());
+  }
+
+  /** Tests ExtractAndSumScore("user"). */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testUserScoreSums() throws Exception {
+    Pipeline p = TestPipeline.create();
+
+    PCollection<String> input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));
+
+    PCollection<KV<String, Integer>> output = input
+        .apply(ParDo.of(new ParseEventFn()))
+        // Extract and sum username/score pairs from the event data.
+        .apply("ExtractUserScore", new ExtractAndSumScore("user"));
+
+    // Check the user score sums.
+    DataflowAssert.that(output).containsInAnyOrder(USER_SUMS);
+
+    p.run();
+  }
+
+  /** Tests ExtractAndSumScore("team"). */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testTeamScoreSums() throws Exception {
+    Pipeline p = TestPipeline.create();
+
+    PCollection<String> input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));
+
+    PCollection<KV<String, Integer>> output = input
+        .apply(ParDo.of(new ParseEventFn()))
+        // Extract and sum teamname/score pairs from the event data.
+        .apply("ExtractTeamScore", new ExtractAndSumScore("team"));
+
+    // Check the team score sums.
+    DataflowAssert.that(output).containsInAnyOrder(TEAM_SUMS);
+
+    p.run();
+  }
+
+  /** Test that bad input data is dropped appropriately. */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testUserScoresBadInput() throws Exception {
+    Pipeline p = TestPipeline.create();
+
+    PCollection<String> input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));
+
+    PCollection<KV<String, Integer>> extract = input
+        .apply(ParDo.of(new ParseEventFn()))
+        .apply(
+            MapElements.via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
+            .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));
+
+    DataflowAssert.that(extract).empty();
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/pom.xml
----------------------------------------------------------------------
diff --git a/java8examples/pom.xml b/java8examples/pom.xml
deleted file mode 100644
index eb3ef01..0000000
--- a/java8examples/pom.xml
+++ /dev/null
@@ -1,279 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-    Licensed to the Apache Software Foundation (ASF) under one or more
-    contributor license agreements.  See the NOTICE file distributed with
-    this work for additional information regarding copyright ownership.
-    The ASF licenses this file to You under the Apache License, Version 2.0
-    (the "License"); you may not use this file except in compliance with
-    the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <groupId>org.apache.beam</groupId>
-    <artifactId>parent</artifactId>
-    <version>0.1.0-incubating-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-
-  <artifactId>java8examples-all</artifactId>
-  <name>Apache Beam :: Examples :: Java 8 All</name>
-  <description>Apache Beam Java SDK provides a simple, Java-based
-    interface for processing virtually any size data.
-    This artifact includes examples of the SDK from a Java 8
-    user.</description>
-
-  <packaging>jar</packaging>
-
-  <profiles>
-    <profile>
-      <id>DataflowPipelineTests</id>
-      <properties>
-        <runIntegrationTestOnService>true</runIntegrationTestOnService>
-        <testGroups>com.google.cloud.dataflow.sdk.testing.RunnableOnService</testGroups>
-        <testParallelValue>both</testParallelValue>
-      </properties>
-    </profile>
-  </profiles>
-
-  <build>
-    <plugins>
-      <plugin>
-        <artifactId>maven-compiler-plugin</artifactId>
-        <configuration>
-          <source>1.8</source>
-          <target>1.8</target>
-          <testSource>1.8</testSource>
-          <testTarget>1.8</testTarget>
-        </configuration>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-dependency-plugin</artifactId>
-        <executions>
-          <execution>
-            <goals><goal>analyze-only</goal></goals>
-            <configuration>
-              <failOnWarning>true</failOnWarning>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-checkstyle-plugin</artifactId>
-        <version>2.12</version>
-        <dependencies>
-          <dependency>
-            <groupId>com.puppycrawl.tools</groupId>
-            <artifactId>checkstyle</artifactId>
-            <version>6.6</version>
-          </dependency>
-        </dependencies>
-        <configuration>
-          <configLocation>../checkstyle.xml</configLocation>
-          <consoleOutput>true</consoleOutput>
-          <failOnViolation>true</failOnViolation>
-          <includeTestSourceDirectory>true</includeTestSourceDirectory>
-          <includeResources>false</includeResources>
-        </configuration>
-        <executions>
-          <execution>
-            <goals>
-              <goal>check</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- Source plugin for generating source and test-source JARs. -->
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-source-plugin</artifactId>
-        <version>2.4</version>
-        <executions>
-          <execution>
-            <id>attach-sources</id>
-            <phase>compile</phase>
-            <goals>
-              <goal>jar</goal>
-            </goals>
-          </execution>
-          <execution>
-            <id>attach-test-sources</id>
-            <phase>test-compile</phase>
-            <goals>
-              <goal>test-jar</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-jar-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>default-jar</id>
-            <goals>
-              <goal>jar</goal>
-            </goals>
-          </execution>
-          <execution>
-            <id>default-test-jar</id>
-            <goals>
-              <goal>test-jar</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- Coverage analysis for unit tests. -->
-      <plugin>
-        <groupId>org.jacoco</groupId>
-        <artifactId>jacoco-maven-plugin</artifactId>
-      </plugin>
-    </plugins>
-  </build>
-
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.beam</groupId>
-      <artifactId>java-sdk-all</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.beam</groupId>
-      <artifactId>java-examples-all</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-      <version>${guava.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-api</artifactId>
-      <version>${slf4j.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro</artifactId>
-      <version>${avro.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>joda-time</groupId>
-      <artifactId>joda-time</artifactId>
-      <version>${joda.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.hamcrest</groupId>
-      <artifactId>hamcrest-all</artifactId>
-      <version>${hamcrest.version}</version>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-all</artifactId>
-      <version>1.10.19</version>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <version>${junit.version}</version>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-bigquery</artifactId>
-      <version>${bigquery.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.http-client</groupId>
-      <artifactId>google-http-client</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.oauth-client</groupId>
-      <artifactId>google-oauth-client</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-pubsub</artifactId>
-      <version>${pubsub.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.api-client</groupId>
-      <artifactId>google-api-client</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-  </dependencies>
-</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8.java
----------------------------------------------------------------------
diff --git a/java8examples/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8.java b/java8examples/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8.java
deleted file mode 100644
index c115ea0..0000000
--- a/java8examples/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.Filter;
-import com.google.cloud.dataflow.sdk.transforms.FlatMapElements;
-import com.google.cloud.dataflow.sdk.transforms.MapElements;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import java.util.Arrays;
-
-/**
- * An example that counts words in Shakespeare, using Java 8 language features.
- *
- * <p>See {@link MinimalWordCount} for a comprehensive explanation.
- */
-public class MinimalWordCountJava8 {
-
-  public static void main(String[] args) {
-    DataflowPipelineOptions options = PipelineOptionsFactory.create()
-        .as(DataflowPipelineOptions.class);
-
-    options.setRunner(BlockingDataflowPipelineRunner.class);
-
-    // CHANGE 1 of 3: Your project ID is required in order to run your pipeline on the Google Cloud.
-    options.setProject("SET_YOUR_PROJECT_ID_HERE");
-
-    // CHANGE 2 of 3: Your Google Cloud Storage path is required for staging local files.
-    options.setStagingLocation("gs://SET_YOUR_BUCKET_NAME_HERE/AND_STAGING_DIRECTORY");
-
-    Pipeline p = Pipeline.create(options);
-
-    p.apply(TextIO.Read.from("gs://dataflow-samples/shakespeare/*"))
-     .apply(FlatMapElements.via((String word) -> Arrays.asList(word.split("[^a-zA-Z']+")))
-         .withOutputType(new TypeDescriptor<String>() {}))
-     .apply(Filter.byPredicate((String word) -> !word.isEmpty()))
-     .apply(Count.<String>perElement())
-     .apply(MapElements
-         .via((KV<String, Long> wordCount) -> wordCount.getKey() + ": " + wordCount.getValue())
-         .withOutputType(new TypeDescriptor<String>() {}))
-
-     // CHANGE 3 of 3: A Google Cloud Storage path is required for writing the output results.
-     .apply(TextIO.Write.to("gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX"));
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/GameStats.java
----------------------------------------------------------------------
diff --git a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/GameStats.java b/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/GameStats.java
deleted file mode 100644
index 7c67d10..0000000
--- a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/GameStats.java
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game;
-
-import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
-import com.google.cloud.dataflow.examples.complete.game.utils.WriteWindowedToBigQuery;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.Combine;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
-import com.google.cloud.dataflow.sdk.transforms.MapElements;
-import com.google.cloud.dataflow.sdk.transforms.Mean;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.transforms.Values;
-import com.google.cloud.dataflow.sdk.transforms.View;
-import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import org.joda.time.DateTimeZone;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TimeZone;
-
-/**
- * This class is the fourth in a series of four pipelines that tell a story in a 'gaming'
- * domain, following {@link UserScore}, {@link HourlyTeamScore}, and {@link LeaderBoard}.
- * New concepts: session windows and finding session duration; use of both
- * singleton and non-singleton side inputs.
- *
- * <p> This pipeline builds on the {@link LeaderBoard} functionality, and adds some "business
- * intelligence" analysis: abuse detection and usage patterns. The pipeline derives the Mean user
- * score sum for a window, and uses that information to identify likely spammers/robots. (The robots
- * have a higher click rate than the human users). The 'robot' users are then filtered out when
- * calculating the team scores.
- *
- * <p> Additionally, user sessions are tracked: that is, we find bursts of user activity using
- * session windows. Then, the mean session duration information is recorded in the context of
- * subsequent fixed windowing. (This could be used to tell us what games are giving us greater
- * user retention).
- *
- * <p> Run {@code com.google.cloud.dataflow.examples.complete.game.injector.Injector} to generate
- * pubsub data for this pipeline. The {@code Injector} documentation provides more detail.
- *
- * <p> To execute this pipeline using the Dataflow service, specify the pipeline configuration
- * like this:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- *   --dataset=YOUR-DATASET
- *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
- * }
- * </pre>
- * where the BigQuery dataset you specify must already exist. The PubSub topic you specify should
- * be the same topic to which the Injector is publishing.
- */
-public class GameStats extends LeaderBoard {
-
-  private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms";
-
-  private static DateTimeFormatter fmt =
-      DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS")
-          .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
-
-  /**
-   * Filter out all but those users with a high clickrate, which we will consider as 'spammy' users.
-   * We do this by finding the mean total score per user, then using that information as a side
-   * input to filter out all but those user scores that are > (mean * SCORE_WEIGHT).
-   */
-  // [START DocInclude_AbuseDetect]
-  public static class CalculateSpammyUsers
-      extends PTransform<PCollection<KV<String, Integer>>, PCollection<KV<String, Integer>>> {
-    private static final Logger LOG = LoggerFactory.getLogger(CalculateSpammyUsers.class);
-    private static final double SCORE_WEIGHT = 2.5;
-
-    @Override
-    public PCollection<KV<String, Integer>> apply(PCollection<KV<String, Integer>> userScores) {
-
-      // Get the sum of scores for each user.
-      PCollection<KV<String, Integer>> sumScores = userScores
-          .apply("UserSum", Sum.<String>integersPerKey());
-
-      // Extract the score from each element, and use it to find the global mean.
-      final PCollectionView<Double> globalMeanScore = sumScores.apply(Values.<Integer>create())
-          .apply(Mean.<Integer>globally().asSingletonView());
-
-      // Filter the user sums using the global mean.
-      PCollection<KV<String, Integer>> filtered = sumScores
-          .apply(ParDo
-              .named("ProcessAndFilter")
-              // use the derived mean total score as a side input
-              .withSideInputs(globalMeanScore)
-              .of(new DoFn<KV<String, Integer>, KV<String, Integer>>() {
-                private final Aggregator<Long, Long> numSpammerUsers =
-                  createAggregator("SpammerUsers", new Sum.SumLongFn());
-                @Override
-                public void processElement(ProcessContext c) {
-                  Integer score = c.element().getValue();
-                  Double gmc = c.sideInput(globalMeanScore);
-                  if (score > (gmc * SCORE_WEIGHT)) {
-                    LOG.info("user " + c.element().getKey() + " spammer score " + score
-                        + " with mean " + gmc);
-                    numSpammerUsers.addValue(1L);
-                    c.output(c.element());
-                  }
-                }
-              }));
-      return filtered;
-    }
-  }
-  // [END DocInclude_AbuseDetect]
-
-  /**
-   * Calculate and output an element's session duration.
-   */
-  private static class UserSessionInfoFn extends DoFn<KV<String, Integer>, Integer>
-      implements RequiresWindowAccess {
-
-    @Override
-    public void processElement(ProcessContext c) {
-      IntervalWindow w = (IntervalWindow) c.window();
-      int duration = new Duration(
-          w.start(), w.end()).toPeriod().toStandardMinutes().getMinutes();
-      c.output(duration);
-    }
-  }
-
-
-  /**
-   * Options supported by {@link GameStats}.
-   */
-  static interface Options extends LeaderBoard.Options {
-    @Description("Numeric value of fixed window duration for user analysis, in minutes")
-    @Default.Integer(60)
-    Integer getFixedWindowDuration();
-    void setFixedWindowDuration(Integer value);
-
-    @Description("Numeric value of gap between user sessions, in minutes")
-    @Default.Integer(5)
-    Integer getSessionGap();
-    void setSessionGap(Integer value);
-
-    @Description("Numeric value of fixed window for finding mean of user session duration, "
-        + "in minutes")
-    @Default.Integer(30)
-    Integer getUserActivityWindowDuration();
-    void setUserActivityWindowDuration(Integer value);
-
-    @Description("Prefix used for the BigQuery table names")
-    @Default.String("game_stats")
-    String getTablePrefix();
-    void setTablePrefix(String value);
-  }
-
-
-  /**
-   * Create a map of information that describes how to write pipeline output to BigQuery. This map
-   * is used to write information about team score sums.
-   */
-  protected static Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>>
-      configureWindowedWrite() {
-    Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>> tableConfigure =
-        new HashMap<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>>();
-    tableConfigure.put("team",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>("STRING",
-            c -> c.element().getKey()));
-    tableConfigure.put("total_score",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>("INTEGER",
-            c -> c.element().getValue()));
-    tableConfigure.put("window_start",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>("STRING",
-          c -> { IntervalWindow w = (IntervalWindow) c.window();
-                 return fmt.print(w.start()); }));
-    tableConfigure.put("processing_time",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
-            "STRING", c -> fmt.print(Instant.now())));
-    return tableConfigure;
-  }
-
-  /**
-   * Create a map of information that describes how to write pipeline output to BigQuery. This map
-   * is used to write information about mean user session time.
-   */
-  protected static Map<String, WriteWindowedToBigQuery.FieldInfo<Double>>
-      configureSessionWindowWrite() {
-
-    Map<String, WriteWindowedToBigQuery.FieldInfo<Double>> tableConfigure =
-        new HashMap<String, WriteWindowedToBigQuery.FieldInfo<Double>>();
-    tableConfigure.put("window_start",
-        new WriteWindowedToBigQuery.FieldInfo<Double>("STRING",
-          c -> { IntervalWindow w = (IntervalWindow) c.window();
-                 return fmt.print(w.start()); }));
-    tableConfigure.put("mean_duration",
-        new WriteWindowedToBigQuery.FieldInfo<Double>("FLOAT", c -> c.element()));
-    return tableConfigure;
-  }
-
-
-
-  public static void main(String[] args) throws Exception {
-
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    // Enforce that this pipeline is always run in streaming mode.
-    options.setStreaming(true);
-    // Allow the pipeline to be cancelled automatically.
-    options.setRunner(DataflowPipelineRunner.class);
-    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
-    Pipeline pipeline = Pipeline.create(options);
-
-    // Read Events from Pub/Sub using custom timestamps
-    PCollection<GameActionInfo> rawEvents = pipeline
-        .apply(PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).topic(options.getTopic()))
-        .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()));
-
-    // Extract username/score pairs from the event stream
-    PCollection<KV<String, Integer>> userEvents =
-        rawEvents.apply("ExtractUserScore",
-          MapElements.via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
-            .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));
-
-    // Calculate the total score per user over fixed windows, and
-    // emit cumulative updates for late data.
-    final PCollectionView<Map<String, Integer>> spammersView = userEvents
-      .apply(Window.named("FixedWindowsUser")
-          .<KV<String, Integer>>into(FixedWindows.of(
-              Duration.standardMinutes(options.getFixedWindowDuration())))
-          )
-
-      // Filter out everyone but those with (SCORE_WEIGHT * avg) clickrate.
-      // These might be robots/spammers.
-      .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
-      // Derive a view from the collection of spammer users. It will be used as a side input
-      // in calculating the team score sums, below.
-      .apply("CreateSpammersView", View.<String, Integer>asMap());
-
-    // [START DocInclude_FilterAndCalc]
-    // Calculate the total score per team over fixed windows,
-    // and emit cumulative updates for late data. Uses the side input derived above-- the set of
-    // suspected robots-- to filter out scores from those users from the sum.
-    // Write the results to BigQuery.
-    rawEvents
-      .apply(Window.named("WindowIntoFixedWindows")
-          .<GameActionInfo>into(FixedWindows.of(
-              Duration.standardMinutes(options.getFixedWindowDuration())))
-          )
-      // Filter out the detected spammer users, using the side input derived above.
-      .apply(ParDo.named("FilterOutSpammers")
-              .withSideInputs(spammersView)
-              .of(new DoFn<GameActionInfo, GameActionInfo>() {
-                @Override
-                public void processElement(ProcessContext c) {
-                  // If the user is not in the spammers Map, output the data element.
-                  if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) {
-                    c.output(c.element());
-                  }
-                }
-              }))
-      // Extract and sum teamname/score pairs from the event data.
-      .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
-      // [END DocInclude_FilterAndCalc]
-      // Write the result to BigQuery
-      .apply("WriteTeamSums",
-             new WriteWindowedToBigQuery<KV<String, Integer>>(
-                options.getTablePrefix() + "_team", configureWindowedWrite()));
-
-
-    // [START DocInclude_SessionCalc]
-    // Detect user sessions-- that is, a burst of activity separated by a gap from further
-    // activity. Find and record the mean session lengths.
-    // This information could help the game designers track the changing user engagement
-    // as their set of games changes.
-    userEvents
-      .apply(Window.named("WindowIntoSessions")
-            .<KV<String, Integer>>into(
-                  Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
-        .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()))
-      // For this use, we care only about the existence of the session, not any particular
-      // information aggregated over it, so the following is an efficient way to do that.
-      .apply(Combine.perKey(x -> 0))
-      // Get the duration per session.
-      .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn()))
-      // [END DocInclude_SessionCalc]
-      // [START DocInclude_Rewindow]
-      // Re-window to process groups of session sums according to when the sessions complete.
-      .apply(Window.named("WindowToExtractSessionMean")
-            .<Integer>into(
-                FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration()))))
-      // Find the mean session duration in each window.
-      .apply(Mean.<Integer>globally().withoutDefaults())
-      // Write this info to a BigQuery table.
-      .apply("WriteAvgSessionLength",
-             new WriteWindowedToBigQuery<Double>(
-                options.getTablePrefix() + "_sessions", configureSessionWindowWrite()));
-    // [END DocInclude_Rewindow]
-
-
-    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
-    // command line.
-    PipelineResult result = pipeline.run();
-    dataflowUtils.waitToFinish(result);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java
----------------------------------------------------------------------
diff --git a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java b/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java
deleted file mode 100644
index 481b9df..0000000
--- a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game;
-
-import com.google.cloud.dataflow.examples.complete.game.utils.WriteWindowedToBigQuery;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.transforms.Filter;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.WithTimestamps;
-import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-
-import org.joda.time.DateTimeZone;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TimeZone;
-
-/**
- * This class is the second in a series of four pipelines that tell a story in a 'gaming'
- * domain, following {@link UserScore}. In addition to the concepts introduced in {@link UserScore},
- * new concepts include: windowing and element timestamps; use of {@code Filter.byPredicate()}.
- *
- * <p> This pipeline processes data collected from gaming events in batch, building on {@link
- * UserScore} but using fixed windows. It calculates the sum of scores per team, for each window,
- * optionally allowing specification of two timestamps before and after which data is filtered out.
- * This allows a model where late data collected after the intended analysis window can be included,
- * and any late-arriving data prior to the beginning of the analysis window can be removed as well.
- * By using windowing and adding element timestamps, we can do finer-grained analysis than with the
- * {@link UserScore} pipeline. However, our batch processing is high-latency, in that we don't get
- * results from plays at the beginning of the batch's time period until the batch is processed.
- *
- * <p> To execute this pipeline using the Dataflow service, specify the pipeline configuration
- * like this:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- *   --dataset=YOUR-DATASET
- * }
- * </pre>
- * where the BigQuery dataset you specify must already exist.
- *
- * <p> Optionally include {@code --input} to specify the batch input file path.
- * To indicate a time after which the data should be filtered out, include the
- * {@code --stopMin} arg. E.g., {@code --stopMin=2015-10-18-23-59} indicates that any data
- * timestamped after 23:59 PST on 2015-10-18 should not be included in the analysis.
- * To indicate a time before which data should be filtered out, include the {@code --startMin} arg.
- * If you're using the default input specified in {@link UserScore},
- * "gs://dataflow-samples/game/gaming_data*.csv", then
- * {@code --startMin=2015-11-16-16-10 --stopMin=2015-11-17-16-10} are good values.
- */
-public class HourlyTeamScore extends UserScore {
-
-  private static DateTimeFormatter fmt =
-      DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS")
-          .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
-  private static DateTimeFormatter minFmt =
-      DateTimeFormat.forPattern("yyyy-MM-dd-HH-mm")
-          .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
-
-
-  /**
-   * Options supported by {@link HourlyTeamScore}.
-   */
-  static interface Options extends UserScore.Options {
-
-    @Description("Numeric value of fixed window duration, in minutes")
-    @Default.Integer(60)
-    Integer getWindowDuration();
-    void setWindowDuration(Integer value);
-
-    @Description("String representation of the first minute after which to generate results,"
-        + "in the format: yyyy-MM-dd-HH-mm . This time should be in PST."
-        + "Any input data timestamped prior to that minute won't be included in the sums.")
-    @Default.String("1970-01-01-00-00")
-    String getStartMin();
-    void setStartMin(String value);
-
-    @Description("String representation of the first minute for which to not generate results,"
-        + "in the format: yyyy-MM-dd-HH-mm . This time should be in PST."
-        + "Any input data timestamped after that minute won't be included in the sums.")
-    @Default.String("2100-01-01-00-00")
-    String getStopMin();
-    void setStopMin(String value);
-
-    @Description("The BigQuery table name. Should not already exist.")
-    @Default.String("hourly_team_score")
-    String getTableName();
-    void setTableName(String value);
-  }
-
-  /**
-   * Create a map of information that describes how to write pipeline output to BigQuery. This map
-   * is passed to the {@link WriteWindowedToBigQuery} constructor to write team score sums and
-   * includes information about window start time.
-   */
-  protected static Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>>
-      configureWindowedTableWrite() {
-    Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>> tableConfig =
-        new HashMap<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>>();
-    tableConfig.put("team",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>("STRING",
-            c -> c.element().getKey()));
-    tableConfig.put("total_score",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>("INTEGER",
-            c -> c.element().getValue()));
-    tableConfig.put("window_start",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>("STRING",
-          c -> { IntervalWindow w = (IntervalWindow) c.window();
-                 return fmt.print(w.start()); }));
-    return tableConfig;
-  }
-
-
-  /**
-   * Run a batch pipeline to do windowed analysis of the data.
-   */
-  // [START DocInclude_HTSMain]
-  public static void main(String[] args) throws Exception {
-    // Begin constructing a pipeline configured by commandline flags.
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    Pipeline pipeline = Pipeline.create(options);
-
-    final Instant stopMinTimestamp = new Instant(minFmt.parseMillis(options.getStopMin()));
-    final Instant startMinTimestamp = new Instant(minFmt.parseMillis(options.getStartMin()));
-
-    // Read 'gaming' events from a text file.
-    pipeline.apply(TextIO.Read.from(options.getInput()))
-      // Parse the incoming data.
-      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
-
-      // Filter out data before and after the given times so that it is not included
-      // in the calculations. As we collect data in batches (say, by day), the batch for the day
-      // that we want to analyze could potentially include some late-arriving data from the previous
-      // day. If so, we want to weed it out. Similarly, if we include data from the following day
-      // (to scoop up late-arriving events from the day we're analyzing), we need to weed out events
-      // that fall after the time period we want to analyze.
-      // [START DocInclude_HTSFilters]
-      .apply("FilterStartTime", Filter.byPredicate(
-          (GameActionInfo gInfo)
-              -> gInfo.getTimestamp() > startMinTimestamp.getMillis()))
-      .apply("FilterEndTime", Filter.byPredicate(
-          (GameActionInfo gInfo)
-              -> gInfo.getTimestamp() < stopMinTimestamp.getMillis()))
-      // [END DocInclude_HTSFilters]
-
-      // [START DocInclude_HTSAddTsAndWindow]
-      // Add an element timestamp based on the event log, and apply fixed windowing.
-      .apply("AddEventTimestamps",
-             WithTimestamps.of((GameActionInfo i) -> new Instant(i.getTimestamp())))
-      .apply(Window.named("FixedWindowsTeam")
-          .<GameActionInfo>into(FixedWindows.of(
-                Duration.standardMinutes(options.getWindowDuration()))))
-      // [END DocInclude_HTSAddTsAndWindow]
-
-      // Extract and sum teamname/score pairs from the event data.
-      .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
-      .apply("WriteTeamScoreSums",
-        new WriteWindowedToBigQuery<KV<String, Integer>>(options.getTableName(),
-            configureWindowedTableWrite()));
-
-
-    pipeline.run();
-  }
-  // [END DocInclude_HTSMain]
-
-}

[49/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java
deleted file mode 100644
index 24f6a45..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoder.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.UTFDataFormatException;
-
-/**
- * A {@link BigEndianIntegerCoder} encodes {@link Integer Integers} in 4 bytes, big-endian.
- */
-public class BigEndianIntegerCoder extends AtomicCoder<Integer> {
-
-  @JsonCreator
-  public static BigEndianIntegerCoder of() {
-    return INSTANCE;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static final BigEndianIntegerCoder INSTANCE = new BigEndianIntegerCoder();
-
-  private BigEndianIntegerCoder() {}
-
-  @Override
-  public void encode(Integer value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Integer");
-    }
-    new DataOutputStream(outStream).writeInt(value);
-  }
-
-  @Override
-  public Integer decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    try {
-      return new DataInputStream(inStream).readInt();
-    } catch (EOFException | UTFDataFormatException exn) {
-      // These exceptions correspond to decoding problems, so change
-      // what kind of exception they're branded as.
-      throw new CoderException(exn);
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. This coder is injective.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}, because {@link #getEncodedElementByteSize} runs in constant time.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(Integer value, Context context) {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code 4}, the size in bytes of an integer's big endian encoding.
-   */
-  @Override
-  protected long getEncodedElementByteSize(Integer value, Context context)
-      throws Exception {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Integer");
-    }
-    return 4;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java
deleted file mode 100644
index 4196608..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoder.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.UTFDataFormatException;
-
-/**
- * A {@link BigEndianLongCoder} encodes {@link Long}s in 8 bytes, big-endian.
- */
-public class BigEndianLongCoder extends AtomicCoder<Long> {
-
-  @JsonCreator
-  public static BigEndianLongCoder of() {
-    return INSTANCE;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static final BigEndianLongCoder INSTANCE = new BigEndianLongCoder();
-
-  private BigEndianLongCoder() {}
-
-  @Override
-  public void encode(Long value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Long");
-    }
-    new DataOutputStream(outStream).writeLong(value);
-  }
-
-  @Override
-  public Long decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    try {
-      return new DataInputStream(inStream).readLong();
-    } catch (EOFException | UTFDataFormatException exn) {
-      // These exceptions correspond to decoding problems, so change
-      // what kind of exception they're branded as.
-      throw new CoderException(exn);
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. This coder is injective.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}, since {@link #getEncodedElementByteSize} returns a constant.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(Long value, Context context) {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code 8}, the byte size of a big-endian encoded {@code Long}.
-   */
-  @Override
-  protected long getEncodedElementByteSize(Long value, Context context)
-      throws Exception {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Long");
-    }
-    return 8;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java
deleted file mode 100644
index 1e555c6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoder.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.ExposedByteArrayOutputStream;
-import com.google.cloud.dataflow.sdk.util.StreamUtils;
-import com.google.cloud.dataflow.sdk.util.VarInt;
-import com.google.common.io.ByteStreams;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * A {@link Coder} for {@code byte[]}.
- *
- * <p>The encoding format is as follows:
- * <ul>
- * <li>If in a non-nested context (the {@code byte[]} is the only value in the stream), the
- * bytes are read/written directly.</li>
- * <li>If in a nested context, the bytes are prefixed with the length of the array,
- * encoded via a {@link VarIntCoder}.</li>
- * </ul>
- */
-public class ByteArrayCoder extends AtomicCoder<byte[]> {
-
-  @JsonCreator
-  public static ByteArrayCoder of() {
-    return INSTANCE;
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static final ByteArrayCoder INSTANCE = new ByteArrayCoder();
-
-  private ByteArrayCoder() {}
-
-  @Override
-  public void encode(byte[] value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null byte[]");
-    }
-    if (!context.isWholeStream) {
-      VarInt.encode(value.length, outStream);
-      outStream.write(value);
-    } else {
-      outStream.write(value);
-    }
-  }
-
-  /**
-   * Encodes the provided {@code value} with the identical encoding to {@link #encode}, but with
-   * optimizations that take ownership of the value.
-   *
-   * <p>Once passed to this method, {@code value} should never be observed or mutated again.
-   */
-  public void encodeAndOwn(byte[] value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    if (!context.isWholeStream) {
-      VarInt.encode(value.length, outStream);
-      outStream.write(value);
-    } else {
-      if (outStream instanceof ExposedByteArrayOutputStream) {
-        ((ExposedByteArrayOutputStream) outStream).writeAndOwn(value);
-      } else {
-        outStream.write(value);
-      }
-    }
-  }
-
-  @Override
-  public byte[] decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    if (context.isWholeStream) {
-      return StreamUtils.getBytes(inStream);
-    } else {
-      int length = VarInt.decodeInt(inStream);
-      if (length < 0) {
-        throw new IOException("invalid length " + length);
-      }
-      byte[] value = new byte[length];
-      ByteStreams.readFully(inStream, value);
-      return value;
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return objects that are equal if the two arrays contain the same bytes.
-   */
-  @Override
-  public Object structuralValue(byte[] value) {
-    return new StructuralByteArray(value);
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true} since {@link #getEncodedElementByteSize} runs in
-   * constant time using the {@code length} of the provided array.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(byte[] value, Context context) {
-    return true;
-  }
-
-  @Override
-  protected long getEncodedElementByteSize(byte[] value, Context context)
-      throws Exception {
-    if (value == null) {
-      throw new CoderException("cannot encode a null byte[]");
-    }
-    long size = 0;
-    if (!context.isWholeStream) {
-      size += VarInt.getLength(value.length);
-    }
-    return size + value.length;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteCoder.java
deleted file mode 100644
index 9f17497..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteCoder.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.UTFDataFormatException;
-
-/**
- * A {@link ByteCoder} encodes {@link Byte} values in 1 byte using Java serialization.
- */
-public class ByteCoder extends AtomicCoder<Byte> {
-
-  @JsonCreator
-  public static ByteCoder of() {
-    return INSTANCE;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static final ByteCoder INSTANCE = new ByteCoder();
-
-  private ByteCoder() {}
-
-  @Override
-  public void encode(Byte value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Byte");
-    }
-    outStream.write(value.byteValue());
-  }
-
-  @Override
-  public Byte decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    try {
-      // value will be between 0-255, -1 for EOF
-      int value = inStream.read();
-      if (value == -1) {
-        throw new EOFException("EOF encountered decoding 1 byte from input stream");
-      }
-      return (byte) value;
-    } catch (EOFException | UTFDataFormatException exn) {
-      // These exceptions correspond to decoding problems, so change
-      // what kind of exception they're branded as.
-      throw new CoderException(exn);
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * {@link ByteCoder} will never throw a {@link Coder.NonDeterministicException}; bytes can always
-   * be encoded deterministically.
-   */
-  @Override
-  public void verifyDeterministic() {}
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. This coder is injective.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. {@link ByteCoder#getEncodedElementByteSize} returns a constant.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(Byte value, Context context) {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code 1}, the byte size of a {@link Byte} encoded using Java serialization.
-   */
-  @Override
-  protected long getEncodedElementByteSize(Byte value, Context context)
-      throws Exception {
-    if (value == null) {
-      throw new CoderException("cannot estimate size for unsupported null value");
-    }
-    return 1;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteStringCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteStringCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteStringCoder.java
deleted file mode 100644
index b7c1a3c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ByteStringCoder.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.VarInt;
-import com.google.common.io.ByteStreams;
-import com.google.protobuf.ByteString;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * A {@link Coder} for {@link ByteString} objects based on their encoded Protocol Buffer form.
- *
- * <p>When this code is used in a nested {@link Coder.Context}, the serialized {@link ByteString}
- * objects are first delimited by their size.
- */
-public class ByteStringCoder extends AtomicCoder<ByteString> {
-
-  @JsonCreator
-  public static ByteStringCoder of() {
-    return INSTANCE;
-  }
-
-  /***************************/
-
-  private static final ByteStringCoder INSTANCE = new ByteStringCoder();
-
-  private ByteStringCoder() {}
-
-  @Override
-  public void encode(ByteString value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null ByteString");
-    }
-
-    if (!context.isWholeStream) {
-      // ByteString is not delimited, so write its size before its contents.
-      VarInt.encode(value.size(), outStream);
-    }
-    value.writeTo(outStream);
-  }
-
-  @Override
-  public ByteString decode(InputStream inStream, Context context) throws IOException {
-    if (context.isWholeStream) {
-      return ByteString.readFrom(inStream);
-    }
-
-    int size = VarInt.decodeInt(inStream);
-    // ByteString reads to the end of the input stream, so give it a limited stream of exactly
-    // the right length. Also set its chunk size so that the ByteString will contain exactly
-    // one chunk.
-    return ByteString.readFrom(ByteStreams.limit(inStream, size), size);
-  }
-
-  @Override
-  protected long getEncodedElementByteSize(ByteString value, Context context) throws Exception {
-    int size = value.size();
-
-    if (context.isWholeStream) {
-      return size;
-    }
-    return VarInt.getLength(size) + size;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * <p>Returns true; the encoded output of two invocations of {@link ByteStringCoder} in the same
-   * {@link Coder.Context} will be identical if and only if the original {@link ByteString} objects
-   * are equal according to {@link Object#equals}.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * <p>Returns true. {@link ByteString#size} returns the size of an array and a {@link VarInt}.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(ByteString value, Context context) {
-    return true;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CannotProvideCoderException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CannotProvideCoderException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CannotProvideCoderException.java
deleted file mode 100644
index 97b5e23..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CannotProvideCoderException.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-/**
- * The exception thrown when a {@link CoderProvider} cannot
- * provide a {@link Coder} that has been requested.
- */
-public class CannotProvideCoderException extends Exception {
-  private final ReasonCode reason;
-
-  public CannotProvideCoderException(String message) {
-    this(message, ReasonCode.UNKNOWN);
-  }
-
-  public CannotProvideCoderException(String message, ReasonCode reason) {
-    super(message);
-    this.reason = reason;
-  }
-
-  public CannotProvideCoderException(String message, Throwable cause) {
-    this(message, cause, ReasonCode.UNKNOWN);
-  }
-
-  public CannotProvideCoderException(String message, Throwable cause, ReasonCode reason) {
-    super(message, cause);
-    this.reason = reason;
-  }
-
-  public CannotProvideCoderException(Throwable cause) {
-    this(cause, ReasonCode.UNKNOWN);
-  }
-
-  public CannotProvideCoderException(Throwable cause, ReasonCode reason) {
-    super(cause);
-    this.reason = reason;
-  }
-
-  /**
-   * @return the reason that Coder inference failed.
-   */
-  public ReasonCode getReason() {
-    return reason;
-  }
-
-  /**
-   * Returns the inner-most {@link CannotProvideCoderException} when they are deeply nested.
-   *
-   * <p>For example, if a coder for {@code List<KV<Integer, Whatsit>>} cannot be provided because
-   * there is no known coder for {@code Whatsit}, the root cause of the exception should be a
-   * CannotProvideCoderException with details pertinent to {@code Whatsit}, suppressing the
-   * intermediate layers.
-   */
-  public Throwable getRootCause() {
-    Throwable cause = getCause();
-    if (cause == null) {
-      return this;
-    } else if (!(cause instanceof CannotProvideCoderException)) {
-      return cause;
-    } else {
-      return ((CannotProvideCoderException) cause).getRootCause();
-    }
-  }
-
-  /**
-   * Indicates the reason that {@link Coder} inference failed.
-   */
-  public static enum ReasonCode {
-    /**
-     * The reason a coder could not be provided is unknown or does have an established
-     * {@link ReasonCode}.
-     */
-    UNKNOWN,
-
-    /**
-     * The reason a coder could not be provided is type erasure, for example when requesting
-     * coder inference for a {@code List<T>} where {@code T} is unknown.
-     */
-    TYPE_ERASURE
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java
deleted file mode 100644
index f3a8bec..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Coder.java
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-import com.google.common.base.Joiner;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Objects;
-import com.google.common.base.Preconditions;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link Coder Coder&lt;T&gt;} defines how to encode and decode values of type {@code T} into
- * byte streams.
- *
- * <p>{@link Coder} instances are serialized during job creation and deserialized
- * before use, via JSON serialization. See {@link SerializableCoder} for an example of a
- * {@link Coder} that adds a custom field to
- * the {@link Coder} serialization. It provides a constructor annotated with
- * {@link com.fasterxml.jackson.annotation.JsonCreator}, which is a factory method used when
- * deserializing a {@link Coder} instance.
- *
- * <p>{@link Coder} classes for compound types are often composed from coder classes for types
- * contains therein. The composition of {@link Coder} instances into a coder for the compound
- * class is the subject of the {@link CoderFactory} type, which enables automatic generic
- * composition of {@link Coder} classes within the {@link CoderRegistry}. With particular
- * static methods on a compound {@link Coder} class, a {@link CoderFactory} can be automatically
- * inferred. See {@link KvCoder} for an example of a simple compound {@link Coder} that supports
- * automatic composition in the {@link CoderRegistry}.
- *
- * <p>The binary format of a {@link Coder} is identified by {@link #getEncodingId()}; be sure to
- * understand the requirements for evolving coder formats.
- *
- * <p>All methods of a {@link Coder} are required to be thread safe.
- *
- * @param <T> the type of the values being transcoded
- */
-public interface Coder<T> extends Serializable {
-  /** The context in which encoding or decoding is being done. */
-  public static class Context {
-    /**
-     * The outer context: the value being encoded or decoded takes
-     * up the remainder of the record/stream contents.
-     */
-    public static final Context OUTER = new Context(true);
-
-    /**
-     * The nested context: the value being encoded or decoded is
-     * (potentially) a part of a larger record/stream contents, and
-     * may have other parts encoded or decoded after it.
-     */
-    public static final Context NESTED = new Context(false);
-
-    /**
-     * Whether the encoded or decoded value fills the remainder of the
-     * output or input (resp.) record/stream contents.  If so, then
-     * the size of the decoded value can be determined from the
-     * remaining size of the record/stream contents, and so explicit
-     * lengths aren't required.
-     */
-    public final boolean isWholeStream;
-
-    public Context(boolean isWholeStream) {
-      this.isWholeStream = isWholeStream;
-    }
-
-    public Context nested() {
-      return NESTED;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (!(obj instanceof Context)) {
-        return false;
-      }
-      return Objects.equal(isWholeStream, ((Context) obj).isWholeStream);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hashCode(isWholeStream);
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(Context.class)
-          .addValue(isWholeStream ? "OUTER" : "NESTED").toString();
-    }
-  }
-
-  /**
-   * Encodes the given value of type {@code T} onto the given output stream
-   * in the given context.
-   *
-   * @throws IOException if writing to the {@code OutputStream} fails
-   * for some reason
-   * @throws CoderException if the value could not be encoded for some reason
-   */
-  public void encode(T value, OutputStream outStream, Context context)
-      throws CoderException, IOException;
-
-  /**
-   * Decodes a value of type {@code T} from the given input stream in
-   * the given context.  Returns the decoded value.
-   *
-   * @throws IOException if reading from the {@code InputStream} fails
-   * for some reason
-   * @throws CoderException if the value could not be decoded for some reason
-   */
-  public T decode(InputStream inStream, Context context)
-      throws CoderException, IOException;
-
-  /**
-   * If this is a {@code Coder} for a parameterized type, returns the
-   * list of {@code Coder}s being used for each of the parameters, or
-   * returns {@code null} if this cannot be done or this is not a
-   * parameterized type.
-   */
-  public List<? extends Coder<?>> getCoderArguments();
-
-  /**
-   * Returns the {@link CloudObject} that represents this {@code Coder}.
-   */
-  public CloudObject asCloudObject();
-
-  /**
-   * Throw {@link NonDeterministicException} if the coding is not deterministic.
-   *
-   * <p>In order for a {@code Coder} to be considered deterministic,
-   * the following must be true:
-   * <ul>
-   *   <li>two values that compare as equal (via {@code Object.equals()}
-   *       or {@code Comparable.compareTo()}, if supported) have the same
-   *       encoding.
-   *   <li>the {@code Coder} always produces a canonical encoding, which is the
-   *       same for an instance of an object even if produced on different
-   *       computers at different times.
-   * </ul>
-   *
-   * @throws Coder.NonDeterministicException if this coder is not deterministic.
-   */
-  public void verifyDeterministic() throws Coder.NonDeterministicException;
-
-  /**
-   * Returns {@code true} if this {@link Coder} is injective with respect to {@link Objects#equals}.
-   *
-   * <p>Whenever the encoded bytes of two values are equal, then the original values are equal
-   * according to {@code Objects.equals()}. Note that this is well-defined for {@code null}.
-   *
-   * <p>This condition is most notably false for arrays. More generally, this condition is false
-   * whenever {@code equals()} compares object identity, rather than performing a
-   * semantic/structural comparison.
-   */
-  public boolean consistentWithEquals();
-
-  /**
-   * Returns an object with an {@code Object.equals()} method that represents structural equality
-   * on the argument.
-   *
-   * <p>For any two values {@code x} and {@code y} of type {@code T}, if their encoded bytes are the
-   * same, then it must be the case that {@code structuralValue(x).equals(@code structuralValue(y)}.
-   *
-   * <p>Most notably:
-   * <ul>
-   *   <li>The structural value for an array coder should perform a structural comparison of the
-   *   contents of the arrays, rather than the default behavior of comparing according to object
-   *   identity.
-   *   <li>The structural value for a coder accepting {@code null} should be a proper object with
-   *   an {@code equals()} method, even if the input value is {@code null}.
-   * </ul>
-   *
-   * <p>See also {@link #consistentWithEquals()}.
-   */
-  public Object structuralValue(T value) throws Exception;
-
-  /**
-   * Returns whether {@link #registerByteSizeObserver} cheap enough to
-   * call for every element, that is, if this {@code Coder} can
-   * calculate the byte size of the element to be coded in roughly
-   * constant time (or lazily).
-   *
-   * <p>Not intended to be called by user code, but instead by
-   * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner}
-   * implementations.
-   */
-  public boolean isRegisterByteSizeObserverCheap(T value, Context context);
-
-  /**
-   * Notifies the {@code ElementByteSizeObserver} about the byte size
-   * of the encoded value using this {@code Coder}.
-   *
-   * <p>Not intended to be called by user code, but instead by
-   * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner}
-   * implementations.
-   */
-  public void registerByteSizeObserver(
-      T value, ElementByteSizeObserver observer, Context context)
-      throws Exception;
-
-  /**
-   * An identifier for the binary format written by {@link #encode}.
-   *
-   * <p>This value, along with the fully qualified class name, forms an identifier for the
-   * binary format of this coder. Whenever this value changes, the new encoding is considered
-   * incompatible with the prior format: It is presumed that the prior version of the coder will
-   * be unable to correctly read the new format and the new version of the coder will be unable to
-   * correctly read the old format.
-   *
-   * <p>If the format is changed in a backwards-compatible way (the Coder can still accept data from
-   * the prior format), such as by adding optional fields to a Protocol Buffer or Avro definition,
-   * and you want Dataflow to understand that the new coder is compatible with the prior coder,
-   * this value must remain unchanged. It is then the responsibility of {@link #decode} to correctly
-   * read data from the prior format.
-   */
-  @Experimental(Kind.CODER_ENCODING_ID)
-  public String getEncodingId();
-
-  /**
-   * A collection of encodings supported by {@link #decode} in addition to the encoding
-   * from {@link #getEncodingId()} (which is assumed supported).
-   *
-   * <p><i>This information is not currently used for any purpose</i>. It is descriptive only,
-   * and this method is subject to change.
-   *
-   * @see #getEncodingId()
-   */
-  @Experimental(Kind.CODER_ENCODING_ID)
-  public Collection<String> getAllowedEncodings();
-
-  /**
-   * Exception thrown by {@link Coder#verifyDeterministic()} if the encoding is
-   * not deterministic, including details of why the encoding is not deterministic.
-   */
-  public static class NonDeterministicException extends Throwable {
-    private Coder<?> coder;
-    private List<String> reasons;
-
-    public NonDeterministicException(
-        Coder<?> coder, String reason, @Nullable NonDeterministicException e) {
-      this(coder, Arrays.asList(reason), e);
-    }
-
-    public NonDeterministicException(Coder<?> coder, String reason) {
-      this(coder, Arrays.asList(reason), null);
-    }
-
-    public NonDeterministicException(Coder<?> coder, List<String> reasons) {
-      this(coder, reasons, null);
-    }
-
-    public NonDeterministicException(
-        Coder<?> coder,
-        List<String> reasons,
-        @Nullable NonDeterministicException cause) {
-      super(cause);
-      Preconditions.checkArgument(reasons.size() > 0,
-          "Reasons must not be empty.");
-      this.reasons = reasons;
-      this.coder = coder;
-    }
-
-    public Iterable<String> getReasons() {
-      return reasons;
-    }
-
-    @Override
-    public String getMessage() {
-      return String.format("%s is not deterministic because:\n  %s",
-          coder, Joiner.on("\n  ").join(reasons));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java
deleted file mode 100644
index 8ff8571..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderException.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import java.io.IOException;
-
-/**
- * An {@link Exception} thrown if there is a problem encoding or decoding a value.
- */
-public class CoderException extends IOException {
-  public CoderException(String message) {
-    super(message);
-  }
-
-  public CoderException(String message, Throwable cause) {
-    super(message, cause);
-  }
-
-  public CoderException(Throwable cause) {
-    super(cause);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderFactories.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderFactories.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderFactories.java
deleted file mode 100644
index 82b40a4..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderFactories.java
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * Copyright (C) 2014 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.lang.reflect.Modifier;
-import java.lang.reflect.ParameterizedType;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * Static utility methods for creating and working with {@link Coder}s.
- */
-public final class CoderFactories {
-  private CoderFactories() { } // Static utility class
-
-  /**
-   * Creates a {@link CoderFactory} built from particular static methods of a class that
-   * implements {@link Coder}.
-   *
-   * <p>The class must have the following static methods:
-   *
-   * <ul>
-   * <li> {@code
-   * public static Coder<T> of(Coder<X> argCoder1, Coder<Y> argCoder2, ...)
-   * }
-   * <li> {@code
-   * public static List<Object> getInstanceComponents(T exampleValue);
-   * }
-   * </ul>
-   *
-   * <p>The {@code of(...)} method will be used to construct a
-   * {@code Coder<T>} from component {@link Coder}s.
-   * It must accept one {@link Coder} argument for each
-   * generic type parameter of {@code T}. If {@code T} takes no generic
-   * type parameters, then the {@code of()} factory method should take
-   * no arguments.
-   *
-   * <p>The {@code getInstanceComponents} method will be used to
-   * decompose a value during the {@link Coder} inference process,
-   * to automatically choose coders for the components.
-   *
-   * <p>Note that the class {@code T} to be coded may be a
-   * not-yet-specialized generic class.
-   * For a generic class {@code MyClass<X>} and an actual type parameter
-   * {@code Foo}, the {@link CoderFactoryFromStaticMethods} will
-   * accept any {@code Coder<Foo>} and produce a {@code Coder<MyClass<Foo>>}.
-   *
-   * <p>For example, the {@link CoderFactory} returned by
-   * {@code fromStaticMethods(ListCoder.class)}
-   * will produce a {@code Coder<List<X>>} for any {@code Coder Coder<X>}.
-   */
-  public static <T> CoderFactory fromStaticMethods(Class<T> clazz) {
-    return new CoderFactoryFromStaticMethods(clazz);
-  }
-
-  /**
-   * Creates a {@link CoderFactory} that always returns the
-   * given coder.
-   *
-   * <p>The {@code getInstanceComponents} method of this
-   * {@link CoderFactory} always returns an empty list.
-   */
-  public static <T> CoderFactory forCoder(Coder<T> coder) {
-    return new CoderFactoryForCoder<>(coder);
-  }
-
-  /**
-   * See {@link #fromStaticMethods} for a detailed description
-   * of the characteristics of this {@link CoderFactory}.
-   */
-  private static class CoderFactoryFromStaticMethods implements CoderFactory {
-
-    @Override
-    @SuppressWarnings("rawtypes")
-    public Coder<?> create(List<? extends Coder<?>> componentCoders) {
-      try {
-        return (Coder) factoryMethod.invoke(
-            null /* static */, componentCoders.toArray());
-      } catch (IllegalAccessException |
-               IllegalArgumentException |
-               InvocationTargetException |
-               NullPointerException |
-               ExceptionInInitializerError exn) {
-        throw new IllegalStateException(
-            "error when invoking Coder factory method " + factoryMethod,
-            exn);
-      }
-    }
-
-    @Override
-    public List<Object> getInstanceComponents(Object value) {
-      try {
-        @SuppressWarnings("unchecked")
-        List<Object> components =  (List<Object>) getComponentsMethod.invoke(
-            null /* static */, value);
-        return components;
-      } catch (IllegalAccessException
-          | IllegalArgumentException
-          | InvocationTargetException
-          | NullPointerException
-          | ExceptionInInitializerError exn) {
-        throw new IllegalStateException(
-            "error when invoking Coder getComponents method " + getComponentsMethod,
-            exn);
-      }
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////
-
-    // Method to create a coder given component coders
-    // For a Coder class of kind * -> * -> ... n times ... -> *
-    // this has type Coder<?> -> Coder<?> -> ... n times ... -> Coder<T>
-    private Method factoryMethod;
-
-    // Method to decompose a value of type T into its parts.
-    // For a Coder class of kind * -> * -> ... n times ... -> *
-    // this has type T -> List<Object>
-    // where the list has n elements.
-    private Method getComponentsMethod;
-
-    /**
-     * Returns a CoderFactory that invokes the given static factory method
-     * to create the Coder.
-     */
-    private CoderFactoryFromStaticMethods(Class<?> coderClazz) {
-      this.factoryMethod = getFactoryMethod(coderClazz);
-      this.getComponentsMethod = getInstanceComponentsMethod(coderClazz);
-    }
-
-    /**
-     * Returns the static {@code of} constructor method on {@code coderClazz}
-     * if it exists. It is assumed to have one {@link Coder} parameter for
-     * each type parameter of {@code coderClazz}.
-     */
-    private Method getFactoryMethod(Class<?> coderClazz) {
-      Method factoryMethodCandidate;
-
-      // Find the static factory method of coderClazz named 'of' with
-      // the appropriate number of type parameters.
-      int numTypeParameters = coderClazz.getTypeParameters().length;
-      Class<?>[] factoryMethodArgTypes = new Class<?>[numTypeParameters];
-      Arrays.fill(factoryMethodArgTypes, Coder.class);
-      try {
-        factoryMethodCandidate =
-            coderClazz.getDeclaredMethod("of", factoryMethodArgTypes);
-      } catch (NoSuchMethodException | SecurityException exn) {
-        throw new IllegalArgumentException(
-            "cannot register Coder " + coderClazz + ": "
-            + "does not have an accessible method named 'of' with "
-            + numTypeParameters + " arguments of Coder type",
-            exn);
-      }
-      if (!Modifier.isStatic(factoryMethodCandidate.getModifiers())) {
-        throw new IllegalArgumentException(
-            "cannot register Coder " + coderClazz + ": "
-            + "method named 'of' with " + numTypeParameters
-            + " arguments of Coder type is not static");
-      }
-      if (!coderClazz.isAssignableFrom(factoryMethodCandidate.getReturnType())) {
-        throw new IllegalArgumentException(
-            "cannot register Coder " + coderClazz + ": "
-            + "method named 'of' with " + numTypeParameters
-            + " arguments of Coder type does not return a " + coderClazz);
-      }
-      try {
-        if (!factoryMethodCandidate.isAccessible()) {
-          factoryMethodCandidate.setAccessible(true);
-        }
-      } catch (SecurityException exn) {
-        throw new IllegalArgumentException(
-            "cannot register Coder " + coderClazz + ": "
-            + "method named 'of' with " + numTypeParameters
-            + " arguments of Coder type is not accessible",
-            exn);
-      }
-
-      return factoryMethodCandidate;
-    }
-
-    /**
-     * Finds the static method on {@code coderType} to use
-     * to decompose a value of type {@code T} into components,
-     * each corresponding to an argument of the {@code of}
-     * method.
-     */
-    private <T> Method getInstanceComponentsMethod(Class<?> coderClazz) {
-      TypeDescriptor<?> coderType = TypeDescriptor.of(coderClazz);
-      TypeDescriptor<T> argumentType = getCodedType(coderType);
-
-      // getInstanceComponents may be implemented in a superclass,
-      // so we search them all for an applicable method. We do not
-      // try to be clever about finding the best overload. It may
-      // be in a generic superclass, erased to accept an Object.
-      // However, subtypes are listed before supertypes (it is a
-      // topological ordering) so probably the best one will be chosen
-      // if there are more than one (which should be rare)
-      for (TypeDescriptor<?> supertype : coderType.getClasses()) {
-        for (Method method : supertype.getRawType().getDeclaredMethods()) {
-          if (method.getName().equals("getInstanceComponents")) {
-            TypeDescriptor<?> formalArgumentType = supertype.getArgumentTypes(method).get(0);
-            if (formalArgumentType.getRawType().isAssignableFrom(argumentType.getRawType())) {
-              return method;
-            }
-          }
-        }
-      }
-
-      throw new IllegalArgumentException(
-          "cannot create a CoderFactory from " + coderType + ": "
-          + "does not have an accessible method "
-          + "'getInstanceComponents'");
-    }
-
-    /**
-     * If {@code coderType} is a subclass of {@link Coder} for a specific
-     * type {@code T}, returns {@code T.class}. Otherwise, raises IllegalArgumentException.
-     */
-    private <T> TypeDescriptor<T> getCodedType(TypeDescriptor<?> coderType) {
-      for (TypeDescriptor<?> ifaceType : coderType.getInterfaces()) {
-        if (ifaceType.getRawType().equals(Coder.class)) {
-          ParameterizedType coderIface = (ParameterizedType) ifaceType.getType();
-          @SuppressWarnings("unchecked")
-          TypeDescriptor<T> token =
-              (TypeDescriptor<T>) TypeDescriptor.of(coderIface.getActualTypeArguments()[0]);
-          return token;
-        }
-      }
-      throw new IllegalArgumentException(
-          "cannot build CoderFactory from class " + coderType
-          + ": does not implement Coder<T> for any T.");
-    }
-  }
-
-  /**
-   * See {@link #forCoder} for a detailed description of this
-   * {@link CoderFactory}.
-   */
-  private static class CoderFactoryForCoder<T> implements CoderFactory {
-    private Coder<T> coder;
-
-    public CoderFactoryForCoder(Coder<T> coder) {
-      this.coder = coder;
-    }
-
-    @Override
-    public Coder<?> create(List<? extends Coder<?>> componentCoders) {
-      return this.coder;
-    }
-
-    @Override
-    public List<Object> getInstanceComponents(Object value) {
-      return Collections.emptyList();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderFactory.java
deleted file mode 100644
index 541256c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderFactory.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2014 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import java.util.List;
-
-/**
- * A {@link CoderFactory} creates coders and decomposes values.
- * It may operate on a parameterized type, such as {@link List},
- * in which case the {@link #create} method accepts a list of
- * coders to use for the type parameters.
- */
-public interface CoderFactory {
-
-  /**
-   * Returns a {@code Coder<?>}, given argument coder to use for
-   * values of a particular type, given the Coders for each of
-   * the type's generic parameter types.
-   */
-  public Coder<?> create(List<? extends Coder<?>> componentCoders);
-
-  /**
-   * Returns a list of objects contained in {@code value}, one per
-   * type argument, or {@code null} if none can be determined.
-   * The list of returned objects should be the same size as the
-   * list of coders required by {@link #create}.
-   */
-  public List<Object> getInstanceComponents(Object value);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderProvider.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderProvider.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderProvider.java
deleted file mode 100644
index a3e6ec4..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderProvider.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2014 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-/**
- * A {@link CoderProvider} may create a {@link Coder} for
- * any concrete class.
- */
-public interface CoderProvider {
-
-  /**
-   * Provides a coder for a given class, if possible.
-   *
-   * @throws CannotProvideCoderException if no coder can be provided
-   */
-  public <T> Coder<T> getCoder(TypeDescriptor<T> type) throws CannotProvideCoderException;
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderProviders.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderProviders.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderProviders.java
deleted file mode 100644
index 8b0aedd..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderProviders.java
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (C) 2014 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.util.InstanceBuilder;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.base.Joiner;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-
-import java.lang.reflect.InvocationTargetException;
-import java.util.List;
-
-/**
- * Static utility methods for working with {@link CoderProvider CoderProviders}.
- */
-public final class CoderProviders {
-
-  // Static utility class
-  private CoderProviders() { }
-
-  /**
-   * Creates a {@link CoderProvider} built from particular static methods of a class that
-   * implements {@link Coder}. The requirements for this method are precisely the requirements
-   * for a {@link Coder} class to be usable with {@link DefaultCoder} annotations.
-   *
-   * <p>The class must have the following static method:
-   *
-   * <pre>{@code
-   * public static Coder<T> of(TypeDescriptor<T> type)
-   * }
-   * </pre>
-   */
-  public static <T> CoderProvider fromStaticMethods(Class<T> clazz) {
-    return new CoderProviderFromStaticMethods(clazz);
-  }
-
-
-  /**
-   * Returns a {@link CoderProvider} that consults each of the provider {@code coderProviders}
-   * and returns the first {@link Coder} provided.
-   *
-   * <p>Note that the order in which the providers are listed matters: While the set of types
-   * handled will be the union of those handled by all of the providers in the list, the actual
-   * {@link Coder} provided by the first successful provider may differ, and may have inferior
-   * properties. For example, not all {@link Coder Coders} are deterministic, handle {@code null}
-   * values, or have comparable performance.
-   */
-  public static CoderProvider firstOf(CoderProvider... coderProviders) {
-    return new FirstOf(ImmutableList.copyOf(coderProviders));
-  }
-
-  ///////////////////////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * @see #firstOf
-   */
-  private static class FirstOf implements CoderProvider {
-
-    private Iterable<CoderProvider> providers;
-
-    public FirstOf(Iterable<CoderProvider> providers) {
-      this.providers = providers;
-    }
-
-    @Override
-    public <T> Coder<T> getCoder(TypeDescriptor<T> type) throws CannotProvideCoderException {
-      List<String> messages = Lists.newArrayList();
-      for (CoderProvider provider : providers) {
-        try {
-          return provider.getCoder(type);
-        } catch (CannotProvideCoderException exc) {
-          messages.add(String.format("%s could not provide a Coder for type %s: %s",
-              provider, type, exc.getMessage()));
-        }
-      }
-      throw new CannotProvideCoderException(
-          String.format("Cannot provide coder for type %s: %s.",
-              type, Joiner.on("; ").join(messages)));
-    }
-  }
-
-  private static class CoderProviderFromStaticMethods implements CoderProvider {
-
-    /** If true, then clazz has {@code of(TypeDescriptor)}. If false, {@code of(Class)}. */
-    private final boolean takesTypeDescriptor;
-    private final Class<?> clazz;
-
-    public CoderProviderFromStaticMethods(Class<?> clazz) {
-      // Note that the second condition supports older classes, which only needed to provide
-      // of(Class), not of(TypeDescriptor). Our own classes have updated to accept a
-      // TypeDescriptor. Hence the error message points only to the current specification,
-      // not both acceptable conditions.
-      checkArgument(classTakesTypeDescriptor(clazz) || classTakesClass(clazz),
-          "Class " + clazz.getCanonicalName()
-          + " is missing required static method of(TypeDescriptor).");
-
-      this.takesTypeDescriptor = classTakesTypeDescriptor(clazz);
-      this.clazz = clazz;
-    }
-
-    @Override
-    public <T> Coder<T> getCoder(TypeDescriptor<T> type) throws CannotProvideCoderException {
-      try {
-        if (takesTypeDescriptor) {
-          @SuppressWarnings("unchecked")
-          Coder<T> result = InstanceBuilder.ofType(Coder.class)
-              .fromClass(clazz)
-              .fromFactoryMethod("of")
-              .withArg(TypeDescriptor.class, type)
-              .build();
-          return result;
-        } else {
-          @SuppressWarnings("unchecked")
-          Coder<T> result = InstanceBuilder.ofType(Coder.class)
-              .fromClass(clazz)
-              .fromFactoryMethod("of")
-              .withArg(Class.class, type.getRawType())
-              .build();
-          return result;
-        }
-      } catch (RuntimeException exc) {
-        if (exc.getCause() instanceof InvocationTargetException) {
-          throw new CannotProvideCoderException(exc.getCause().getCause());
-        }
-        throw exc;
-      }
-    }
-
-    private boolean classTakesTypeDescriptor(Class<?> clazz) {
-      try {
-        clazz.getDeclaredMethod("of", TypeDescriptor.class);
-        return true;
-      } catch (NoSuchMethodException | SecurityException exc) {
-        return false;
-      }
-    }
-
-    private boolean classTakesClass(Class<?> clazz) {
-      try {
-        clazz.getDeclaredMethod("of", Class.class);
-        return true;
-      } catch (NoSuchMethodException | SecurityException exc) {
-        return false;
-      }
-    }
-  }
-}

[13/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CounterAggregator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CounterAggregator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CounterAggregator.java
deleted file mode 100644
index 824825f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CounterAggregator.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Max;
-import com.google.cloud.dataflow.sdk.transforms.Min;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.util.common.Counter;
-import com.google.cloud.dataflow.sdk.util.common.CounterProvider;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-
-/**
- * An implementation of the {@code Aggregator} interface that uses a
- * {@link Counter} as the underlying representation. Supports {@link CombineFn}s
- * from the {@link Sum}, {@link Min} and {@link Max} classes.
- *
- * @param <InputT> the type of input values
- * @param <AccumT> the type of accumulator values
- * @param <OutputT> the type of output value
- */
-public class CounterAggregator<InputT, AccumT, OutputT> implements Aggregator<InputT, OutputT> {
-
-  private final Counter<InputT> counter;
-  private final CombineFn<InputT, AccumT, OutputT> combiner;
-
-  /**
-   * Constructs a new aggregator with the given name and aggregation logic
-   * specified in the CombineFn argument. The underlying counter is
-   * automatically added into the provided CounterSet.
-   *
-   * <p>If a counter with the same name already exists, it will be reused, as
-   * long as it has the same type.
-   *
-   * @param name the name given to the underlying counter
-   * @param combiner the aggregation logic; must also implement {@link CounterProvider}
-   * @param addCounterMutator the mutator used to register the counter
-   * @throws IllegalArgumentException if {@code combiner} is not a {@link CounterProvider}, or
-   *     if registering the counter is rejected by {@code addCounterMutator}
-   */
-  @SuppressWarnings("unchecked") // Safe contravariant cast of the combiner.
-  public CounterAggregator(String name, CombineFn<? super InputT, AccumT, OutputT> combiner,
-      CounterSet.AddCounterMutator addCounterMutator) {
-    this(constructCounter(name, combiner), addCounterMutator,
-        (CombineFn<InputT, AccumT, OutputT>) combiner);
-  }
-
-  private CounterAggregator(Counter<InputT> counter,
-      CounterSet.AddCounterMutator addCounterMutator,
-      CombineFn<InputT, AccumT, OutputT> combiner) {
-    try {
-      this.counter = addCounterMutator.addCounter(counter);
-    } catch (IllegalArgumentException ex) {
-      // Keep the original exception as the cause so the underlying reason is not lost.
-      throw new IllegalArgumentException(
-          "aggregator's name collides with an existing aggregator "
-          + "or system-provided counter of an incompatible type", ex);
-    }
-    this.combiner = combiner;
-  }
-
-  /**
-   * Obtains the counter for {@code name} from the combiner, which must be a
-   * {@link CounterProvider}.
-   *
-   * @throws IllegalArgumentException if {@code combiner} is not a {@link CounterProvider}
-   */
-  private static <T> Counter<T> constructCounter(String name,
-      CombineFn<? super T, ?, ?> combiner) {
-    if (combiner instanceof CounterProvider) {
-      @SuppressWarnings("unchecked")
-      CounterProvider<T> counterProvider = (CounterProvider<T>) combiner;
-      return counterProvider.getCounter(name);
-    } else {
-      throw new IllegalArgumentException("unsupported combiner in Aggregator: "
-        + combiner.getClass().getName());
-    }
-  }
-
-  /** Adds {@code value} to the underlying counter. */
-  @Override
-  public void addValue(InputT value) {
-    counter.addValue(value);
-  }
-
-  /** Returns the name of the underlying counter. */
-  @Override
-  public String getName() {
-    return counter.getName();
-  }
-
-  /** Returns the {@link CombineFn} this aggregator was constructed with. */
-  @Override
-  public CombineFn<InputT, ?, OutputT> getCombineFn() {
-    return combiner;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CredentialFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CredentialFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CredentialFactory.java
deleted file mode 100644
index 4913a1e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CredentialFactory.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.auth.oauth2.Credential;
-
-import java.io.IOException;
-import java.security.GeneralSecurityException;
-
-/**
- * Factory for the OAuth credential used by the SDK and the SDK workers.
- */
-public interface CredentialFactory {
-
-  /**
-   * Constructs the credential.
-   *
-   * @return the OAuth credential
-   * @throws IOException declared for implementations whose credential lookup performs I/O
-   * @throws GeneralSecurityException declared for implementations that hit a security failure
-   */
-  Credential getCredential() throws IOException, GeneralSecurityException;
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java
deleted file mode 100644
index 671b131..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Credentials.java
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.auth.oauth2.Credential;
-import com.google.api.client.extensions.java6.auth.oauth2.AbstractPromptReceiver;
-import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp;
-import com.google.api.client.googleapis.auth.oauth2.GoogleAuthorizationCodeFlow;
-import com.google.api.client.googleapis.auth.oauth2.GoogleClientSecrets;
-import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
-import com.google.api.client.googleapis.auth.oauth2.GoogleOAuthConstants;
-import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
-import com.google.api.client.http.GenericUrl;
-import com.google.api.client.http.HttpTransport;
-import com.google.api.client.json.JsonFactory;
-import com.google.api.client.json.jackson2.JacksonFactory;
-import com.google.api.client.util.store.FileDataStoreFactory;
-import com.google.cloud.dataflow.sdk.options.GcpOptions;
-import com.google.common.base.Preconditions;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.security.GeneralSecurityException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-
-/**
- * Provides support for loading credentials.
- */
-public class Credentials {
-
-  private static final Logger LOG = LoggerFactory.getLogger(Credentials.class);
-
-  /**
-   * OAuth 2.0 scopes used by a local worker (not on GCE).
-   * The scope cloud-platform provides access to all Cloud Platform resources.
-   * cloud-platform isn't sufficient yet for talking to datastore so we request
-   * those resources separately.
-   *
-   * <p>Note that trusted scope relationships don't apply to OAuth tokens, so for
-   * services we access directly (GCS) as opposed to through the backend
-   * (BigQuery, GCE), we need to explicitly request that scope.
-   */
-  private static final List<String> SCOPES = Arrays.asList(
-      "https://www.googleapis.com/auth/cloud-platform",
-      "https://www.googleapis.com/auth/devstorage.full_control",
-      "https://www.googleapis.com/auth/userinfo.email",
-      "https://www.googleapis.com/auth/datastore");
-
-  /** Prompt receiver that reports the out-of-band redirect URI. */
-  private static class PromptReceiver extends AbstractPromptReceiver {
-    @Override
-    public String getRedirectUri() {
-      return GoogleOAuthConstants.OOB_REDIRECT_URI;
-    }
-  }
-
-  /**
-   * Initializes OAuth2 credentials.
-   *
-   * <p>This can use 3 different mechanisms for obtaining a credential:
-   * <ol>
-   *   <li>
-   *     It can fetch the
-   *     <a href="https://developers.google.com/accounts/docs/application-default-credentials">
-   *     application default credentials</a>.
-   *   </li>
-   *   <li>
-   *     The user can specify a client secrets file and go through the OAuth2
-   *     webflow. The credential will then be cached in the user's home
-   *     directory for reuse. Provide the property "secrets_file" to use this
-   *     mechanism.
-   *   </li>
-   *   <li>
-   *     The user can specify a file containing a service account.
-   *     Provide the properties "service_account_keyfile" and
-   *     "service_account_name" to use this mechanism.
-   *   </li>
-   * </ol>
-   * The default mechanism is to use the
-   * <a href="https://developers.google.com/accounts/docs/application-default-credentials">
-   * application default credentials</a>. The other options can be used by providing the
-   * corresponding properties.
-   *
-   * <p>The service account settings are checked first, then the client secrets file, and
-   * only then the application default credentials.
-   *
-   * @param options the options carrying the credential-related settings
-   * @return a credential scoped to the scopes this class requests
-   * @throws IOException if the service account key file cannot be turned into a credential
-   * @throws GeneralSecurityException if the client secrets flow hits a security failure
-   * @throws RuntimeException if the application default credentials cannot be obtained
-   */
-  public static Credential getCredential(GcpOptions options)
-      throws IOException, GeneralSecurityException {
-    String keyFile = options.getServiceAccountKeyfile();
-    String accountName = options.getServiceAccountName();
-
-    if (keyFile != null && accountName != null) {
-      try {
-        return getCredentialFromFile(keyFile, accountName, SCOPES);
-      } catch (GeneralSecurityException e) {
-        throw new IOException("Unable to obtain credentials from file", e);
-      }
-    }
-
-    if (options.getSecretsFile() != null) {
-      return getCredentialFromClientSecrets(options, SCOPES);
-    }
-
-    try {
-      return GoogleCredential.getApplicationDefault().createScoped(SCOPES);
-    } catch (IOException e) {
-      throw new RuntimeException("Unable to get application default credentials. Please see "
-          + "https://developers.google.com/accounts/docs/application-default-credentials "
-          + "for details on how to specify credentials. This version of the SDK is "
-          + "dependent on the gcloud core component version 2015.02.05 or newer to "
-          + "be able to get credentials from the currently authorized user via gcloud auth.", e);
-    }
-  }
-
-  /**
-   * Loads OAuth2 credential from a local file.
-   *
-   * @param keyFile path of the P12 private key file of the service account
-   * @param accountId the service account id
-   * @param scopes the OAuth scopes to request
-   */
-  private static Credential getCredentialFromFile(
-      String keyFile, String accountId, Collection<String> scopes)
-      throws IOException, GeneralSecurityException {
-    GoogleCredential credential = new GoogleCredential.Builder()
-        .setTransport(Transport.getTransport())
-        .setJsonFactory(Transport.getJsonFactory())
-        .setServiceAccountId(accountId)
-        .setServiceAccountScopes(scopes)
-        .setServiceAccountPrivateKeyFromP12File(new File(keyFile))
-        .build();
-
-    LOG.info("Created credential from file {}", keyFile);
-    return credential;
-  }
-
-  /**
-   * Loads OAuth2 credential from client secrets, which may require an
-   * interactive authorization prompt.
-   *
-   * @param options the options naming the secrets file, credential directory, credential id
-   *     and OAuth server URLs
-   * @param scopes the OAuth scopes to request
-   * @throws RuntimeException if the client secrets file cannot be read
-   */
-  private static Credential getCredentialFromClientSecrets(
-      GcpOptions options, Collection<String> scopes)
-      throws IOException, GeneralSecurityException {
-    String clientSecretsFile = options.getSecretsFile();
-
-    Preconditions.checkArgument(clientSecretsFile != null);
-    HttpTransport httpTransport = GoogleNetHttpTransport.newTrustedTransport();
-
-    JsonFactory jsonFactory = JacksonFactory.getDefaultInstance();
-    GoogleClientSecrets clientSecrets;
-
-    // try-with-resources guarantees the reader is closed whether or not loading succeeds.
-    try (FileReader secretsReader = new FileReader(clientSecretsFile)) {
-      clientSecrets = GoogleClientSecrets.load(jsonFactory, secretsReader);
-    } catch (IOException e) {
-      throw new RuntimeException(
-          "Could not read the client secrets from file: " + clientSecretsFile,
-          e);
-    }
-
-    FileDataStoreFactory dataStoreFactory =
-        new FileDataStoreFactory(new File(options.getCredentialDir()));
-
-    GoogleAuthorizationCodeFlow flow = new GoogleAuthorizationCodeFlow.Builder(
-        httpTransport, jsonFactory, clientSecrets, scopes)
-        .setDataStoreFactory(dataStoreFactory)
-        .setTokenServerUrl(new GenericUrl(options.getTokenServerUrl()))
-        .setAuthorizationServerEncodedUrl(options.getAuthorizationServerEncodedUrl())
-        .build();
-
-    // The credentialId identifies the credential if we're using a persistent
-    // credential store.
-    Credential credential =
-        new AuthorizationCodeInstalledApp(flow, new PromptReceiver())
-            .authorize(options.getCredentialId());
-
-    LOG.info("Got credential from client secret");
-    return credential;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowPathValidator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowPathValidator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowPathValidator.java
deleted file mode 100644
index cfb120c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowPathValidator.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
-import com.google.common.base.Preconditions;
-
-import java.io.IOException;
-
-/**
- * GCP implementation of {@link PathValidator}. Only GCS paths are allowed.
- */
-public class DataflowPathValidator implements PathValidator {
-
-  private DataflowPipelineOptions dataflowOptions;
-
-  DataflowPathValidator(DataflowPipelineOptions options) {
-    this.dataflowOptions = options;
-  }
-
-  public static DataflowPathValidator fromOptions(PipelineOptions options) {
-    return new DataflowPathValidator(options.as(DataflowPipelineOptions.class));
-  }
-
-  /**
-   * Validates the the input GCS path is accessible and that the path
-   * is well formed.
-   */
-  @Override
-  public String validateInputFilePatternSupported(String filepattern) {
-    GcsPath gcsPath = getGcsPath(filepattern);
-    Preconditions.checkArgument(
-        dataflowOptions.getGcsUtil().isGcsPatternSupported(gcsPath.getObject()));
-    String returnValue = verifyPath(filepattern);
-    verifyPathIsAccessible(filepattern, "Could not find file %s");
-    return returnValue;
-  }
-
-  /**
-   * Validates the the output GCS path is accessible and that the path
-   * is well formed.
-   */
-  @Override
-  public String validateOutputFilePrefixSupported(String filePrefix) {
-    String returnValue = verifyPath(filePrefix);
-    verifyPathIsAccessible(filePrefix, "Output path does not exist or is not writeable: %s");
-    return returnValue;
-  }
-
-  @Override
-  public String verifyPath(String path) {
-    GcsPath gcsPath = getGcsPath(path);
-    Preconditions.checkArgument(gcsPath.isAbsolute(),
-        "Must provide absolute paths for Dataflow");
-    Preconditions.checkArgument(!gcsPath.getObject().contains("//"),
-        "Dataflow Service does not allow objects with consecutive slashes");
-    return gcsPath.toResourceName();
-  }
-
-  private void verifyPathIsAccessible(String path, String errorMessage) {
-    GcsPath gcsPath = getGcsPath(path);
-    try {
-      Preconditions.checkArgument(dataflowOptions.getGcsUtil().bucketExists(gcsPath),
-        errorMessage, path);
-    } catch (IOException e) {
-      throw new RuntimeException(
-          String.format("Unable to verify that GCS bucket gs://%s exists.", gcsPath.getBucket()),
-          e);
-    }
-  }
-
-  private GcsPath getGcsPath(String path) {
-    try {
-      return GcsPath.fromUri(path);
-    } catch (IllegalArgumentException e) {
-      throw new IllegalArgumentException(String.format(
-          "%s expected a valid 'gs://' path but was given '%s'",
-          dataflowOptions.getRunner().getSimpleName(), path), e);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowReleaseInfo.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowReleaseInfo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowReleaseInfo.java
deleted file mode 100644
index 39b3005..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DataflowReleaseInfo.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.json.GenericJson;
-import com.google.api.client.util.Key;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Properties;
-
-/**
- * Utilities for working with the Dataflow distribution.
- *
- * <p>Exposes the SDK name and version, with additional entries read from a properties
- * resource on the classpath.
- */
-public final class DataflowReleaseInfo extends GenericJson {
-  private static final Logger LOG = LoggerFactory.getLogger(DataflowReleaseInfo.class);
-
-  private static final String DATAFLOW_PROPERTIES_PATH =
-      "/com/google/cloud/dataflow/sdk/sdk.properties";
-
-  /** Holder class so the shared instance is only built when first requested. */
-  private static class LazyInit {
-    private static final DataflowReleaseInfo INSTANCE =
-        new DataflowReleaseInfo(DATAFLOW_PROPERTIES_PATH);
-  }
-
-  /**
-   * Returns an instance of DataflowReleaseInfo.
-   */
-  public static DataflowReleaseInfo getReleaseInfo() {
-    return LazyInit.INSTANCE;
-  }
-
-  @Key private String name = "Google Cloud Dataflow Java SDK";
-  @Key private String version = "Unknown";
-
-  /** Provides the SDK name. */
-  public String getName() {
-    return name;
-  }
-
-  /** Provides the SDK version. */
-  public String getVersion() {
-    return version;
-  }
-
-  /**
-   * Loads release properties from the classpath resource at {@code resourcePath}.
-   *
-   * <p>A missing resource or a read failure is logged as a warning rather than thrown, leaving
-   * the default name and version in place.
-   *
-   * @param resourcePath classpath location of the properties resource
-   */
-  private DataflowReleaseInfo(String resourcePath) {
-    Properties properties = new Properties();
-
-    // Open the resource named by the parameter, and close the stream once it has been read.
-    try (InputStream in = DataflowReleaseInfo.class.getResourceAsStream(resourcePath)) {
-      if (in == null) {
-        LOG.warn("Dataflow properties resource not found: {}", resourcePath);
-        return;
-      }
-      properties.load(in);
-    } catch (IOException e) {
-      LOG.warn("Error loading Dataflow properties resource: ", e);
-    }
-
-    // NOTE(review): put() presumably routes keys matching @Key fields (such as "version")
-    // into those fields - confirm against GenericJson.
-    for (String key : properties.stringPropertyNames()) {
-      if (key.equals("name")) {
-        // We don't allow the properties to override the SDK name.
-        continue;
-      }
-      put(key, properties.getProperty(key));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java
deleted file mode 100644
index 6e97053..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectModeExecutionContext.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.ValueWithMetadata;
-import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler;
-import com.google.cloud.dataflow.sdk.util.state.InMemoryStateInternals;
-import com.google.cloud.dataflow.sdk.util.state.StateInternals;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * {@link ExecutionContext} for use in direct mode.
- */
-public class DirectModeExecutionContext
-    extends BaseExecutionContext<DirectModeExecutionContext.StepContext> {
-
-  private Object key;
-  private List<ValueWithMetadata<?>> output = Lists.newArrayList();
-  private Map<TupleTag<?>, List<ValueWithMetadata<?>>> sideOutputs = Maps.newHashMap();
-
-  protected DirectModeExecutionContext() {}
-
-  public static DirectModeExecutionContext create() {
-    return new DirectModeExecutionContext();
-  }
-
-  @Override
-  protected StepContext createStepContext(
-      String stepName, String transformName, StateSampler stateSampler) {
-    return new StepContext(this, stepName, transformName);
-  }
-
-  public Object getKey() {
-    return key;
-  }
-
-  public void setKey(Object newKey) {
-    // The direct mode runner may reorder elements, so we need to keep
-    // around the state used for each key.
-    for (ExecutionContext.StepContext stepContext : getAllStepContexts()) {
-      ((StepContext) stepContext).switchKey(newKey);
-    }
-    key = newKey;
-  }
-
-  @Override
-  public void noteOutput(WindowedValue<?> outputElem) {
-    output.add(ValueWithMetadata.of(outputElem).withKey(getKey()));
-  }
-
-  @Override
-  public void noteSideOutput(TupleTag<?> tag, WindowedValue<?> outputElem) {
-    List<ValueWithMetadata<?>> output = sideOutputs.get(tag);
-    if (output == null) {
-      output = Lists.newArrayList();
-      sideOutputs.put(tag, output);
-    }
-    output.add(ValueWithMetadata.of(outputElem).withKey(getKey()));
-  }
-
-  public <T> List<ValueWithMetadata<T>> getOutput(@SuppressWarnings("unused") TupleTag<T> tag) {
-    @SuppressWarnings({"unchecked", "rawtypes"}) // Cast not expressible without rawtypes
-    List<ValueWithMetadata<T>> typedOutput = (List) output;
-    return typedOutput;
-  }
-
-  public <T> List<ValueWithMetadata<T>> getSideOutput(TupleTag<T> tag) {
-    if (sideOutputs.containsKey(tag)) {
-      @SuppressWarnings({"unchecked", "rawtypes"}) // Cast not expressible without rawtypes
-      List<ValueWithMetadata<T>> typedOutput = (List) sideOutputs.get(tag);
-      return typedOutput;
-    } else {
-      return Lists.newArrayList();
-    }
-  }
-
-  /**
-   * {@link ExecutionContext.StepContext} used in direct mode.
-   */
-  public static class StepContext extends BaseExecutionContext.StepContext {
-
-    /** A map from each key to the state associated with it. */
-    private final Map<Object, InMemoryStateInternals<Object>> stateInternals = Maps.newHashMap();
-    private InMemoryStateInternals<Object> currentStateInternals = null;
-
-    private StepContext(ExecutionContext executionContext, String stepName, String transformName) {
-      super(executionContext, stepName, transformName);
-      switchKey(null);
-    }
-
-    public void switchKey(Object newKey) {
-      currentStateInternals = stateInternals.get(newKey);
-      if (currentStateInternals == null) {
-        currentStateInternals = InMemoryStateInternals.forKey(newKey);
-        stateInternals.put(newKey, currentStateInternals);
-      }
-    }
-
-    @Override
-    public StateInternals<Object> stateInternals() {
-      return checkNotNull(currentStateInternals);
-    }
-
-    @Override
-    public TimerInternals timerInternals() {
-      throw new UnsupportedOperationException("Direct mode cannot return timerInternals");
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectSideInputReader.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectSideInputReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectSideInputReader.java
deleted file mode 100644
index ee8c922..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DirectSideInputReader.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.common.base.Predicate;
-import com.google.common.collect.Iterables;
-
-/**
- * Basic side input reader wrapping a {@link PTuple} of side input iterables. Encapsulates
- * conversion according to the {@link PCollectionView} and projection to a particular
- * window.
- */
-public class DirectSideInputReader implements SideInputReader {
-
-  private PTuple sideInputValues;
-
-  private DirectSideInputReader(PTuple sideInputValues) {
-    this.sideInputValues = sideInputValues;
-  }
-
-  public static DirectSideInputReader of(PTuple sideInputValues) {
-    return new DirectSideInputReader(sideInputValues);
-  }
-
-  @Override
-  public <T> boolean contains(PCollectionView<T> view) {
-    return sideInputValues.has(view.getTagInternal());
-  }
-
-  @Override
-  public boolean isEmpty() {
-    return sideInputValues.isEmpty();
-  }
-
-  @Override
-  public <T> T get(PCollectionView<T> view, final BoundedWindow window) {
-    final TupleTag<Iterable<WindowedValue<?>>> tag = view.getTagInternal();
-    if (!sideInputValues.has(tag)) {
-      throw new IllegalArgumentException("calling getSideInput() with unknown view");
-    }
-
-    if (view.getWindowingStrategyInternal().getWindowFn() instanceof GlobalWindows) {
-      return view.fromIterableInternal(sideInputValues.get(tag));
-    } else {
-      return view.fromIterableInternal(
-          Iterables.filter(sideInputValues.get(tag),
-              new Predicate<WindowedValue<?>>() {
-                  @Override
-                  public boolean apply(WindowedValue<?> element) {
-                    return element.getWindows().contains(window);
-                  }
-                }));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java
deleted file mode 100644
index 15a3a47..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnInfo.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-
-import java.io.Serializable;
-
-/**
- * Serializable holder bundling a {@link DoFn} with the information that travels with it: its
- * windowing strategy and, optionally, its side input views and input coder.
- *
- * @param <InputT> the type of the (main) input elements of the DoFn
- * @param <OutputT> the type of the (main) output elements of the DoFn
- */
-public class DoFnInfo<InputT, OutputT> implements Serializable {
-  private final DoFn<InputT, OutputT> doFn;
-  private final WindowingStrategy<?, ?> windowingStrategy;
-  /** {@code null} when created through the two-argument constructor. */
-  private final Iterable<PCollectionView<?>> sideInputViews;
-  /** {@code null} when created through the two-argument constructor. */
-  private final Coder<InputT> inputCoder;
-
-  /**
-   * Creates an info object without side input views or input coder; both getters for those
-   * return {@code null}.
-   */
-  public DoFnInfo(DoFn<InputT, OutputT> doFn, WindowingStrategy<?, ?> windowingStrategy) {
-    this(doFn, windowingStrategy, null, null);
-  }
-
-  /** Creates an info object carrying all four pieces of information. */
-  public DoFnInfo(DoFn<InputT, OutputT> doFn, WindowingStrategy<?, ?> windowingStrategy,
-                  Iterable<PCollectionView<?>> sideInputViews, Coder<InputT> inputCoder) {
-    this.doFn = doFn;
-    this.windowingStrategy = windowingStrategy;
-    this.sideInputViews = sideInputViews;
-    this.inputCoder = inputCoder;
-  }
-
-  /** Returns the wrapped {@link DoFn}. */
-  public DoFn<InputT, OutputT> getDoFn() {
-    return doFn;
-  }
-
-  /** Returns the windowing strategy supplied at construction. */
-  public WindowingStrategy<?, ?> getWindowingStrategy() {
-    return windowingStrategy;
-  }
-
-  /** Returns the side input views, or {@code null} if none were supplied. */
-  public Iterable<PCollectionView<?>> getSideInputViews() {
-    return sideInputViews;
-  }
-
-  /** Returns the input coder, or {@code null} if none was supplied. */
-  public Coder<InputT> getInputCoder() {
-    return inputCoder;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java
deleted file mode 100644
index 51c3f39..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunner.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn.ProcessContext;
-import com.google.cloud.dataflow.sdk.values.KV;
-
-/**
- * A wrapper interface that represents the execution of a {@link DoFn}.
- *
- * @param <InputT> the type of the DoFn's (main) input elements
- * @param <OutputT> the type of the DoFn's (main) output elements
- */
-public interface DoFnRunner<InputT, OutputT> {
-  /**
-   * Prepares and calls {@link DoFn#startBundle}.
-   */
-  void startBundle();
-
-  /**
-   * Calls {@link DoFn#processElement} with a {@link ProcessContext} containing the current
-   * element.
-   */
-  void processElement(WindowedValue<InputT> elem);
-
-  /**
-   * Calls {@link DoFn#finishBundle} and performs additional tasks, such as
-   * flushing in-memory states.
-   */
-  void finishBundle();
-
-  /**
-   * An internal interface for signaling that a {@link DoFn} requires late data dropping.
-   */
-  interface ReduceFnExecutor<K, InputT, OutputT, W> {
-    /**
-     * Gets this object as a {@link DoFn}.
-     *
-     * <p>Most implementors of this interface are expected to be {@link DoFn} instances, and
-     * will return themselves.
-     */
-    DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> asDoFn();
-
-    /**
-     * Returns an aggregator that tracks elements that are dropped due to being late.
-     */
-    Aggregator<Long, Long> getDroppedDueToLatenessAggregator();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunnerBase.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunnerBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunnerBase.java
deleted file mode 100644
index 04ec59f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunnerBase.java
+++ /dev/null
@@ -1,558 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.util.DoFnRunners.OutputManager;
-import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.util.state.StateInternals;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-
-import org.joda.time.Instant;
-import org.joda.time.format.PeriodFormat;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-/**
- * A base implementation of {@link DoFnRunner}.
- *
- * <p> Sub-classes should override {@link #invokeProcessElement}.
- */
-public abstract class DoFnRunnerBase<InputT, OutputT> implements DoFnRunner<InputT, OutputT> {
-
-  /** The DoFn being run. */
-  public final DoFn<InputT, OutputT> fn;
-
-  /** The context used for running the DoFn. */
-  public final DoFnContext<InputT, OutputT> context;
-
-  /**
-   * Stores the {@link DoFn} and builds the shared {@link DoFnContext} it runs against.
-   *
-   * <p>{@code windowingStrategy} may be {@code null}, in which case the context is created
-   * with a {@code null} window function.
-   */
-  protected DoFnRunnerBase(
-      PipelineOptions options,
-      DoFn<InputT, OutputT> fn,
-      SideInputReader sideInputReader,
-      OutputManager outputManager,
-      TupleTag<OutputT> mainOutputTag,
-      List<TupleTag<?>> sideOutputTags,
-      StepContext stepContext,
-      CounterSet.AddCounterMutator addCounterMutator,
-      WindowingStrategy<?, ?> windowingStrategy) {
-    WindowFn<?, ?> windowFn = null;
-    if (windowingStrategy != null) {
-      windowFn = windowingStrategy.getWindowFn();
-    }
-
-    this.fn = fn;
-    this.context = new DoFnContext<>(
-        options,
-        fn,
-        sideInputReader,
-        outputManager,
-        mainOutputTag,
-        sideOutputTags,
-        stepContext,
-        addCounterMutator,
-        windowFn);
-  }
-
-  /**
-   * An {@code OutputManager} that collects every output into an in-memory list per tag, for
-   * testing and in-memory contexts such as the {@link DirectPipelineRunner}.
-   */
-  public static class ListOutputManager implements OutputManager {
-
-    /** Outputs received so far, keyed by the tag they were emitted to. */
-    private final Map<TupleTag<?>, List<WindowedValue<?>>> outputLists = Maps.newHashMap();
-
-    /** Appends {@code output} to the list for {@code tag}, creating the list on first use. */
-    @Override
-    public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
-      List<WindowedValue<?>> received = outputLists.get(tag);
-      if (received == null) {
-        received = Lists.newArrayList();
-        outputLists.put(tag, received);
-      }
-      received.add(output);
-    }
-
-    /**
-     * Returns the outputs recorded for {@code tag}, or an empty list if nothing was ever
-     * output to it.
-     */
-    public <T> List<WindowedValue<T>> getOutput(TupleTag<T> tag) {
-      // Only WindowedValue<T> instances are ever stored under a TupleTag<T>, so the cast is
-      // safe by construction; Java cannot express it without going through a raw type.
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      List<WindowedValue<T>> recorded = (List) outputLists.get(tag);
-      if (recorded == null) {
-        return Collections.<WindowedValue<T>>emptyList();
-      }
-      return recorded;
-    }
-  }
-
-  @Override
-  public void startBundle() {
-    try {
-      // DoFn#startBundle is user code and may fail in arbitrary ways.
-      fn.startBundle(context);
-    } catch (Throwable userCodeFailure) {
-      // Route the failure through the runner's common wrapping policy.
-      throw wrapUserCodeException(userCodeFailure);
-    }
-  }
-
-  @Override
-  public void processElement(WindowedValue<InputT> elem) {
-    if (elem.getWindows().size() <= 1
-        || (!RequiresWindowAccess.class.isAssignableFrom(fn.getClass())
-            && context.sideInputReader.isEmpty())) {
-      invokeProcessElement(elem);
-    } else {
-      // We could modify the windowed value (and the processContext) to
-      // avoid repeated allocations, but this is more straightforward.
-      for (BoundedWindow window : elem.getWindows()) {
-        invokeProcessElement(WindowedValue.of(
-            elem.getValue(), elem.getTimestamp(), window, elem.getPane()));
-      }
-    }
-  }
-
-  /**
-   * Invokes {@link DoFn#processElement} after certain pre-processing has been done in
-   * {@link DoFnRunnerBase#processElement}.
-   */
-  protected abstract void invokeProcessElement(WindowedValue<InputT> elem);
-
-  @Override
-  public void finishBundle() {
-    try {
-      // DoFn#finishBundle is user code and may fail in arbitrary ways.
-      fn.finishBundle(context);
-    } catch (Throwable userCodeFailure) {
-      // Route the failure through the runner's common wrapping policy.
-      throw wrapUserCodeException(userCodeFailure);
-    }
-  }
-
-  /**
-   * A concrete implementation of {@code DoFn.Context} used for running a {@link DoFn}.
-   *
-   * <p>Besides the {@code DoFn.Context} overrides, this class carries the helpers that turn a
-   * raw output into a {@link WindowedValue}, forward it to the {@link OutputManager}, and
-   * report it to the {@link StepContext} when one is present.
-   *
-   * @param <InputT> the type of the DoFn's (main) input elements
-   * @param <OutputT> the type of the DoFn's (main) output elements
-   */
-  private static class DoFnContext<InputT, OutputT>
-      extends DoFn<InputT, OutputT>.Context {
-    /**
-     * Upper bound on the size of {@link #outputTags}, enforced when a previously unseen tag is
-     * output to. Note that the set also contains the main output tag and the declared side
-     * output tags.
-     */
-    private static final int MAX_SIDE_OUTPUTS = 1000;
-
-    final PipelineOptions options;
-    final DoFn<InputT, OutputT> fn;
-    final SideInputReader sideInputReader;
-    final OutputManager outputManager;
-    final TupleTag<OutputT> mainOutputTag;
-    final StepContext stepContext;
-    final CounterSet.AddCounterMutator addCounterMutator;
-    final WindowFn<?, ?> windowFn;
-
-    /**
-     * The set of known output tags, some of which may be undeclared, so we can throw an
-     * exception when it exceeds {@link #MAX_SIDE_OUTPUTS}.
-     */
-    private Set<TupleTag<?>> outputTags;
-
-    /**
-     * Creates the context for {@code fn}. The known output tags are seeded with the main
-     * output tag and every declared side output tag.
-     */
-    public DoFnContext(PipelineOptions options,
-                       DoFn<InputT, OutputT> fn,
-                       SideInputReader sideInputReader,
-                       OutputManager outputManager,
-                       TupleTag<OutputT> mainOutputTag,
-                       List<TupleTag<?>> sideOutputTags,
-                       StepContext stepContext,
-                       CounterSet.AddCounterMutator addCounterMutator,
-                       WindowFn<?, ?> windowFn) {
-      // DoFn.Context is an inner class of DoFn, so the superclass constructor needs an
-      // enclosing DoFn instance.
-      fn.super();
-      this.options = options;
-      this.fn = fn;
-      this.sideInputReader = sideInputReader;
-      this.outputManager = outputManager;
-      this.mainOutputTag = mainOutputTag;
-      this.outputTags = Sets.newHashSet();
-
-      outputTags.add(mainOutputTag);
-      for (TupleTag<?> sideOutputTag : sideOutputTags) {
-        outputTags.add(sideOutputTag);
-      }
-
-      this.stepContext = stepContext;
-      this.addCounterMutator = addCounterMutator;
-      this.windowFn = windowFn;
-      // NOTE(review): presumably this ends up calling createAggregatorInternal, which reads
-      // stepContext and addCounterMutator; keep it after the field assignments above.
-      super.setupDelegateAggregators();
-    }
-
-    //////////////////////////////////////////////////////////////////////////////
-
-    @Override
-    public PipelineOptions getPipelineOptions() {
-      return options;
-    }
-
-    /**
-     * Wraps {@code output} in a {@link WindowedValue}.
-     *
-     * <p>A {@code null} timestamp is replaced by {@link BoundedWindow#TIMESTAMP_MIN_VALUE}.
-     * When {@code windows} is {@code null}, the windows are assigned by {@link #windowFn},
-     * which is given access to the originally supplied timestamp only; any exception raised
-     * while assigning windows is rethrown via {@link UserCodeException#wrap}.
-     */
-    <T, W extends BoundedWindow> WindowedValue<T> makeWindowedValue(
-        T output, Instant timestamp, Collection<W> windows, PaneInfo pane) {
-      // Remember what the caller actually passed; the AssignContext below must not see the
-      // substituted minimum timestamp.
-      final Instant inputTimestamp = timestamp;
-
-      if (timestamp == null) {
-        timestamp = BoundedWindow.TIMESTAMP_MIN_VALUE;
-      }
-
-      if (windows == null) {
-        // NOTE(review): windowFn is null when the runner was built with a null windowing
-        // strategy; the resulting NullPointerException is caught and wrapped below.
-        try {
-          // The windowFn can never succeed at accessing the element, so its type does not
-          // matter here
-          @SuppressWarnings("unchecked")
-          WindowFn<Object, W> objectWindowFn = (WindowFn<Object, W>) windowFn;
-          windows = objectWindowFn.assignWindows(objectWindowFn.new AssignContext() {
-            @Override
-            public Object element() {
-              throw new UnsupportedOperationException(
-                  "WindowFn attempted to access input element when none was available");
-            }
-
-            @Override
-            public Instant timestamp() {
-              if (inputTimestamp == null) {
-                throw new UnsupportedOperationException(
-                    "WindowFn attempted to access input timestamp when none was available");
-              }
-              return inputTimestamp;
-            }
-
-            @Override
-            public Collection<? extends BoundedWindow> windows() {
-              throw new UnsupportedOperationException(
-                  "WindowFn attempted to access input windows when none were available");
-            }
-          });
-        } catch (Exception e) {
-          throw UserCodeException.wrap(e);
-        }
-      }
-
-      return WindowedValue.of(output, timestamp, windows, pane);
-    }
-
-    /**
-     * Reads {@code view} for the side input window that the view's window function associates
-     * with {@code mainInputWindow}.
-     *
-     * @throws IllegalArgumentException if the side input reader does not contain the view
-     */
-    public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
-      if (!sideInputReader.contains(view)) {
-        throw new IllegalArgumentException("calling sideInput() with unknown view");
-      }
-      BoundedWindow sideInputWindow =
-          view.getWindowingStrategyInternal().getWindowFn().getSideInputWindow(mainInputWindow);
-      return sideInputReader.get(view, sideInputWindow);
-    }
-
-    /** Builds a windowed value from the parts and emits it to the main output. */
-    void outputWindowedValue(
-        OutputT output,
-        Instant timestamp,
-        Collection<? extends BoundedWindow> windows,
-        PaneInfo pane) {
-      outputWindowedValue(makeWindowedValue(output, timestamp, windows, pane));
-    }
-
-    /** Emits to the main output and, if a step context is present, notes the output on it. */
-    void outputWindowedValue(WindowedValue<OutputT> windowedElem) {
-      outputManager.output(mainOutputTag, windowedElem);
-      if (stepContext != null) {
-        stepContext.noteOutput(windowedElem);
-      }
-    }
-
-    /** Builds a windowed value from the parts and emits it to the side output {@code tag}. */
-    protected <T> void sideOutputWindowedValue(TupleTag<T> tag,
-                                               T output,
-                                               Instant timestamp,
-                                               Collection<? extends BoundedWindow> windows,
-                                               PaneInfo pane) {
-      sideOutputWindowedValue(tag, makeWindowedValue(output, timestamp, windows, pane));
-    }
-
-    /**
-     * Emits to the side output {@code tag}, registering the tag first if it has not been seen.
-     *
-     * @throws IllegalArgumentException if registering a new tag would exceed
-     *     {@link #MAX_SIDE_OUTPUTS}
-     */
-    protected <T> void sideOutputWindowedValue(TupleTag<T> tag, WindowedValue<T> windowedElem) {
-      if (!outputTags.contains(tag)) {
-        // This tag wasn't declared nor was it seen before during this execution.
-        // Thus, this must be a new, undeclared and unconsumed output.
-        // To prevent likely user errors, enforce the limit on the number of side
-        // outputs.
-        if (outputTags.size() >= MAX_SIDE_OUTPUTS) {
-          throw new IllegalArgumentException(
-              "the number of side outputs has exceeded a limit of " + MAX_SIDE_OUTPUTS);
-        }
-        outputTags.add(tag);
-      }
-
-      outputManager.output(tag, windowedElem);
-      if (stepContext != null) {
-        stepContext.noteSideOutput(tag, windowedElem);
-      }
-    }
-
-    // Following implementations of output, outputWithTimestamp, and sideOutput
-    // are only accessible in DoFn.startBundle and DoFn.finishBundle, and will be shadowed by
-    // ProcessContext's versions in DoFn.processElement.
-    // They pass null windows (and, where no timestamp is given, a null timestamp), so
-    // makeWindowedValue assigns the windows and substitutes the minimum timestamp.
-    @Override
-    public void output(OutputT output) {
-      outputWindowedValue(output, null, null, PaneInfo.NO_FIRING);
-    }
-
-    @Override
-    public void outputWithTimestamp(OutputT output, Instant timestamp) {
-      outputWindowedValue(output, timestamp, null, PaneInfo.NO_FIRING);
-    }
-
-    @Override
-    public <T> void sideOutput(TupleTag<T> tag, T output) {
-      Preconditions.checkNotNull(tag, "TupleTag passed to sideOutput cannot be null");
-      sideOutputWindowedValue(tag, output, null, null, PaneInfo.NO_FIRING);
-    }
-
-    @Override
-    public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
-      Preconditions.checkNotNull(tag, "TupleTag passed to sideOutputWithTimestamp cannot be null");
-      sideOutputWindowedValue(tag, output, timestamp, null, PaneInfo.NO_FIRING);
-    }
-
-    /**
-     * Builds the counter name for a user-named aggregator: the step name and the user name
-     * joined by {@code "-"}, prefixed with {@code "user-"} unless the DoFn class is annotated
-     * with {@link SystemDoFnInternal}.
-     */
-    private String generateInternalAggregatorName(String userName) {
-      boolean system = fn.getClass().isAnnotationPresent(SystemDoFnInternal.class);
-      // NOTE(review): unlike the output methods above, this dereferences stepContext without
-      // a null check - confirm a null step context can never reach this point.
-      return (system ? "" : "user-") + stepContext.getStepName() + "-" + userName;
-    }
-
-    /** Creates a {@link CounterAggregator} registered under the generated internal name. */
-    @Override
-    protected <AggInputT, AggOutputT> Aggregator<AggInputT, AggOutputT> createAggregatorInternal(
-        String name, CombineFn<AggInputT, ?, AggOutputT> combiner) {
-      Preconditions.checkNotNull(combiner,
-          "Combiner passed to createAggregator cannot be null");
-      return new CounterAggregator<>(generateInternalAggregatorName(name),
-          combiner, addCounterMutator);
-    }
-  }
-
-  /**
-   * Creates the {@code DoFn.ProcessContext} through which the {@link DoFn} sees {@code elem};
-   * it shares this runner's bundle-level context.
-   */
-  protected DoFn<InputT, OutputT>.ProcessContext createProcessContext(WindowedValue<InputT> elem) {
-    DoFnProcessContext<InputT, OutputT> processContext =
-        new DoFnProcessContext<>(fn, context, elem);
-    return processContext;
-  }
-
-  /**
-   * Rethrows {@code t}, asking {@link UserCodeException#wrapIf} to wrap it only when the
-   * {@link DoFn} is not annotated as a system DoFn.
-   *
-   * <p>This method never returns normally: it throws the resulting exception itself. The
-   * declared return type only lets callers write {@code throw wrapUserCodeException(t)} so
-   * that the compiler sees the call site as terminating.
-   */
-  protected RuntimeException wrapUserCodeException(Throwable t) {
-    throw UserCodeException.wrapIf(!isSystemDoFn(), t);
-  }
-
-  /** Returns whether the class of the DoFn being run carries {@link SystemDoFnInternal}. */
-  private boolean isSystemDoFn() {
-    Class<?> fnClass = fn.getClass();
-    return fnClass.isAnnotationPresent(SystemDoFnInternal.class);
-  }
-
-  /**
-   * A concrete implementation of {@code DoFn.ProcessContext} used for
-   * running a {@link DoFn} over a single element.
-   *
-   * <p>Element-specific questions (value, timestamp, windows, pane) are answered from the
-   * wrapped {@link WindowedValue}; everything else is delegated to the shared
-   * {@link DoFnContext}.
-   *
-   * @param <InputT> the type of the DoFn's (main) input elements
-   * @param <OutputT> the type of the DoFn's (main) output elements
-   */
-  static class DoFnProcessContext<InputT, OutputT>
-      extends DoFn<InputT, OutputT>.ProcessContext {
-
-
-    final DoFn<InputT, OutputT> fn;
-    final DoFnContext<InputT, OutputT> context;
-    final WindowedValue<InputT> windowedValue;
-
-    public DoFnProcessContext(DoFn<InputT, OutputT> fn,
-                              DoFnContext<InputT, OutputT> context,
-                              WindowedValue<InputT> windowedValue) {
-      // DoFn.ProcessContext is an inner class of DoFn, so the superclass constructor needs an
-      // enclosing DoFn instance.
-      fn.super();
-      this.fn = fn;
-      this.context = context;
-      this.windowedValue = windowedValue;
-    }
-
-    @Override
-    public PipelineOptions getPipelineOptions() {
-      return context.getPipelineOptions();
-    }
-
-    @Override
-    public InputT element() {
-      return windowedValue.getValue();
-    }
-
-    /**
-     * Reads {@code view} for the single window of the current element.
-     *
-     * @throws IllegalStateException if the element is in more than one window, or is in no
-     *     window and the context's window function is not {@link GlobalWindows}
-     */
-    @Override
-    public <T> T sideInput(PCollectionView<T> view) {
-      Preconditions.checkNotNull(view, "View passed to sideInput cannot be null");
-      Iterator<? extends BoundedWindow> windowIter = windows().iterator();
-      BoundedWindow window;
-      if (!windowIter.hasNext()) {
-        if (context.windowFn instanceof GlobalWindows) {
-          // TODO: Remove this once GroupByKeyOnly no longer outputs elements
-          // without windows
-          window = GlobalWindow.INSTANCE;
-        } else {
-          throw new IllegalStateException(
-              "sideInput called when main input element is not in any windows");
-        }
-      } else {
-        window = windowIter.next();
-        if (windowIter.hasNext()) {
-          throw new IllegalStateException(
-              "sideInput called when main input element is in multiple windows");
-        }
-      }
-      return context.sideInput(view, window);
-    }
-
-    /**
-     * Returns the only window of the current element.
-     *
-     * @throws UnsupportedOperationException if the DoFn does not implement
-     *     {@link RequiresWindowAccess}
-     */
-    @Override
-    public BoundedWindow window() {
-      if (!(fn instanceof RequiresWindowAccess)) {
-        // The marker interface checked above is RequiresWindowAccess; name it exactly so the
-        // message tells the user what to implement.
-        throw new UnsupportedOperationException(
-            "window() is only available in the context of a DoFn marked as RequiresWindowAccess.");
-      }
-      return Iterables.getOnlyElement(windows());
-    }
-
-    @Override
-    public PaneInfo pane() {
-      return windowedValue.getPane();
-    }
-
-    /** Emits {@code output} with the timestamp, windows and pane of the current element. */
-    @Override
-    public void output(OutputT output) {
-      context.outputWindowedValue(windowedValue.withValue(output));
-    }
-
-    /**
-     * Emits {@code output} at {@code timestamp}, in the windows and pane of the current
-     * element.
-     *
-     * @throws IllegalArgumentException if {@code timestamp} is earlier than the element's
-     *     timestamp minus the DoFn's allowed skew
-     */
-    @Override
-    public void outputWithTimestamp(OutputT output, Instant timestamp) {
-      checkTimestamp(timestamp);
-      context.outputWindowedValue(output, timestamp,
-          windowedValue.getWindows(), windowedValue.getPane());
-    }
-
-    /** Emits to the main output with explicitly supplied timestamp, windows and pane. */
-    void outputWindowedValue(
-        OutputT output,
-        Instant timestamp,
-        Collection<? extends BoundedWindow> windows,
-        PaneInfo pane) {
-      context.outputWindowedValue(output, timestamp, windows, pane);
-    }
-
-    /** Emits {@code output} to {@code tag} with the current element's windowing metadata. */
-    @Override
-    public <T> void sideOutput(TupleTag<T> tag, T output) {
-      Preconditions.checkNotNull(tag, "Tag passed to sideOutput cannot be null");
-      context.sideOutputWindowedValue(tag, windowedValue.withValue(output));
-    }
-
-    /**
-     * Emits {@code output} to {@code tag} at {@code timestamp}, in the windows and pane of the
-     * current element.
-     *
-     * @throws IllegalArgumentException if {@code timestamp} is earlier than the element's
-     *     timestamp minus the DoFn's allowed skew
-     */
-    @Override
-    public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
-      Preconditions.checkNotNull(tag, "Tag passed to sideOutputWithTimestamp cannot be null");
-      checkTimestamp(timestamp);
-      context.sideOutputWindowedValue(
-          tag, output, timestamp, windowedValue.getWindows(), windowedValue.getPane());
-    }
-
-    @Override
-    public Instant timestamp() {
-      return windowedValue.getTimestamp();
-    }
-
-    /** Returns all windows of the current element. */
-    public Collection<? extends BoundedWindow> windows() {
-      return windowedValue.getWindows();
-    }
-
-    /**
-     * Rejects output timestamps that lie before the current element's timestamp by more than
-     * {@link DoFn#getAllowedTimestampSkew()}.
-     */
-    private void checkTimestamp(Instant timestamp) {
-      if (timestamp.isBefore(windowedValue.getTimestamp().minus(fn.getAllowedTimestampSkew()))) {
-        throw new IllegalArgumentException(String.format(
-            "Cannot output with timestamp %s. Output timestamps must be no earlier than the "
-            + "timestamp of the current input (%s) minus the allowed skew (%s). See the "
-            + "DoFn#getAllowedTimestampSkew() Javadoc for details on changing the allowed skew.",
-            timestamp, windowedValue.getTimestamp(),
-            PeriodFormat.getDefault().print(fn.getAllowedTimestampSkew().toPeriod())));
-      }
-    }
-
-    /**
-     * Returns a {@link WindowingInternals} view over this context: element metadata comes from
-     * the current element, while output, state, timers and side inputs go through the shared
-     * {@link DoFnContext} and its step context.
-     */
-    @Override
-    public WindowingInternals<InputT, OutputT> windowingInternals() {
-      return new WindowingInternals<InputT, OutputT>() {
-        @Override
-        public void outputWindowedValue(OutputT output, Instant timestamp,
-            Collection<? extends BoundedWindow> windows, PaneInfo pane) {
-          context.outputWindowedValue(output, timestamp, windows, pane);
-        }
-
-        @Override
-        public Collection<? extends BoundedWindow> windows() {
-          return windowedValue.getWindows();
-        }
-
-        @Override
-        public PaneInfo pane() {
-          return windowedValue.getPane();
-        }
-
-        @Override
-        public TimerInternals timerInternals() {
-          return context.stepContext.timerInternals();
-        }
-
-        @Override
-        public <T> void writePCollectionViewData(
-            TupleTag<?> tag,
-            Iterable<WindowedValue<T>> data,
-            Coder<T> elemCoder) throws IOException {
-          @SuppressWarnings("unchecked")
-          Coder<BoundedWindow> windowCoder = (Coder<BoundedWindow>) context.windowFn.windowCoder();
-
-          // window() here is the enclosing process context's window(), so this path is
-          // subject to the same RequiresWindowAccess and single-window checks.
-          context.stepContext.writePCollectionViewData(
-              tag, data, IterableCoder.of(WindowedValue.getFullCoder(elemCoder, windowCoder)),
-              window(), windowCoder);
-        }
-
-        @Override
-        public StateInternals<?> stateInternals() {
-          return context.stepContext.stateInternals();
-        }
-
-        @Override
-        public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
-          return context.sideInput(view, mainInputWindow);
-        }
-      };
-    }
-
-    /** Delegates aggregator creation to the shared {@link DoFnContext}. */
-    @Override
-    protected <AggregatorInputT, AggregatorOutputT> Aggregator<AggregatorInputT, AggregatorOutputT>
-        createAggregatorInternal(
-            String name, CombineFn<AggregatorInputT, ?, AggregatorOutputT> combiner) {
-      return context.createAggregatorInternal(name, combiner);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunners.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunners.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunners.java
deleted file mode 100644
index d56b36e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/DoFnRunners.java
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.DoFnRunner.ReduceFnExecutor;
-import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet.AddCounterMutator;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import java.util.List;
-
-/**
- * Static utility methods that provide {@link DoFnRunner} implementations.
- */
-public class DoFnRunners {
-  /**
-   * Information about how to create output receivers and output to them.
-   */
-  public interface OutputManager {
-    /**
-     * Outputs a single element to the receiver indicated by the given {@link TupleTag}.
-     */
-    public <T> void output(TupleTag<T> tag, WindowedValue<T> output);
-  }
-
-  /**
-   * Returns a basic implementation of {@link DoFnRunner} that works for most {@link DoFn DoFns}.
-   *
-   * <p>It invokes {@link DoFn#processElement} for each input.
-   */
-  public static <InputT, OutputT> DoFnRunner<InputT, OutputT> simpleRunner(
-      PipelineOptions options,
-      DoFn<InputT, OutputT> fn,
-      SideInputReader sideInputReader,
-      OutputManager outputManager,
-      TupleTag<OutputT> mainOutputTag,
-      List<TupleTag<?>> sideOutputTags,
-      StepContext stepContext,
-      CounterSet.AddCounterMutator addCounterMutator,
-      WindowingStrategy<?, ?> windowingStrategy) {
-    return new SimpleDoFnRunner<>(
-        options,
-        fn,
-        sideInputReader,
-        outputManager,
-        mainOutputTag,
-        sideOutputTags,
-        stepContext,
-        addCounterMutator,
-        windowingStrategy);
-  }
-
-  /**
-   * Returns an implementation of {@link DoFnRunner} that handles late data dropping.
-   *
-   * <p>It drops elements from expired windows before they reach the underlying {@link DoFn}.
-   */
-  public static <K, InputT, OutputT, W extends BoundedWindow>
-      DoFnRunner<KeyedWorkItem<K, InputT>, KV<K, OutputT>> lateDataDroppingRunner(
-          PipelineOptions options,
-          ReduceFnExecutor<K, InputT, OutputT, W> reduceFnExecutor,
-          SideInputReader sideInputReader,
-          OutputManager outputManager,
-          TupleTag<KV<K, OutputT>> mainOutputTag,
-          List<TupleTag<?>> sideOutputTags,
-          StepContext stepContext,
-          CounterSet.AddCounterMutator addCounterMutator,
-          WindowingStrategy<?, W> windowingStrategy) {
-    DoFnRunner<KeyedWorkItem<K, InputT>, KV<K, OutputT>> simpleDoFnRunner =
-        simpleRunner(
-            options,
-            reduceFnExecutor.asDoFn(),
-            sideInputReader,
-            outputManager,
-            mainOutputTag,
-            sideOutputTags,
-            stepContext,
-            addCounterMutator,
-            windowingStrategy);
-    return new LateDataDroppingDoFnRunner<>(
-        simpleDoFnRunner,
-        windowingStrategy,
-        stepContext.timerInternals(),
-        reduceFnExecutor.getDroppedDueToLatenessAggregator());
-  }
-
-  public static <InputT, OutputT> DoFnRunner<InputT, OutputT> createDefault(
-      PipelineOptions options,
-      DoFn<InputT, OutputT> doFn,
-      SideInputReader sideInputReader,
-      OutputManager outputManager,
-      TupleTag<OutputT> mainOutputTag,
-      List<TupleTag<?>> sideOutputTags,
-      StepContext stepContext,
-      AddCounterMutator addCounterMutator,
-      WindowingStrategy<?, ?> windowingStrategy) {
-    if (doFn instanceof ReduceFnExecutor) {
-      @SuppressWarnings("rawtypes")
-      ReduceFnExecutor fn = (ReduceFnExecutor) doFn;
-      @SuppressWarnings({"unchecked", "cast", "rawtypes"})
-      DoFnRunner<InputT, OutputT> runner = (DoFnRunner<InputT, OutputT>) lateDataDroppingRunner(
-          options,
-          fn,
-          sideInputReader,
-          outputManager,
-          (TupleTag) mainOutputTag,
-          sideOutputTags,
-          stepContext,
-          addCounterMutator,
-          (WindowingStrategy) windowingStrategy);
-      return runner;
-    }
-    return simpleRunner(
-        options,
-        doFn,
-        sideInputReader,
-        outputManager,
-        mainOutputTag,
-        sideOutputTags,
-        stepContext,
-        addCounterMutator,
-        windowingStrategy);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutableTrigger.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutableTrigger.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutableTrigger.java
deleted file mode 100644
index 22a3762..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutableTrigger.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger;
-import com.google.common.base.Preconditions;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * A wrapper around a trigger used during execution. While an actual trigger may appear multiple
- * times (both in the same trigger expression and in other trigger expressions), the
- * {@code ExecutableTrigger} wrapped around them forms a tree (only one occurrence).
- *
- * @param <W> {@link BoundedWindow} subclass used to represent the windows used.
- */
-public class ExecutableTrigger<W extends BoundedWindow> implements Serializable {
-
-  /** Store the index assigned to this trigger. */
-  private final int triggerIndex;
-  private final int firstIndexAfterSubtree;
-  private final List<ExecutableTrigger<W>> subTriggers = new ArrayList<>();
-  private final Trigger<W> trigger;
-
-  public static <W extends BoundedWindow> ExecutableTrigger<W> create(Trigger<W> trigger) {
-    return create(trigger, 0);
-  }
-
-  private static <W extends BoundedWindow> ExecutableTrigger<W> create(
-      Trigger<W> trigger, int nextUnusedIndex) {
-    if (trigger instanceof OnceTrigger) {
-      return new ExecutableOnceTrigger<W>((OnceTrigger<W>) trigger, nextUnusedIndex);
-    } else {
-      return new ExecutableTrigger<W>(trigger, nextUnusedIndex);
-    }
-  }
-
-  public static <W extends BoundedWindow> ExecutableTrigger<W> createForOnceTrigger(
-      OnceTrigger<W> trigger, int nextUnusedIndex) {
-    return new ExecutableOnceTrigger<W>(trigger, nextUnusedIndex);
-  }
-
-  private ExecutableTrigger(Trigger<W> trigger, int nextUnusedIndex) {
-    this.trigger = Preconditions.checkNotNull(trigger, "trigger must not be null");
-    this.triggerIndex = nextUnusedIndex++;
-
-    if (trigger.subTriggers() != null) {
-      for (Trigger<W> subTrigger : trigger.subTriggers()) {
-        ExecutableTrigger<W> subExecutable = create(subTrigger, nextUnusedIndex);
-        subTriggers.add(subExecutable);
-        nextUnusedIndex = subExecutable.firstIndexAfterSubtree;
-      }
-    }
-    firstIndexAfterSubtree = nextUnusedIndex;
-  }
-
-  public List<ExecutableTrigger<W>> subTriggers() {
-    return subTriggers;
-  }
-
-  @Override
-  public String toString() {
-    return trigger.toString();
-  }
-
-  /**
-   * Return the underlying trigger specification corresponding to this {@code ExecutableTrigger}.
-   */
-  public Trigger<W> getSpec() {
-    return trigger;
-  }
-
-  public int getTriggerIndex() {
-    return triggerIndex;
-  }
-
-  public final int getFirstIndexAfterSubtree() {
-    return firstIndexAfterSubtree;
-  }
-
-  public boolean isCompatible(ExecutableTrigger<W> other) {
-    return trigger.isCompatible(other.trigger);
-  }
-
-  public ExecutableTrigger<W> getSubTriggerContaining(int index) {
-    Preconditions.checkNotNull(subTriggers);
-    Preconditions.checkState(index > triggerIndex && index < firstIndexAfterSubtree,
-        "Cannot find sub-trigger containing index not in this tree.");
-    ExecutableTrigger<W> previous = null;
-    for (ExecutableTrigger<W> subTrigger : subTriggers) {
-      if (index < subTrigger.triggerIndex) {
-        return previous;
-      }
-      previous = subTrigger;
-    }
-    return previous;
-  }
-
-  /**
-   * Invoke the {@link Trigger#onElement} method for this trigger, ensuring that the bits are
-   * properly updated if the trigger finishes.
-   */
-  public void invokeOnElement(Trigger<W>.OnElementContext c) throws Exception {
-    trigger.onElement(c.forTrigger(this));
-  }
-
-  /**
-   * Invoke the {@link Trigger#onMerge} method for this trigger, ensuring that the bits are properly
-   * updated.
-   */
-  public void invokeOnMerge(Trigger<W>.OnMergeContext c) throws Exception {
-    Trigger<W>.OnMergeContext subContext = c.forTrigger(this);
-    trigger.onMerge(subContext);
-  }
-
-  public boolean invokeShouldFire(Trigger<W>.TriggerContext c) throws Exception {
-    return trigger.shouldFire(c.forTrigger(this));
-  }
-
-  public void invokeOnFire(Trigger<W>.TriggerContext c) throws Exception {
-    trigger.onFire(c.forTrigger(this));
-  }
-
-  /**
-   * Invoke clear for this trigger.
-   */
-  public void invokeClear(Trigger<W>.TriggerContext c) throws Exception {
-    trigger.clear(c.forTrigger(this));
-  }
-
-  /**
-   * {@link ExecutableTrigger} that enforces the fact that the trigger should always FIRE_AND_FINISH
-   * and never just FIRE.
-   */
-  private static class ExecutableOnceTrigger<W extends BoundedWindow> extends ExecutableTrigger<W> {
-
-    public ExecutableOnceTrigger(OnceTrigger<W> trigger, int nextUnusedIndex) {
-      super(trigger, nextUnusedIndex);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java
deleted file mode 100644
index cff5b95..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExecutionContext.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler;
-import com.google.cloud.dataflow.sdk.util.state.StateInternals;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import java.io.IOException;
-import java.util.Collection;
-
-/**
- * Context for the current execution. This is guaranteed to exist during processing,
- * but does not necessarily persist between different batches of work.
- */
-public interface ExecutionContext {
-  /**
-   * Returns the {@link StepContext} associated with the given step.
-   */
-  StepContext getOrCreateStepContext(
-      String stepName, String transformName, StateSampler stateSampler);
-
-  /**
-   * Returns a collection view of all of the {@link StepContext}s.
-   */
-  Collection<? extends StepContext> getAllStepContexts();
-
-  /**
-   * Hook for subclasses to implement that will be called whenever
-   * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#output}
-   * is called.
-   */
-  void noteOutput(WindowedValue<?> output);
-
-  /**
-   * Hook for subclasses to implement that will be called whenever
-   * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#sideOutput}
-   * is called.
-   */
-  void noteSideOutput(TupleTag<?> tag, WindowedValue<?> output);
-
-  /**
-   * Per-step, per-key context used for retrieving state.
-   */
-  public interface StepContext {
-
-    /**
-     * The name of the step.
-     */
-    String getStepName();
-
-    /**
-     * The name of the transform for the step.
-     */
-    String getTransformName();
-
-    /**
-     * Hook for subclasses to implement that will be called whenever
-     * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#output}
-     * is called.
-     */
-    void noteOutput(WindowedValue<?> output);
-
-    /**
-     * Hook for subclasses to implement that will be called whenever
-     * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#sideOutput}
-     * is called.
-     */
-    void noteSideOutput(TupleTag<?> tag, WindowedValue<?> output);
-
-    /**
-     * Writes the given {@code PCollectionView} data to a globally accessible location.
-     */
-    <T, W extends BoundedWindow> void writePCollectionViewData(
-        TupleTag<?> tag,
-        Iterable<WindowedValue<T>> data,
-        Coder<Iterable<WindowedValue<T>>> dataCoder,
-        W window,
-        Coder<W> windowCoder)
-            throws IOException;
-
-    StateInternals<?> stateInternals();
-
-    TimerInternals timerInternals();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExposedByteArrayInputStream.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExposedByteArrayInputStream.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExposedByteArrayInputStream.java
deleted file mode 100644
index dff5fd1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExposedByteArrayInputStream.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-
-/**
- * {@link ByteArrayInputStream} that allows accessing the entire internal buffer without copying.
- */
-public class ExposedByteArrayInputStream extends ByteArrayInputStream{
-
-  public ExposedByteArrayInputStream(byte[] buf) {
-    super(buf);
-  }
-
-  /** Reads all remaining bytes; returns the internal buffer uncopied if it is wholly unread.
-   * @throws IOException */
-  public byte[] readAll() throws IOException {
-    if (pos == 0 && count == buf.length) {
-      pos = count;
-      return buf;
-    }
-    byte[] ret = new byte[count - pos];
-    super.read(ret);
-    return ret;
-  }
-
-  @Override
-  public void close() {
-    try {
-      super.close();
-    } catch (IOException exn) {
-      throw new RuntimeException("Unexpected IOException closing ByteArrayInputStream", exn);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExposedByteArrayOutputStream.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExposedByteArrayOutputStream.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExposedByteArrayOutputStream.java
deleted file mode 100644
index d8e4d50..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ExposedByteArrayOutputStream.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-
-/**
- * {@link ByteArrayOutputStream} that special-cases the write of a single {@code byte[]}.
- * When calling {@link #toByteArray()} after writing only one {@code byte[]} using
- * {@link #writeAndOwn(byte[])}, it will return that array directly.
- */
-public class ExposedByteArrayOutputStream extends ByteArrayOutputStream {
-
-  private byte[] swappedBuffer;
-
-  /**
-   * If true, this stream doesn't allow direct access to the passed in byte-array. It behaves just
-   * like a normal {@link ByteArrayOutputStream}.
-   *
-   * <p>It is set to true after any write operations other than the first call to
-   * {@link #writeAndOwn(byte[])}.
-   */
-  private boolean isFallback = false;
-
-  /**
-   * Fall back to the behavior of a normal {@link ByteArrayOutputStream}.
-   */
-  private void fallback() {
-    isFallback = true;
-    if (swappedBuffer != null) {
-      // swappedBuffer != null means buf is actually provided by the caller of writeAndOwn(),
-      // while swappedBuffer is the original buffer.
-      // Recover the buffer and copy the bytes from buf.
-      byte[] tempBuffer = buf;
-      count = 0;
-      buf = swappedBuffer;
-      super.write(tempBuffer, 0, tempBuffer.length);
-      swappedBuffer = null;
-    }
-  }
-
-  /**
-   * Write {@code b} to the stream and take the ownership of {@code b}.
-   * If the stream is empty, {@code b} itself will be used as the content of the stream and
-   * no content copy will be involved.
-   * <p><i>Note: After passing any byte array to this method, it must not be modified again.</i>
-   *
-   * @throws IOException
-   */
-  public void writeAndOwn(byte[] b) throws IOException {
-    if (b.length == 0) {
-      return;
-    }
-    if (count == 0) {
-      // Optimized first-time whole write.
-      // The original buffer will be swapped to swappedBuffer, while the input b is used as buf.
-      swappedBuffer = buf;
-      buf = b;
-      count = b.length;
-    } else {
-      fallback();
-      super.write(b);
-    }
-  }
-
-  @Override
-  public void write(byte[] b, int off, int len) {
-    fallback();
-    super.write(b, off, len);
-  }
-
-  @Override
-  public void write(int b) {
-    fallback();
-    super.write(b);
-  }
-
-  @Override
-  public byte[] toByteArray() {
-    // Note: count == buf.length is not a correct criterion to "return buf;", because the internal
-    // buf may be reused after reset().
-    if (!isFallback && count > 0) {
-      return buf;
-    } else {
-      return super.toByteArray();
-    }
-  }
-
-  @Override
-  public void reset() {
-    if (count == 0) {
-      return;
-    }
-    count = 0;
-    if (isFallback) {
-      isFallback = false;
-    } else {
-      buf = swappedBuffer;
-      swappedBuffer = null;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java
deleted file mode 100644
index 77d0b83..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FileIOChannelFactory.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.common.base.Predicate;
-import com.google.common.base.Predicates;
-import com.google.common.collect.Iterables;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.nio.channels.Channels;
-import java.nio.channels.ReadableByteChannel;
-import java.nio.channels.WritableByteChannel;
-import java.nio.file.FileSystems;
-import java.nio.file.Files;
-import java.nio.file.NoSuchFileException;
-import java.nio.file.PathMatcher;
-import java.nio.file.Paths;
-import java.util.Collection;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.regex.Matcher;
-
-/**
- * Implements IOChannelFactory for local files.
- */
-public class FileIOChannelFactory implements IOChannelFactory {
-  private static final Logger LOG = LoggerFactory.getLogger(FileIOChannelFactory.class);
-
-  // This implementation only allows for wildcards in the file name.
-  // The directory portion must exist as-is.
-  @Override
-  public Collection<String> match(String spec) throws IOException {
-    File file = new File(spec);
-
-    File parent = file.getAbsoluteFile().getParentFile();
-    if (!parent.exists()) {
-      throw new IOException("Unable to find parent directory of " + spec);
-    }
-
-    // Method getAbsolutePath() on Windows platform may return something like
-    // "c:\temp\file.txt". FileSystem.getPathMatcher() call below will treat
-    // '\' (backslash) as an escape character, instead of a directory
-    // separator. Replacing backslash with double-backslash solves the problem.
-    // We perform the replacement on all platforms, even those that allow
-    // backslash as a part of the filename, because Globs.toRegexPattern will
-    // eat one backslash.
-    String pathToMatch = file.getAbsolutePath().replaceAll(Matcher.quoteReplacement("\\"),
-                                                           Matcher.quoteReplacement("\\\\"));
-
-    final PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:" + pathToMatch);
-
-    Iterable<File> files = com.google.common.io.Files.fileTreeTraverser().preOrderTraversal(parent);
-    Iterable<File> matchedFiles = Iterables.filter(files,
-        Predicates.and(
-            com.google.common.io.Files.isFile(),
-            new Predicate<File>() {
-              @Override
-              public boolean apply(File input) {
-                return matcher.matches(input.toPath());
-              }
-        }));
-
-    List<String> result = new LinkedList<>();
-    for (File match : matchedFiles) {
-      result.add(match.getPath());
-    }
-
-    return result;
-  }
-
-  @Override
-  public ReadableByteChannel open(String spec) throws IOException {
-    LOG.debug("opening file {}", spec);
-    @SuppressWarnings("resource") // The caller is responsible for closing the channel.
-    FileInputStream inputStream = new FileInputStream(spec);
-    // Use this method for creating the channel (rather than new FileChannel) so that we get
-    // regular FileNotFoundException. Closing the underlying channel will close the inputStream.
-    return inputStream.getChannel();
-  }
-
-  @Override
-  public WritableByteChannel create(String spec, String mimeType)
-      throws IOException {
-    LOG.debug("creating file {}", spec);
-    File file = new File(spec);
-    if (file.getAbsoluteFile().getParentFile() != null
-        && !file.getAbsoluteFile().getParentFile().exists()
-        && !file.getAbsoluteFile().getParentFile().mkdirs()) {
-      throw new IOException("Unable to create parent directories for '" + spec + "'");
-    }
-    return Channels.newChannel(
-        new BufferedOutputStream(new FileOutputStream(file)));
-  }
-
-  @Override
-  public long getSizeBytes(String spec) throws IOException {
-    try {
-      return Files.size(FileSystems.getDefault().getPath(spec));
-    } catch (NoSuchFileException e) {
-      throw new FileNotFoundException(e.getReason());
-    }
-  }
-
-  @Override
-  public boolean isReadSeekEfficient(String spec) throws IOException {
-    return true;
-  }
-
-  @Override
-  public String resolve(String path, String other) throws IOException {
-    return Paths.get(path).resolve(other).toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggers.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggers.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggers.java
deleted file mode 100644
index e75be23..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggers.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-/**
- * A mutable set which tracks whether any particular {@link ExecutableTrigger} is
- * finished.
- */
-public interface FinishedTriggers {
-  /**
-   * Returns {@code true} if the trigger is finished.
-   */
-  public boolean isFinished(ExecutableTrigger<?> trigger);
-
-  /**
-   * Sets whether the trigger is finished.
-   */
-  public void setFinished(ExecutableTrigger<?> trigger, boolean value);
-
-  /**
-   * Sets the trigger and all of its subtriggers to unfinished.
-   */
-  public void clearRecursively(ExecutableTrigger<?> trigger);
-
-  /**
-   * Create an independent copy of this mutable {@link FinishedTriggers}.
-   */
-  public FinishedTriggers copy();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggersBitSet.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggersBitSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggersBitSet.java
deleted file mode 100644
index 09f7af7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggersBitSet.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import java.util.BitSet;
-
-/**
- * A {@link FinishedTriggers} implementation based on an underlying {@link BitSet}.
- */
-public class FinishedTriggersBitSet implements FinishedTriggers {
-
-  private final BitSet bitSet;
-
-  private FinishedTriggersBitSet(BitSet bitSet) {
-    this.bitSet = bitSet;
-  }
-
-  public static FinishedTriggersBitSet emptyWithCapacity(int capacity) {
-    return new FinishedTriggersBitSet(new BitSet(capacity));
-  }
-
-  public static FinishedTriggersBitSet fromBitSet(BitSet bitSet) {
-    return new FinishedTriggersBitSet(bitSet);
-  }
-
-  /**
-   * Returns the underlying {@link BitSet} for this {@link FinishedTriggersBitSet}.
-   */
-  public BitSet getBitSet() {
-    return bitSet;
-  }
-
-  @Override
-  public boolean isFinished(ExecutableTrigger<?> trigger) {
-    return bitSet.get(trigger.getTriggerIndex());
-  }
-
-  @Override
-  public void setFinished(ExecutableTrigger<?> trigger, boolean value) {
-    bitSet.set(trigger.getTriggerIndex(), value);
-  }
-
-  @Override
-  public void clearRecursively(ExecutableTrigger<?> trigger) {
-    bitSet.clear(trigger.getTriggerIndex(), trigger.getFirstIndexAfterSubtree());
-  }
-
-  @Override
-  public FinishedTriggersBitSet copy() {
-    return new FinishedTriggersBitSet((BitSet) bitSet.clone());
-  }
-}
-
-

[04/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateContexts.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateContexts.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateContexts.java
deleted file mode 100644
index e301d43..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateContexts.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.WindowingInternals;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-
-import javax.annotation.Nullable;
-
-/**
- * Factory that produces {@link StateContext} based on different inputs.
- */
-public class StateContexts {
-  private static final StateContext<BoundedWindow> NULL_CONTEXT =
-      new StateContext<BoundedWindow>() {
-        @Override
-        public PipelineOptions getPipelineOptions() {
-          throw new IllegalArgumentException("cannot call getPipelineOptions() in a null context");
-        }
-
-        @Override
-        public <T> T sideInput(PCollectionView<T> view) {
-          throw new IllegalArgumentException("cannot call sideInput() in a null context");
-        }
-
-        @Override
-        public BoundedWindow window() {
-          throw new IllegalArgumentException("cannot call window() in a null context");
-        }};
-
-  /**
-   * Returns a fake {@link StateContext}.
-   */
-  @SuppressWarnings("unchecked")
-  public static <W extends BoundedWindow> StateContext<W> nullContext() {
-    return (StateContext<W>) NULL_CONTEXT;
-  }
-
-  /**
-   * Returns a {@link StateContext} that only contains the state window.
-   */
-  public static <W extends BoundedWindow> StateContext<W> windowOnly(final W window) {
-    return new StateContext<W>() {
-      @Override
-      public PipelineOptions getPipelineOptions() {
-        throw new IllegalArgumentException(
-            "cannot call getPipelineOptions() in a window only context");
-      }
-      @Override
-      public <T> T sideInput(PCollectionView<T> view) {
-        throw new IllegalArgumentException("cannot call sideInput() in a window only context");
-      }
-      @Override
-      public W window() {
-        return window;
-      }
-    };
-  }
-
-  /**
-   * Returns a {@link StateContext} from {@code PipelineOptions}, {@link WindowingInternals},
-   * and the state window.
-   */
-  public static <W extends BoundedWindow> StateContext<W> createFromComponents(
-      @Nullable final PipelineOptions options,
-      final WindowingInternals<?, ?> windowingInternals,
-      final W window) {
-    @SuppressWarnings("unchecked")
-    StateContext<W> typedNullContext = (StateContext<W>) NULL_CONTEXT;
-    if (options == null) {
-      return typedNullContext;
-    } else {
-      return new StateContext<W>() {
-
-        @Override
-        public PipelineOptions getPipelineOptions() {
-          return options;
-        }
-
-        @Override
-        public <T> T sideInput(PCollectionView<T> view) {
-          return windowingInternals.sideInput(view, window);
-        }
-
-        @Override
-        public W window() {
-          return window;
-        }
-      };
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateInternals.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateInternals.java
deleted file mode 100644
index b31afb4..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateInternals.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-
-/**
- * {@code StateInternals} describes the functionality a runner needs to provide for the
- * State API to be supported.
- *
- * <p>The SDK will only use this after elements have been partitioned by key. For instance, after a
- * {@link GroupByKey} operation. The runner implementation must ensure that any writes using
- * {@link StateInternals} are implicitly scoped to the key being processed and the specific step
- * accessing state.
- *
- * <p>The runner implementation must also ensure that any writes to the associated state objects
- * are persisted together with the completion status of the processing that produced these
- * writes.
- *
- * <p>This is a low-level API intended for use by the Dataflow SDK. It should not be
- * used directly, and is highly likely to change.
- */
-@Experimental(Kind.STATE)
-public interface StateInternals<K> {
-
-  /** The key for this {@link StateInternals}. */
-  K getKey();
-
-  /**
-   * Return the state associated with {@code address} in the specified {@code namespace}.
-   */
-  <T extends State> T state(StateNamespace namespace, StateTag<? super K, T> address);
-
-  /**
-   * Return the state associated with {@code address} in the specified {@code namespace}
-   * with the {@link StateContext}.
-   */
-  <T extends State> T state(
-      StateNamespace namespace, StateTag<? super K, T> address, StateContext<?> c);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateMerging.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateMerging.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateMerging.java
deleted file mode 100644
index 0b33ea9..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateMerging.java
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.common.base.Preconditions;
-
-import org.joda.time.Instant;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Helpers for merging state.
- */
-public class StateMerging {
-  /**
-   * Clear all state in {@code address} in all windows under merge (even result windows)
-   * in {@code context}.
-   */
-  public static <K, StateT extends State, W extends BoundedWindow> void clear(
-      MergingStateAccessor<K, W> context, StateTag<? super K, StateT> address) {
-    for (StateT state : context.accessInEachMergingWindow(address).values()) {
-      state.clear();
-    }
-  }
-
-  /**
-   * Prefetch all bag state in {@code address} across all windows under merge in
-   * {@code context}, except for the bag state in the final state address window which we can
-   * blindly append to.
-   */
-  public static <K, T, W extends BoundedWindow> void prefetchBags(
-      MergingStateAccessor<K, W> context, StateTag<? super K, BagState<T>> address) {
-    Map<W, BagState<T>> map = context.accessInEachMergingWindow(address);
-    if (map.isEmpty()) {
-      // Nothing to prefetch.
-      return;
-    }
-    BagState<T> result = context.access(address);
-    // Prefetch everything except what's already in result.
-    for (BagState<T> source : map.values()) {
-      if (!source.equals(result)) {
-        source.readLater();
-      }
-    }
-  }
-
-  /**
-   * Merge all bag state in {@code address} across all windows under merge.
-   */
-  public static <K, T, W extends BoundedWindow> void mergeBags(
-      MergingStateAccessor<K, W> context, StateTag<? super K, BagState<T>> address) {
-    mergeBags(context.accessInEachMergingWindow(address).values(), context.access(address));
-  }
-
-  /**
-   * Merge all bag state in {@code sources} (which may include {@code result}) into {@code result}.
-   */
-  public static <T, W extends BoundedWindow> void mergeBags(
-      Collection<BagState<T>> sources, BagState<T> result) {
-    if (sources.isEmpty()) {
-      // Nothing to merge.
-      return;
-    }
-    // Prefetch everything except what's already in result.
-    List<ReadableState<Iterable<T>>> futures = new ArrayList<>(sources.size());
-    for (BagState<T> source : sources) {
-      if (!source.equals(result)) {
-        source.readLater();
-        futures.add(source);
-      }
-    }
-    if (futures.isEmpty()) {
-      // Result already holds all the values.
-      return;
-    }
-    // Transfer from sources to result.
-    for (ReadableState<Iterable<T>> future : futures) {
-      for (T element : future.read()) {
-        result.add(element);
-      }
-    }
-    // Clear sources except for result.
-    for (BagState<T> source : sources) {
-      if (!source.equals(result)) {
-        source.clear();
-      }
-    }
-  }
-
-  /**
-   * Prefetch all combining value state for {@code address} across all merging windows in {@code
-   * context}.
-   */
-  public static <K, StateT extends CombiningState<?, ?>, W extends BoundedWindow> void
-      prefetchCombiningValues(MergingStateAccessor<K, W> context,
-          StateTag<? super K, StateT> address) {
-    for (StateT state : context.accessInEachMergingWindow(address).values()) {
-      state.readLater();
-    }
-  }
-
-  /**
-   * Merge all value state in {@code address} across all merging windows in {@code context}.
-   */
-  public static <K, InputT, AccumT, OutputT, W extends BoundedWindow> void mergeCombiningValues(
-      MergingStateAccessor<K, W> context,
-      StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address) {
-    mergeCombiningValues(
-        context.accessInEachMergingWindow(address).values(), context.access(address));
-  }
-
-  /**
-   * Merge all value state from {@code sources} (which may include {@code result}) into
-   * {@code result}.
-   */
-  public static <InputT, AccumT, OutputT, W extends BoundedWindow> void mergeCombiningValues(
-      Collection<AccumulatorCombiningState<InputT, AccumT, OutputT>> sources,
-      AccumulatorCombiningState<InputT, AccumT, OutputT> result) {
-    if (sources.isEmpty()) {
-      // Nothing to merge.
-      return;
-    }
-    if (sources.size() == 1 && sources.contains(result)) {
-      // Result already holds combined value.
-      return;
-    }
-    // Prefetch.
-    List<ReadableState<AccumT>> futures = new ArrayList<>(sources.size());
-    for (AccumulatorCombiningState<InputT, AccumT, OutputT> source : sources) {
-      source.readLater();
-    }
-    // Read.
-    List<AccumT> accumulators = new ArrayList<>(futures.size());
-    for (AccumulatorCombiningState<InputT, AccumT, OutputT> source : sources) {
-      accumulators.add(source.getAccum());
-    }
-    // Merge (possibly update and return one of the existing accumulators).
-    AccumT merged = result.mergeAccumulators(accumulators);
-    // Clear sources.
-    for (AccumulatorCombiningState<InputT, AccumT, OutputT> source : sources) {
-      source.clear();
-    }
-    // Update result.
-    result.addAccum(merged);
-  }
-
-  /**
-   * Prefetch all watermark state for {@code address} across all merging windows in
-   * {@code context}.
-   */
-  public static <K, W extends BoundedWindow> void prefetchWatermarks(
-      MergingStateAccessor<K, W> context,
-      StateTag<? super K, WatermarkHoldState<W>> address) {
-    Map<W, WatermarkHoldState<W>> map = context.accessInEachMergingWindow(address);
-    WatermarkHoldState<W> result = context.access(address);
-    if (map.isEmpty()) {
-      // Nothing to prefetch.
-      return;
-    }
-    if (map.size() == 1 && map.values().contains(result)
-        && result.getOutputTimeFn().dependsOnlyOnEarliestInputTimestamp()) {
-      // Nothing to change.
-      return;
-    }
-    if (result.getOutputTimeFn().dependsOnlyOnWindow()) {
-      // No need to read existing holds.
-      return;
-    }
-    // Prefetch.
-    for (WatermarkHoldState<W> source : map.values()) {
-      source.readLater();
-    }
-  }
-
-  /**
-   * Merge all watermark state in {@code address} across all merging windows in {@code context},
-   * where the final merge result window is {@code mergeResult}.
-   */
-  public static <K, W extends BoundedWindow> void mergeWatermarks(
-      MergingStateAccessor<K, W> context,
-      StateTag<? super K, WatermarkHoldState<W>> address,
-      W mergeResult) {
-    mergeWatermarks(
-        context.accessInEachMergingWindow(address).values(), context.access(address), mergeResult);
-  }
-
-  /**
-   * Merge all watermark state in {@code sources} (which must include {@code result} if non-empty)
-   * into {@code result}, where the final merge result window is {@code mergeResult}.
-   */
-  public static <W extends BoundedWindow> void mergeWatermarks(
-      Collection<WatermarkHoldState<W>> sources, WatermarkHoldState<W> result,
-      W resultWindow) {
-    if (sources.isEmpty()) {
-      // Nothing to merge.
-      return;
-    }
-    if (sources.size() == 1 && sources.contains(result)
-        && result.getOutputTimeFn().dependsOnlyOnEarliestInputTimestamp()) {
-      // Nothing to merge.
-      return;
-    }
-    if (result.getOutputTimeFn().dependsOnlyOnWindow()) {
-      // Clear sources.
-      for (WatermarkHoldState<W> source : sources) {
-        source.clear();
-      }
-      // Update directly from window-derived hold.
-      Instant hold = result.getOutputTimeFn().assignOutputTime(
-          BoundedWindow.TIMESTAMP_MIN_VALUE, resultWindow);
-      Preconditions.checkState(hold.isAfter(BoundedWindow.TIMESTAMP_MIN_VALUE));
-      result.add(hold);
-    } else {
-      // Prefetch.
-      List<ReadableState<Instant>> futures = new ArrayList<>(sources.size());
-      for (WatermarkHoldState<W> source : sources) {
-        futures.add(source);
-      }
-      // Read.
-      List<Instant> outputTimesToMerge = new ArrayList<>(sources.size());
-      for (ReadableState<Instant> future : futures) {
-        Instant sourceOutputTime = future.read();
-        if (sourceOutputTime != null) {
-          outputTimesToMerge.add(sourceOutputTime);
-        }
-      }
-      // Clear sources.
-      for (WatermarkHoldState<W> source : sources) {
-        source.clear();
-      }
-      if (!outputTimesToMerge.isEmpty()) {
-        // Merge and update.
-        result.add(result.getOutputTimeFn().merge(resultWindow, outputTimesToMerge));
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java
deleted file mode 100644
index f972e31..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespace.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import java.io.IOException;
-
-/**
- * A namespace used for scoping state stored with {@link StateInternals}.
- *
- * <p>Instances of {@code StateNamespace} are guaranteed to have a {@link #hashCode} and
- * {@link #equals} that uniquely identify the namespace.
- */
-public interface StateNamespace {
-
-  /**
-   * Return a {@link String} representation of the key. It is guaranteed that this
-   * {@code String} will uniquely identify the key.
-   *
-   * <p>This will encode the actual namespace as a {@code String}. It is
-   * preferable to use the {@code StateNamespace} object when possible.
-   *
-   * <p>The string produced by the standard implementations will not contain a '+' character. This
-   * enables adding a '+' between the actual namespace and other information, if needed, to separate
-   * the two.
-   */
-  String stringKey();
-
-  /**
-   * Append the string representation of this key to the {@link Appendable}.
-   */
-  void appendTo(Appendable sb) throws IOException;
-
-  /**
-   * Return an {@code Object} to use as a key in a cache.
-   *
-   * <p>Different namespaces may use the same key in order to be treated as a unit in the cache.
-   * The {@code Object}'s {@code hashCode} and {@code equals} methods will be used to determine
-   * equality.
-   */
-  Object getCacheKey();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java
deleted file mode 100644
index 09b86d6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaceForTest.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import java.io.IOException;
-import java.util.Objects;
-
-/**
- * A simple {@link StateNamespace} used for testing.
- */
-public class StateNamespaceForTest implements StateNamespace {
-  private String key;
-
-  public StateNamespaceForTest(String key) {
-    this.key = key;
-  }
-
-  @Override
-  public String stringKey() {
-    return key;
-  }
-
-  @Override
-  public Object getCacheKey() {
-    return key;
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj) {
-      return true;
-    }
-
-    if (!(obj instanceof StateNamespaceForTest)) {
-      return false;
-    }
-
-    return Objects.equals(this.key, ((StateNamespaceForTest) obj).key);
-  }
-
-  @Override
-  public int hashCode() {
-    return key.hashCode();
-  }
-
-  @Override
-  public void appendTo(Appendable sb) throws IOException {
-    sb.append(key);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java
deleted file mode 100644
index 8fee995..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateNamespaces.java
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.common.base.Splitter;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Objects;
-
-/**
- * Factory methods for creating the {@link StateNamespace StateNamespaces}.
- */
-public class StateNamespaces {
-
-  private enum Namespace {
-    GLOBAL,
-    WINDOW,
-    WINDOW_AND_TRIGGER;
-  }
-
-  public static StateNamespace global() {
-    return new GlobalNamespace();
-  }
-
-  public static <W extends BoundedWindow> StateNamespace window(Coder<W> windowCoder, W window) {
-    return new WindowNamespace<>(windowCoder, window);
-  }
-
-  public static <W extends BoundedWindow>
-  StateNamespace windowAndTrigger(Coder<W> windowCoder, W window, int triggerIdx) {
-    return new WindowAndTriggerNamespace<>(windowCoder, window, triggerIdx);
-  }
-
-  private StateNamespaces() {}
-
-  /**
-   * {@link StateNamespace} that is global to the current key being processed.
-   */
-  public static class GlobalNamespace implements StateNamespace {
-
-    private static final String GLOBAL_STRING = "/";
-
-    @Override
-    public String stringKey() {
-      return GLOBAL_STRING;
-    }
-
-    @Override
-    public Object getCacheKey() {
-      return GLOBAL_STRING;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      return obj == this || obj instanceof GlobalNamespace;
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(Namespace.GLOBAL);
-    }
-
-    @Override
-    public String toString() {
-      return "Global";
-    }
-
-    @Override
-    public void appendTo(Appendable sb) throws IOException {
-      sb.append(GLOBAL_STRING);
-    }
-  }
-
-  /**
-   * {@link StateNamespace} that is scoped to a specific window.
-   */
-  public static class WindowNamespace<W extends BoundedWindow> implements StateNamespace {
-
-    private static final String WINDOW_FORMAT = "/%s/";
-
-    private Coder<W> windowCoder;
-    private W window;
-
-    private WindowNamespace(Coder<W> windowCoder, W window) {
-      this.windowCoder = windowCoder;
-      this.window = window;
-    }
-
-    public W getWindow() {
-      return window;
-    }
-
-    @Override
-    public String stringKey() {
-      try {
-        return String.format(WINDOW_FORMAT, CoderUtils.encodeToBase64(windowCoder, window));
-      } catch (CoderException e) {
-        throw new RuntimeException("Unable to generate string key from window " + window, e);
-      }
-    }
-
-    @Override
-    public void appendTo(Appendable sb) throws IOException {
-      sb.append('/').append(CoderUtils.encodeToBase64(windowCoder, window)).append('/');
-    }
-
-    /**
-     * State in the same window will all be evicted together.
-     */
-    @Override
-    public Object getCacheKey() {
-      return window;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj == this) {
-        return true;
-      }
-
-      if (!(obj instanceof WindowNamespace)) {
-        return false;
-      }
-
-      WindowNamespace<?> that = (WindowNamespace<?>) obj;
-      return Objects.equals(this.window, that.window);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(Namespace.WINDOW, window);
-    }
-
-    @Override
-    public String toString() {
-      return "Window(" + window + ")";
-    }
-  }
-
-  /**
-   * {@link StateNamespace} that is scoped to a particular window and trigger index.
-   */
-  public static class WindowAndTriggerNamespace<W extends BoundedWindow>
-      implements StateNamespace {
-
-    private static final String WINDOW_AND_TRIGGER_FORMAT = "/%s/%s/";
-
-    private static final int TRIGGER_RADIX = 36;
-    private Coder<W> windowCoder;
-    private W window;
-    private int triggerIndex;
-
-    private WindowAndTriggerNamespace(Coder<W> windowCoder, W window, int triggerIndex) {
-      this.windowCoder = windowCoder;
-      this.window = window;
-      this.triggerIndex = triggerIndex;
-    }
-
-    public W getWindow() {
-      return window;
-    }
-
-    public int getTriggerIndex() {
-      return triggerIndex;
-    }
-
-    @Override
-    public String stringKey() {
-      try {
-        return String.format(WINDOW_AND_TRIGGER_FORMAT,
-            CoderUtils.encodeToBase64(windowCoder, window),
-            // Use base 36 so that can address 36 triggers in a single byte and still be human
-            // readable.
-            Integer.toString(triggerIndex, TRIGGER_RADIX).toUpperCase());
-      } catch (CoderException e) {
-        throw new RuntimeException("Unable to generate string key from window " + window, e);
-      }
-    }
-
-    @Override
-    public void appendTo(Appendable sb) throws IOException {
-      sb.append('/').append(CoderUtils.encodeToBase64(windowCoder, window));
-      sb.append('/').append(Integer.toString(triggerIndex, TRIGGER_RADIX).toUpperCase());
-      sb.append('/');
-    }
-
-    /**
-     * State in the same window will all be evicted together.
-     */
-    @Override
-    public Object getCacheKey() {
-      return window;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj == this) {
-        return true;
-      }
-
-      if (!(obj instanceof WindowAndTriggerNamespace)) {
-        return false;
-      }
-
-      WindowAndTriggerNamespace<?> that = (WindowAndTriggerNamespace<?>) obj;
-      return this.triggerIndex == that.triggerIndex
-          && Objects.equals(this.window, that.window);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(Namespace.WINDOW_AND_TRIGGER, window, triggerIndex);
-    }
-
-    @Override
-    public String toString() {
-      return "WindowAndTrigger(" + window + "," + triggerIndex + ")";
-    }
-  }
-
-  private static final Splitter SLASH_SPLITTER = Splitter.on('/');
-
-  /**
-   * Convert a {@code stringKey} produced using {@link StateNamespace#stringKey}
-   * on one of the namespaces produced by this class into the original
-   * {@link StateNamespace}.
-   */
-  public static <W extends BoundedWindow> StateNamespace fromString(
-      String stringKey, Coder<W> windowCoder) {
-    if (!stringKey.startsWith("/") || !stringKey.endsWith("/")) {
-      throw new RuntimeException("Invalid namespace string: '" + stringKey + "'");
-    }
-
-    if (GlobalNamespace.GLOBAL_STRING.equals(stringKey)) {
-      return global();
-    }
-
-    List<String> parts = SLASH_SPLITTER.splitToList(stringKey);
-    if (parts.size() != 3 && parts.size() != 4) {
-      throw new RuntimeException("Invalid namespace string: '" + stringKey + "'");
-    }
-    // Ends should be empty (we start and end with /)
-    if (!parts.get(0).isEmpty() || !parts.get(parts.size() - 1).isEmpty()) {
-      throw new RuntimeException("Invalid namespace string: '" + stringKey + "'");
-    }
-
-    try {
-      W window = CoderUtils.decodeFromBase64(windowCoder, parts.get(1));
-      if (parts.size() > 3) {
-        int index = Integer.parseInt(parts.get(2), WindowAndTriggerNamespace.TRIGGER_RADIX);
-        return windowAndTrigger(windowCoder, window, index);
-      } else {
-        return window(windowCoder, window);
-      }
-    } catch (Exception  e) {
-      throw new RuntimeException("Invalid namespace string: '" + stringKey + "'", e);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTable.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTable.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTable.java
deleted file mode 100644
index edd1dae..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTable.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.util.state.StateTag.StateBinder;
-import com.google.common.base.Supplier;
-import com.google.common.collect.Table;
-import com.google.common.collect.Tables;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-
-/**
- * Table mapping {@code StateNamespace} and {@code StateTag<?>} to a {@code State} instance.
- */
-public abstract class StateTable<K> {
-
-  private final Table<StateNamespace, StateTag<? super K, ?>, State> stateTable =
-      Tables.newCustomTable(new HashMap<StateNamespace, Map<StateTag<? super K, ?>, State>>(),
-          new Supplier<Map<StateTag<? super K, ?>, State>>() {
-        @Override
-        public Map<StateTag<? super K, ?>, State> get() {
-          return new HashMap<>();
-        }
-      });
-
-  /**
-   * Gets the {@link State} in the specified {@link StateNamespace} with the specified {@link
-   * StateTag}, binding it using the {@link #binderForNamespace} if it is not
-   * already present in this {@link StateTable}.
-   */
-  public <StateT extends State> StateT get(
-      StateNamespace namespace, StateTag<? super K, StateT> tag, StateContext<?> c) {
-    State storage = stateTable.get(namespace, tag);
-    if (storage != null) {
-      @SuppressWarnings("unchecked")
-      StateT typedStorage = (StateT) storage;
-      return typedStorage;
-    }
-
-    StateT typedStorage = tag.bind(binderForNamespace(namespace, c));
-    stateTable.put(namespace, tag, typedStorage);
-    return typedStorage;
-  }
-
-  public void clearNamespace(StateNamespace namespace) {
-    stateTable.rowKeySet().remove(namespace);
-  }
-
-  public void clear() {
-    stateTable.clear();
-  }
-
-  public Iterable<State> values() {
-    return stateTable.values();
-  }
-
-  public boolean isNamespaceInUse(StateNamespace namespace) {
-    return stateTable.containsRow(namespace);
-  }
-
-  public Map<StateTag<? super K, ?>, State> getTagsInUse(StateNamespace namespace) {
-    return stateTable.row(namespace);
-  }
-
-  public Set<StateNamespace> getNamespacesInUse() {
-    return stateTable.rowKeySet();
-  }
-
-  /**
-   * Provide the {@code StateBinder} to use for creating {@code Storage} instances
-   * in the specified {@code namespace}.
-   */
-  protected abstract StateBinder<K> binderForNamespace(StateNamespace namespace, StateContext<?> c);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java
deleted file mode 100644
index c87bdb7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTag.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.KeyedCombineFnWithContext;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
-
-import java.io.IOException;
-import java.io.Serializable;
-
-/**
- * An address for persistent state. This includes a unique identifier for the location, the
- * information necessary to encode the value, and details about the intended access pattern.
- *
- * <p>State can be thought of as a sparse table, with each {@code StateTag} defining a column
- * that has cells of type {@code StateT}.
- *
- * <p>Currently, this can only be used in a step immediately following a {@link GroupByKey}.
- *
- * @param <K> The type of key that must be used with the state tag. Contravariant: methods should
- *            accept values of type {@code StateTag<? super K, StateT>}.
- * @param <StateT> The type of state being tagged.
- */
-@Experimental(Kind.STATE)
-public interface StateTag<K, StateT extends State> extends Serializable {
-
-  /**
-   * Visitor for binding a {@link StateTag} to the associated {@link State}.
-   *
-   * @param <K> the type of key this binder embodies.
-   */
-  public interface StateBinder<K> {
-    <T> ValueState<T> bindValue(StateTag<? super K, ValueState<T>> address, Coder<T> coder);
-
-    <T> BagState<T> bindBag(StateTag<? super K, BagState<T>> address, Coder<T> elemCoder);
-
-    <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-    bindCombiningValue(
-        StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-        Coder<AccumT> accumCoder, CombineFn<InputT, AccumT, OutputT> combineFn);
-
-    <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-    bindKeyedCombiningValue(
-        StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-        Coder<AccumT> accumCoder, KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn);
-
-    <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-    bindKeyedCombiningValueWithContext(
-        StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-        Coder<AccumT> accumCoder,
-        KeyedCombineFnWithContext<? super K, InputT, AccumT, OutputT> combineFn);
-
-    /**
-     * Bind to a watermark {@link StateTag}.
-     *
-     * <p>This accepts the {@link OutputTimeFn} that dictates how watermark hold timestamps
-     * added to the returned {@link WatermarkHoldState} are to be combined.
-     */
-    <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
-        StateTag<? super K, WatermarkHoldState<W>> address,
-        OutputTimeFn<? super W> outputTimeFn);
-  }
-
-  /** Append the UTF-8 encoding of this tag to the given {@link Appendable}. */
-  void appendTo(Appendable sb) throws IOException;
-
-  /**
-   * Returns the user-provided name of this state cell.
-   */
-  String getId();
-
-  /**
-   * Use the {@code binder} to create an instance of {@code StateT} appropriate for this address.
-   */
-  StateT bind(StateBinder<? extends K> binder);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java
deleted file mode 100644
index ec9a78f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateTags.java
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.KeyedCombineFnWithContext;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
-import com.google.common.base.MoreObjects;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.Objects;
-
-/**
- * Static utility methods for creating {@link StateTag} instances.
- */
-@Experimental(Kind.STATE)
-public class StateTags {
-
-  private static final CoderRegistry STANDARD_REGISTRY = new CoderRegistry();
-
-  static {
-    STANDARD_REGISTRY.registerStandardCoders();
-  }
-
-  private enum StateKind {
-    SYSTEM('s'),
-    USER('u');
-
-    private char prefix;
-
-    StateKind(char prefix) {
-      this.prefix = prefix;
-    }
-  }
-
-  private StateTags() { }
-
-  private interface SystemStateTag<K, StateT extends State> {
-    StateTag<K, StateT> asKind(StateKind kind);
-  }
-
-  /**
-   * Create a simple state tag for values of type {@code T}.
-   */
-  public static <T> StateTag<Object, ValueState<T>> value(String id, Coder<T> valueCoder) {
-    return new ValueStateTag<>(new StructuredId(id), valueCoder);
-  }
-
-  /**
-   * Create a state tag for values that use a {@link CombineFn} to automatically merge
-   * multiple {@code InputT}s into a single {@code OutputT}.
-   */
-  public static <InputT, AccumT, OutputT>
-    StateTag<Object, AccumulatorCombiningState<InputT, AccumT, OutputT>>
-    combiningValue(
-      String id, Coder<AccumT> accumCoder, CombineFn<InputT, AccumT, OutputT> combineFn) {
-    return combiningValueInternal(id, accumCoder, combineFn);
-  }
-
-  /**
-   * Create a state tag for values that use a {@link KeyedCombineFn} to automatically merge
-   * multiple {@code InputT}s into a single {@code OutputT}. The key provided to the
-   * {@link KeyedCombineFn} comes from the keyed {@link StateAccessor}.
-   */
-  public static <K, InputT, AccumT,
-      OutputT> StateTag<K, AccumulatorCombiningState<InputT, AccumT, OutputT>>
-      keyedCombiningValue(String id, Coder<AccumT> accumCoder,
-          KeyedCombineFn<K, InputT, AccumT, OutputT> combineFn) {
-    return keyedCombiningValueInternal(id, accumCoder, combineFn);
-  }
-
-  /**
-   * Create a state tag for values that use a {@link KeyedCombineFnWithContext} to automatically
-   * merge multiple {@code InputT}s into a single {@code OutputT}. The key provided to the
-   * {@link KeyedCombineFn} comes from the keyed {@link StateAccessor}, the context provided comes
-   * from the {@link StateContext}.
-   */
-  public static <K, InputT, AccumT, OutputT>
-      StateTag<K, AccumulatorCombiningState<InputT, AccumT, OutputT>>
-      keyedCombiningValueWithContext(
-          String id,
-          Coder<AccumT> accumCoder,
-          KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> combineFn) {
-    return new KeyedCombiningValueWithContextStateTag<K, InputT, AccumT, OutputT>(
-        new StructuredId(id),
-        accumCoder,
-        combineFn);
-  }
-
-  /**
-   * Create a state tag for values that use a {@link CombineFn} to automatically merge
-   * multiple {@code InputT}s into a single {@code OutputT}.
-   *
-   * <p>This determines the {@code Coder<AccumT>} from the given {@code Coder<InputT>}, and
-   * should only be used to initialize static values.
-   */
-  public static <InputT, AccumT, OutputT>
-      StateTag<Object, AccumulatorCombiningState<InputT, AccumT, OutputT>>
-      combiningValueFromInputInternal(
-          String id, Coder<InputT> inputCoder, CombineFn<InputT, AccumT, OutputT> combineFn) {
-    try {
-      Coder<AccumT> accumCoder = combineFn.getAccumulatorCoder(STANDARD_REGISTRY, inputCoder);
-      return combiningValueInternal(id, accumCoder, combineFn);
-    } catch (CannotProvideCoderException e) {
-      throw new IllegalArgumentException(
-          "Unable to determine accumulator coder for " + combineFn.getClass().getSimpleName()
-          + " from " + inputCoder, e);
-    }
-  }
-
-  private static <InputT, AccumT,
-      OutputT> StateTag<Object, AccumulatorCombiningState<InputT, AccumT, OutputT>>
-      combiningValueInternal(
-      String id, Coder<AccumT> accumCoder, CombineFn<InputT, AccumT, OutputT> combineFn) {
-    return
-        new CombiningValueStateTag<InputT, AccumT, OutputT>(
-            new StructuredId(id), accumCoder, combineFn);
-  }
-
-  private static <K, InputT, AccumT, OutputT>
-      StateTag<K, AccumulatorCombiningState<InputT, AccumT, OutputT>> keyedCombiningValueInternal(
-          String id,
-          Coder<AccumT> accumCoder,
-          KeyedCombineFn<K, InputT, AccumT, OutputT> combineFn) {
-    return new KeyedCombiningValueStateTag<K, InputT, AccumT, OutputT>(
-        new StructuredId(id), accumCoder, combineFn);
-  }
-
-  /**
-   * Create a state tag that is optimized for adding values frequently, and
-   * occasionally retrieving all the values that have been added.
-   */
-  public static <T> StateTag<Object, BagState<T>> bag(String id, Coder<T> elemCoder) {
-    return new BagStateTag<T>(new StructuredId(id), elemCoder);
-  }
-
-  /**
-   * Create a state tag for holding the watermark.
-   */
-  public static <W extends BoundedWindow> StateTag<Object, WatermarkHoldState<W>>
-      watermarkStateInternal(String id, OutputTimeFn<? super W> outputTimeFn) {
-    return new WatermarkStateTagInternal<W>(new StructuredId(id), outputTimeFn);
-  }
-
-  /**
-   * Convert an arbitrary {@link StateTag} to a system-internal tag that is guaranteed not to
-   * collide with any user tags.
-   */
-  public static <K, StateT extends State> StateTag<K, StateT> makeSystemTagInternal(
-      StateTag<K, StateT> tag) {
-    if (!(tag instanceof SystemStateTag)) {
-      throw new IllegalArgumentException("Expected subclass of StateTagBase, got " + tag);
-    }
-    // Checked above
-    @SuppressWarnings("unchecked")
-    SystemStateTag<K, StateT> typedTag = (SystemStateTag<K, StateT>) tag;
-    return typedTag.asKind(StateKind.SYSTEM);
-  }
-
-  public static <K, InputT, AccumT, OutputT> StateTag<Object, BagState<AccumT>>
-      convertToBagTagInternal(
-          StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> combiningTag) {
-    if (combiningTag instanceof KeyedCombiningValueStateTag) {
-      // Checked above; conversion to a bag tag depends on the provided tag being one of those
-      // created via the factory methods in this class.
-      @SuppressWarnings("unchecked")
-      KeyedCombiningValueStateTag<K, InputT, AccumT, OutputT> typedTag =
-          (KeyedCombiningValueStateTag<K, InputT, AccumT, OutputT>) combiningTag;
-      return typedTag.asBagTag();
-    } else if (combiningTag instanceof KeyedCombiningValueWithContextStateTag) {
-      @SuppressWarnings("unchecked")
-      KeyedCombiningValueWithContextStateTag<K, InputT, AccumT, OutputT> typedTag =
-          (KeyedCombiningValueWithContextStateTag<K, InputT, AccumT, OutputT>) combiningTag;
-      return typedTag.asBagTag();
-    } else {
-      throw new IllegalArgumentException("Unexpected StateTag " + combiningTag);
-    }
-  }
-
-  private static class StructuredId implements Serializable {
-    private final StateKind kind;
-    private final String rawId;
-
-    private StructuredId(String rawId) {
-      this(StateKind.USER, rawId);
-    }
-
-    private StructuredId(StateKind kind, String rawId) {
-      this.kind = kind;
-      this.rawId = rawId;
-    }
-
-    public StructuredId asKind(StateKind kind) {
-      return new StructuredId(kind, rawId);
-    }
-
-    public void appendTo(Appendable sb) throws IOException {
-      sb.append(kind.prefix).append(rawId);
-    }
-
-    public String getRawId() {
-      return rawId;
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(getClass())
-          .add("id", rawId)
-          .add("kind", kind)
-          .toString();
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj == this) {
-        return true;
-      }
-
-      if (!(obj instanceof StructuredId)) {
-        return false;
-      }
-
-      StructuredId that = (StructuredId) obj;
-      return Objects.equals(this.kind, that.kind)
-          && Objects.equals(this.rawId, that.rawId);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(kind, rawId);
-    }
-  }
-
-  /**
-   * A base class that just manages the structured ids.
-   */
-  private abstract static class StateTagBase<K, StateT extends State>
-      implements StateTag<K, StateT>, SystemStateTag<K, StateT> {
-
-    protected final StructuredId id;
-
-    protected StateTagBase(StructuredId id) {
-      this.id = id;
-    }
-
-    @Override
-    public String getId() {
-      return id.getRawId();
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(getClass())
-          .add("id", id)
-          .toString();
-    }
-
-    @Override
-    public void appendTo(Appendable sb) throws IOException {
-      id.appendTo(sb);
-    }
-
-    @Override
-    public abstract StateTag<K, StateT> asKind(StateKind kind);
-  }
-
-  /**
-   * A value state cell for values of type {@code T}.
-   *
-   * @param <T> the type of value being stored
-   */
-  private static class ValueStateTag<T> extends StateTagBase<Object, ValueState<T>>
-      implements StateTag<Object, ValueState<T>> {
-
-    private final Coder<T> coder;
-
-    private ValueStateTag(StructuredId id, Coder<T> coder) {
-      super(id);
-      this.coder = coder;
-    }
-
-    @Override
-    public ValueState<T> bind(StateBinder<? extends Object> visitor) {
-      return visitor.bindValue(this, coder);
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj == this) {
-        return true;
-      }
-
-      if (!(obj instanceof ValueStateTag)) {
-        return false;
-      }
-
-      ValueStateTag<?> that = (ValueStateTag<?>) obj;
-      return Objects.equals(this.id, that.id)
-          && Objects.equals(this.coder, that.coder);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(getClass(), id, coder);
-    }
-
-    @Override
-    public StateTag<Object, ValueState<T>> asKind(StateKind kind) {
-      return new ValueStateTag<T>(id.asKind(kind), coder);
-    }
-  }
-
-  /**
-   * A state cell for values that are combined according to a {@link CombineFn}.
-   *
-   * @param <InputT> the type of input values
-   * @param <AccumT> type of mutable accumulator values
-   * @param <OutputT> type of output values
-   */
-  private static class CombiningValueStateTag<InputT, AccumT, OutputT>
-      extends KeyedCombiningValueStateTag<Object, InputT, AccumT, OutputT>
-      implements StateTag<Object, AccumulatorCombiningState<InputT, AccumT, OutputT>>,
-      SystemStateTag<Object, AccumulatorCombiningState<InputT, AccumT, OutputT>> {
-
-    private final Coder<AccumT> accumCoder;
-    private final CombineFn<InputT, AccumT, OutputT> combineFn;
-
-    private CombiningValueStateTag(
-        StructuredId id,
-        Coder<AccumT> accumCoder, CombineFn<InputT, AccumT, OutputT> combineFn) {
-      super(id, accumCoder, combineFn.asKeyedFn());
-      this.combineFn = combineFn;
-      this.accumCoder = accumCoder;
-    }
-
-    @Override
-    public StateTag<Object, AccumulatorCombiningState<InputT, AccumT, OutputT>>
-    asKind(StateKind kind) {
-      return new CombiningValueStateTag<InputT, AccumT, OutputT>(
-          id.asKind(kind), accumCoder, combineFn);
-    }
-  }
-
-  /**
-   * A state cell for values that are combined according to a {@link KeyedCombineFnWithContext}.
-   *
-   * @param <K> the type of keys
-   * @param <InputT> the type of input values
-   * @param <AccumT> type of mutable accumulator values
-   * @param <OutputT> type of output values
-   */
-  private static class KeyedCombiningValueWithContextStateTag<K, InputT, AccumT, OutputT>
-    extends StateTagBase<K, AccumulatorCombiningState<InputT, AccumT, OutputT>>
-    implements SystemStateTag<K, AccumulatorCombiningState<InputT, AccumT, OutputT>> {
-
-    private final Coder<AccumT> accumCoder;
-    private final KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> combineFn;
-
-    protected KeyedCombiningValueWithContextStateTag(
-        StructuredId id,
-        Coder<AccumT> accumCoder,
-        KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> combineFn) {
-      super(id);
-      this.combineFn = combineFn;
-      this.accumCoder = accumCoder;
-    }
-
-    @Override
-    public AccumulatorCombiningState<InputT, AccumT, OutputT> bind(
-        StateBinder<? extends K> visitor) {
-      return visitor.bindKeyedCombiningValueWithContext(this, accumCoder, combineFn);
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj == this) {
-        return true;
-      }
-
-      if (!(obj instanceof KeyedCombiningValueWithContextStateTag)) {
-        return false;
-      }
-
-      KeyedCombiningValueWithContextStateTag<?, ?, ?, ?> that =
-          (KeyedCombiningValueWithContextStateTag<?, ?, ?, ?>) obj;
-      return Objects.equals(this.id, that.id)
-          && Objects.equals(this.accumCoder, that.accumCoder);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(getClass(), id, accumCoder);
-    }
-
-    @Override
-    public StateTag<K, AccumulatorCombiningState<InputT, AccumT, OutputT>> asKind(
-        StateKind kind) {
-      return new KeyedCombiningValueWithContextStateTag<>(
-          id.asKind(kind), accumCoder, combineFn);
-    }
-
-    private StateTag<Object, BagState<AccumT>> asBagTag() {
-      return new BagStateTag<AccumT>(id, accumCoder);
-    }
-  }
-
-  /**
-   * A state cell for values that are combined according to a {@link KeyedCombineFn}.
-   *
-   * @param <K> the type of keys
-   * @param <InputT> the type of input values
-   * @param <AccumT> type of mutable accumulator values
-   * @param <OutputT> type of output values
-   */
-  private static class KeyedCombiningValueStateTag<K, InputT, AccumT, OutputT>
-      extends StateTagBase<K, AccumulatorCombiningState<InputT, AccumT, OutputT>>
-      implements SystemStateTag<K, AccumulatorCombiningState<InputT, AccumT, OutputT>> {
-
-    private final Coder<AccumT> accumCoder;
-    private final KeyedCombineFn<K, InputT, AccumT, OutputT> keyedCombineFn;
-
-    protected KeyedCombiningValueStateTag(
-        StructuredId id,
-        Coder<AccumT> accumCoder, KeyedCombineFn<K, InputT, AccumT, OutputT> keyedCombineFn) {
-      super(id);
-      this.keyedCombineFn = keyedCombineFn;
-      this.accumCoder = accumCoder;
-    }
-
-    @Override
-    public AccumulatorCombiningState<InputT, AccumT, OutputT> bind(
-        StateBinder<? extends K> visitor) {
-      return visitor.bindKeyedCombiningValue(this, accumCoder, keyedCombineFn);
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj == this) {
-        return true;
-      }
-      // Compare exact classes: hashCode() mixes in getClass(), and the old test against the
-      // CombiningValueStateTag subclass made two equal plain keyed tags compare unequal.
-      if (obj == null || getClass() != obj.getClass()) {
-        return false;
-      }
-      KeyedCombiningValueStateTag<?, ?, ?, ?> that = (KeyedCombiningValueStateTag<?, ?, ?, ?>) obj;
-      return Objects.equals(this.id, that.id)
-          && Objects.equals(this.accumCoder, that.accumCoder);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(getClass(), id, accumCoder);
-    }
-
-    @Override
-    public StateTag<K, AccumulatorCombiningState<InputT, AccumT, OutputT>> asKind(
-        StateKind kind) {
-      return new KeyedCombiningValueStateTag<>(id.asKind(kind), accumCoder, keyedCombineFn);
-    }
-
-    private StateTag<Object, BagState<AccumT>> asBagTag() {
-      return new BagStateTag<AccumT>(id, accumCoder);
-    }
-  }
-
-  /**
-   * A state cell optimized for bag-like access patterns (frequent additions, occasional reads
-   * of all the values).
-   *
-   * @param <T> the type of value in the bag
-   */
-  private static class BagStateTag<T> extends StateTagBase<Object, BagState<T>>
-      implements StateTag<Object, BagState<T>>{
-
-    private final Coder<T> elemCoder;
-
-    private BagStateTag(StructuredId id, Coder<T> elemCoder) {
-      super(id);
-      this.elemCoder = elemCoder;
-    }
-
-    @Override
-    public BagState<T> bind(StateBinder<? extends Object> visitor) {
-      return visitor.bindBag(this, elemCoder);
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj == this) {
-        return true;
-      }
-
-      if (!(obj instanceof BagStateTag)) {
-        return false;
-      }
-
-      BagStateTag<?> that = (BagStateTag<?>) obj;
-      return Objects.equals(this.id, that.id)
-          && Objects.equals(this.elemCoder, that.elemCoder);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(getClass(), id, elemCoder);
-    }
-
-    @Override
-    public StateTag<Object, BagState<T>> asKind(StateKind kind) {
-      return new BagStateTag<>(id.asKind(kind), elemCoder);
-    }
-  }
-
-  private static class WatermarkStateTagInternal<W extends BoundedWindow>
-      extends StateTagBase<Object, WatermarkHoldState<W>> {
-
-    /**
-     * When multiple output times are added to hold the watermark, this determines how they are
-     * combined, and also the behavior when merging windows. Does not contribute to equality/hash
-     * since we have at most one watermark hold tag per computation.
-     */
-    private final OutputTimeFn<? super W> outputTimeFn;
-
-    private WatermarkStateTagInternal(StructuredId id, OutputTimeFn<? super W> outputTimeFn) {
-      super(id);
-      this.outputTimeFn = outputTimeFn;
-    }
-
-    @Override
-    public WatermarkHoldState<W> bind(StateBinder<? extends Object> visitor) {
-      return visitor.bindWatermark(this, outputTimeFn);
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj == this) {
-        return true;
-      }
-
-      if (!(obj instanceof WatermarkStateTagInternal)) {
-        return false;
-      }
-
-      WatermarkStateTagInternal<?> that = (WatermarkStateTagInternal<?>) obj;
-      return Objects.equals(this.id, that.id);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(getClass(), id);
-    }
-
-    @Override
-    public StateTag<Object, WatermarkHoldState<W>> asKind(StateKind kind) {
-      return new WatermarkStateTagInternal<W>(id.asKind(kind), outputTimeFn);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/ValueState.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/ValueState.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/ValueState.java
deleted file mode 100644
index 19c12bb..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/ValueState.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-
-/**
- * State holding a single value.
- *
- * @param <T> The type of values being stored.
- */
-@Experimental(Kind.STATE)
-public interface ValueState<T> extends ReadableState<T>, State {
-  /**
-   * Set the value of the buffer.
-   */
-  void write(T input);
-
-  @Override
-  ValueState<T> readLater();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkHoldState.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkHoldState.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkHoldState.java
deleted file mode 100644
index 8a1adc9..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/WatermarkHoldState.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
-
-import org.joda.time.Instant;
-
-/**
- * A {@link State} accepting and aggregating output timestamps, which determines
- * the time to which the output watermark must be held.
- *
- * <p><b><i>For internal use only. This API may change at any time.</i></b>
- */
-@Experimental(Kind.STATE)
-public interface WatermarkHoldState<W extends BoundedWindow>
-    extends CombiningState<Instant, Instant> {
-  /**
-   * Return the {@link OutputTimeFn} which will be used to determine a watermark hold time given
-   * an element timestamp, and to combine watermarks from windows which are about to be merged.
-   */
-  OutputTimeFn<? super W> getOutputTimeFn();
-
-  @Override
-  WatermarkHoldState<W> readLater();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java
deleted file mode 100644
index 23cee07..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/KV.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.transforms.Combine;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.SerializableComparator;
-import com.google.common.base.MoreObjects;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Objects;
-
-/**
- * An immutable key/value pair.
- *
- * <p>Various {@link PTransform PTransforms} like {@link GroupByKey} and {@link Combine#perKey}
- * operate on {@link PCollection PCollections} of {@link KV KVs}.
- *
- * @param <K> the type of the key
- * @param <V> the type of the value
- */
-public class KV<K, V> implements Serializable {
-  /** Returns a {@link KV} with the given key and value. */
-  public static <K, V> KV<K, V> of(K key, V value) {
-    return new KV<>(key, value);
-  }
-
-  /** Returns the key of this {@link KV}. */
-  public K getKey() {
-    return key;
-  }
-
-  /** Returns the value of this {@link KV}. */
-  public V getValue() {
-    return value;
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  final K key;
-  final V value;
-
-  private KV(K key, V value) {
-    this.key = key;
-    this.value = value;
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (this == other) {
-      return true;
-    }
-    if (!(other instanceof KV)) {
-      return false;
-    }
-    KV<?, ?> otherKv = (KV<?, ?>) other;
-    // Arrays are very common as values and keys, so deepEquals is mandatory
-    return Objects.deepEquals(this.key, otherKv.key)
-        && Objects.deepEquals(this.value, otherKv.value);
-  }
-
-  /**
-   * A {@link Comparator} that orders {@link KV KVs} by the natural ordering of their keys.
-   *
-   * <p>A {@code null} key is less than any non-{@code null} key.
-   */
-  public static class OrderByKey<K extends Comparable<? super K>, V> implements
-      SerializableComparator<KV<K, V>> {
-    @Override
-    public int compare(KV<K, V> a, KV<K, V> b) {
-      if (a.key == null) {
-        return b.key == null ? 0 : -1;
-      } else if (b.key == null) {
-        return 1;
-      } else {
-        return a.key.compareTo(b.key);
-      }
-    }
-  }
-
-  /**
-   * A {@link Comparator} that orders {@link KV KVs} by the natural ordering of their values.
-   *
-   * <p>A {@code null} value is less than any non-{@code null} value.
-   */
-  public static class OrderByValue<K, V extends Comparable<? super V>>
-      implements SerializableComparator<KV<K, V>> {
-    @Override
-    public int compare(KV<K, V> a, KV<K, V> b) {
-      if (a.value == null) {
-        return b.value == null ? 0 : -1;
-      } else if (b.value == null) {
-        return 1;
-      } else {
-        return a.value.compareTo(b.value);
-      }
-    }
-  }
-
-  @Override
-  public int hashCode() {
-    // Objects.deepEquals requires Arrays.deepHashCode for correctness
-    return Arrays.deepHashCode(new Object[]{key, value});
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(this)
-        .addValue(key)
-        .addValue(value)
-        .toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java
deleted file mode 100644
index 23ac3ae..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PBegin.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO.Read;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-
-import java.util.Collection;
-import java.util.Collections;
-
-/**
- * {@link PBegin} is the "input" to a root {@link PTransform}, such as {@link Read Read} or
- * {@link Create}.
- *
- * <p>Typically created by calling {@link Pipeline#begin} on a Pipeline.
- */
-public class PBegin implements PInput {
-  /**
-   * Returns a {@link PBegin} in the given {@link Pipeline}.
-   */
-  public static PBegin in(Pipeline pipeline) {
-    return new PBegin(pipeline);
-  }
-
-  /**
-   * Like {@link #apply(String, PTransform)} but defaulting to the name
-   * of the {@link PTransform}.
-   */
-  public <OutputT extends POutput> OutputT apply(
-      PTransform<? super PBegin, OutputT> t) {
-    return Pipeline.applyTransform(this, t);
-  }
-
-  /**
-   * Applies the given {@link PTransform} to this input {@link PBegin},
-   * using {@code name} to identify this specific application of the transform.
-   * This name is used in various places, including the monitoring UI, logging,
-   * and to stably identify this application node in the job graph.
-   */
-  public <OutputT extends POutput> OutputT apply(
-      String name, PTransform<? super PBegin, OutputT> t) {
-    return Pipeline.applyTransform(name, this, t);
-  }
-
-  @Override
-  public Pipeline getPipeline() {
-    return pipeline;
-  }
-
-  @Override
-  public Collection<? extends PValue> expand() {
-    // A PBegin contains no PValues.
-    return Collections.emptyList();
-  }
-
-  @Override
-  public void finishSpecifying() {
-    // Nothing more to be done.
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Constructs a {@link PBegin} in the given {@link Pipeline}.
-   */
-  protected PBegin(Pipeline pipeline) {
-    this.pipeline = pipeline;
-  }
-
-  private final Pipeline pipeline;
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java
deleted file mode 100644
index 6fffddf..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollection.java
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.io.Read;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-
-/**
- * A {@link PCollection PCollection&lt;T&gt;} is an immutable collection of values of type
- * {@code T}.  A {@link PCollection} can contain either a bounded or unbounded
- * number of elements.  Bounded and unbounded {@link PCollection PCollections} are produced
- * as the output of {@link PTransform PTransforms}
- * (including root PTransforms like {@link Read} and {@link Create}), and can
- * be passed as the inputs of other PTransforms.
- *
- * <p>Some root transforms produce bounded {@code PCollections} and others
- * produce unbounded ones.  For example, {@link TextIO.Read} reads a static set
- * of files, so it produces a bounded {@link PCollection}.
- * {@link PubsubIO.Read}, on the other hand, receives a potentially infinite stream
- * of Pubsub messages, so it produces an unbounded {@link PCollection}.
- *
- * <p>Each element in a {@link PCollection} may have an associated implicit
- * timestamp.  Readers assign timestamps to elements when they create
- * {@link PCollection PCollections}, and other {@link PTransform PTransforms} propagate these
- * timestamps from their input to their output. For example, {@link PubsubIO.Read}
- * assigns pubsub message timestamps to elements, and {@link TextIO.Read} assigns
- * the default value {@link BoundedWindow#TIMESTAMP_MIN_VALUE} to elements. User code can
- * explicitly assign timestamps to elements with
- * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#outputWithTimestamp}.
- *
- * <p>Additionally, a {@link PCollection} has an associated
- * {@link WindowFn} and each element is assigned to a set of windows.
- * By default, the windowing function is {@link GlobalWindows}
- * and all elements are assigned into a single default window.
- * This default can be overridden with the {@link Window}
- * {@link PTransform}.
- *
- * <p>See the individual {@link PTransform} subclasses for specific information
- * on how they propagate timestamps and windowing.
- *
- * @param <T> the type of the elements of this {@link PCollection}
- */
-public class PCollection<T> extends TypedPValue<T> {
-
-  /**
-   * The enumeration of cases for whether a {@link PCollection} is bounded.
-   */
-  public enum IsBounded {
-    /**
-     * Indicates that a {@link PCollection} contains bounded data elements, such as
-     * {@link PCollection PCollections} from {@link TextIO}, {@link BigQueryIO},
-     * {@link Create}, etc.
-     */
-    BOUNDED,
-    /**
-     * Indicates that a {@link PCollection} contains unbounded data elements, such as
-     * {@link PCollection PCollections} from {@link PubsubIO}.
-     */
-    UNBOUNDED;
-
-    /**
-     * Returns the composed IsBounded property.
-     *
-     * <p>The composed property is {@link #BOUNDED} only if all components are {@link #BOUNDED}.
-     * Otherwise, it is {@link #UNBOUNDED}.
-     */
-    public IsBounded and(IsBounded that) {
-      if (this == BOUNDED && that == BOUNDED) {
-        return BOUNDED;
-      } else {
-        return UNBOUNDED;
-      }
-    }
-  }
-
-  /**
-   * Returns the name of this {@link PCollection}.
-   *
-   * <p>By default, the name of a {@link PCollection} is based on the name of the
-   * {@link PTransform} that produces it.  It can be specified explicitly by
-   * calling {@link #setName}.
-   *
-   * @throws IllegalStateException if the name hasn't been set yet
-   */
-  @Override
-  public String getName() {
-    return super.getName();
-  }
-
-  /**
-   * Sets the name of this {@link PCollection}.  Returns {@code this}.
-   *
-   * @throws IllegalStateException if this {@link PCollection} has already been
-   * finalized and may no longer be set.
-   * Once {@link #apply} has been called, this will be the case.
-   */
-  @Override
-  public PCollection<T> setName(String name) {
-    super.setName(name);
-    return this;
-  }
-
-  /**
-   * Returns the {@link Coder} used by this {@link PCollection} to encode and decode
-   * the values stored in it.
-   *
-   * @throws IllegalStateException if the {@link Coder} hasn't been set, and
-   * couldn't be inferred.
-   */
-  @Override
-  public Coder<T> getCoder() {
-    return super.getCoder();
-  }
-
-  /**
-   * Sets the {@link Coder} used by this {@link PCollection} to encode and decode the
-   * values stored in it. Returns {@code this}.
-   *
-   * @throws IllegalStateException if this {@link PCollection} has already
-   * been finalized and may no longer be set.
-   * Once {@link #apply} has been called, this will be the case.
-   */
-  @Override
-  public PCollection<T> setCoder(Coder<T> coder) {
-    super.setCoder(coder);
-    return this;
-  }
-
-  /**
-   * Like {@link #apply(String, PTransform)} but defaulting to the name
-   * of the {@link PTransform}.
-   *
-   * @return the output of the applied {@link PTransform}
-   */
-  public <OutputT extends POutput> OutputT apply(PTransform<? super PCollection<T>, OutputT> t) {
-    return Pipeline.applyTransform(this, t);
-  }
-
-  /**
-   * Applies the given {@link PTransform} to this input {@link PCollection},
-   * using {@code name} to identify this specific application of the transform.
-   * This name is used in various places, including the monitoring UI, logging,
-   * and to stably identify this application node in the job graph.
-   *
-   * @return the output of the applied {@link PTransform}
-   */
-  public <OutputT extends POutput> OutputT apply(
-      String name, PTransform<? super PCollection<T>, OutputT> t) {
-    return Pipeline.applyTransform(name, this, t);
-  }
-
-  /**
-   * Returns the {@link WindowingStrategy} of this {@link PCollection}.
-   */
-  public WindowingStrategy<?, ?> getWindowingStrategy() {
-    return windowingStrategy;
-  }
-
-  public IsBounded isBounded() {
-    return isBounded;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal details below here.
-
-  /**
-   * {@link WindowingStrategy} that will be used for merging windows and triggering output in this
-   * {@link PCollection} and subsequent {@link PCollection PCollections} produced from this one.
-   *
-   * <p>By default, no merging is performed.
-   */
-  private WindowingStrategy<?, ?> windowingStrategy;
-
-  private IsBounded isBounded;
-
-  private PCollection(Pipeline p) {
-    super(p);
-  }
-
-  /**
-   * Sets the {@link TypeDescriptor TypeDescriptor&lt;T&gt;} for this
-   * {@link PCollection PCollection&lt;T&gt;}. This may allow the enclosing
-   * {@link PCollectionTuple}, {@link PCollectionList}, or {@code PTransform<?, PCollection<T>>},
-   * etc., to provide more detailed reflective information.
-   */
-  @Override
-  public PCollection<T> setTypeDescriptorInternal(TypeDescriptor<T> typeDescriptor) {
-    super.setTypeDescriptorInternal(typeDescriptor);
-    return this;
-  }
-
-  /**
-   * Sets the {@link WindowingStrategy} of this {@link PCollection}.
-   *
-   * <p>For use by primitive transformations only.
-   */
-  public PCollection<T> setWindowingStrategyInternal(WindowingStrategy<?, ?> windowingStrategy) {
-     this.windowingStrategy = windowingStrategy;
-     return this;
-  }
-
-  /**
-   * Sets the {@link PCollection.IsBounded} of this {@link PCollection}.
-   *
-   * <p>For use by internal transformations only.
-   */
-  public PCollection<T> setIsBoundedInternal(IsBounded isBounded) {
-    this.isBounded = isBounded;
-    return this;
-  }
-
-  /**
-   * Creates and returns a new {@link PCollection} for a primitive output.
-   *
-   * <p>For use by primitive transformations only.
-   */
-  public static <T> PCollection<T> createPrimitiveOutputInternal(
-      Pipeline pipeline,
-      WindowingStrategy<?, ?> windowingStrategy,
-      IsBounded isBounded) {
-    return new PCollection<T>(pipeline)
-        .setWindowingStrategyInternal(windowingStrategy)
-        .setIsBoundedInternal(isBounded);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java
deleted file mode 100644
index b99af02..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionList.java
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.Flatten;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.Partition;
-import com.google.common.collect.ImmutableList;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-
-/**
- * A {@link PCollectionList PCollectionList&lt;T&gt;} is an immutable list of homogeneously
- * typed {@link PCollection PCollection&lt;T&gt;s}. A {@link PCollectionList} is used, for
- * instance, as the input to
- * {@link Flatten} or the output of {@link Partition}.
- *
- * <p>PCollectionLists can be created and accessed as follows:
- * <pre> {@code
- * PCollection<String> pc1 = ...;
- * PCollection<String> pc2 = ...;
- * PCollection<String> pc3 = ...;
- *
- * // Create a PCollectionList with three PCollections:
- * PCollectionList<String> pcs = PCollectionList.of(pc1).and(pc2).and(pc3);
- *
- * // Create an empty PCollectionList:
- * Pipeline p = ...;
- * PCollectionList<String> pcs2 = PCollectionList.<String>empty(p);
- *
- * // Get PCollections out of a PCollectionList, by index (origin 0):
- * PCollection<String> pcX = pcs.get(1);
- * PCollection<String> pcY = pcs.get(0);
- * PCollection<String> pcZ = pcs.get(2);
- *
- * // Get a list of all PCollections in a PCollectionList:
- * List<PCollection<String>> allPcs = pcs.getAll();
- * } </pre>
- *
- * @param <T> the type of the elements of all the {@link PCollection PCollections} in this list
- */
-public class PCollectionList<T> implements PInput, POutput {
-  /**
-   * Returns an empty {@link PCollectionList} that is part of the given {@link Pipeline}.
-   *
-   * <p>Longer {@link PCollectionList PCollectionLists} can be created by calling
-   * {@link #and} on the result.
-   */
-  public static <T> PCollectionList<T> empty(Pipeline pipeline) {
-    return new PCollectionList<>(pipeline);
-  }
-
-  /**
-   * Returns a singleton {@link PCollectionList} containing the given {@link PCollection}.
-   *
-   * <p>Longer {@link PCollectionList PCollectionLists} can be created by calling
-   * {@link #and} on the result.
-   */
-  public static <T> PCollectionList<T> of(PCollection<T> pc) {
-    return new PCollectionList<T>(pc.getPipeline()).and(pc);
-  }
-
-  /**
-   * Returns a {@link PCollectionList} containing the given {@link PCollection PCollections},
-   * in order.
-   *
-   * <p>The argument list cannot be empty.
-   *
-   * <p>All the {@link PCollection PCollections} in the resulting {@link PCollectionList} must be
-   * part of the same {@link Pipeline}.
-   *
-   * <p>Longer PCollectionLists can be created by calling
-   * {@link #and} on the result.
-   */
-  public static <T> PCollectionList<T> of(Iterable<PCollection<T>> pcs) {
-    Iterator<PCollection<T>> pcsIter = pcs.iterator();
-    if (!pcsIter.hasNext()) {
-      throw new IllegalArgumentException(
-          "must either have a non-empty list of PCollections, " +
-          "or must first call empty(Pipeline)");
-    }
-    return new PCollectionList<T>(pcsIter.next().getPipeline()).and(pcs);
-  }
-
-  /**
-   * Returns a new {@link PCollectionList} that has all the {@link PCollection PCollections} of
-   * this {@link PCollectionList} plus the given {@link PCollection} appended to the end.
-   *
-   * <p>All the {@link PCollection PCollections} in the resulting {@link PCollectionList} must be
-   * part of the same {@link Pipeline}.
-   */
-  public PCollectionList<T> and(PCollection<T> pc) {
-    if (pc.getPipeline() != pipeline) {
-      throw new IllegalArgumentException(
-          "PCollections come from different Pipelines");
-    }
-    return new PCollectionList<>(pipeline,
-        new ImmutableList.Builder<PCollection<T>>()
-            .addAll(pcollections)
-            .add(pc)
-            .build());
-  }
-
-  /**
-   * Returns a new {@link PCollectionList} that has all the {@link PCollection PCollections} of
-   * this {@link PCollectionList} plus the given {@link PCollection PCollections} appended to the
-   * end, in order.
-   *
-   * <p>All the {@link PCollection PCollections} in the resulting {@link PCollectionList} must be
-   * part of the same {@link Pipeline}.
-   */
-  public PCollectionList<T> and(Iterable<PCollection<T>> pcs) {
-    List<PCollection<T>> copy = new ArrayList<>(pcollections);
-    for (PCollection<T> pc : pcs) {
-      if (pc.getPipeline() != pipeline) {
-        throw new IllegalArgumentException(
-            "PCollections come from different Pipelines");
-      }
-      copy.add(pc);
-    }
-    return new PCollectionList<>(pipeline, copy);
-  }
-
-  /**
-   * Returns the number of {@link PCollection PCollections} in this {@link PCollectionList}.
-   */
-  public int size() {
-    return pcollections.size();
-  }
-
-  /**
-   * Returns the {@link PCollection} at the given index (origin zero).
-   *
-   * @throws IndexOutOfBoundsException if the index is out of the range
-   * {@code [0..size()-1]}.
-   */
-  public PCollection<T> get(int index) {
-    return pcollections.get(index);
-  }
-
-  /**
-   * Returns an immutable List of all the {@link PCollection PCollections} in this
-   * {@link PCollectionList}.
-   */
-  public List<PCollection<T>> getAll() {
-    return pcollections;
-  }
-
-  /**
-   * Like {@link #apply(String, PTransform)} but defaulting to the name
-   * of the {@code PTransform}.
-   */
-  public <OutputT extends POutput> OutputT apply(
-      PTransform<PCollectionList<T>, OutputT> t) {
-    return Pipeline.applyTransform(this, t);
-  }
-
-  /**
-   * Applies the given {@link PTransform} to this input {@link PCollectionList},
-   * using {@code name} to identify this specific application of the transform.
-   * This name is used in various places, including the monitoring UI, logging,
-   * and to stably identify this application node in the job graph.
-   *
-   * @return the output of the applied {@link PTransform}
-   */
-  public <OutputT extends POutput> OutputT apply(
-      String name, PTransform<PCollectionList<T>, OutputT> t) {
-    return Pipeline.applyTransform(name, this, t);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal details below here.
-
-  final Pipeline pipeline;
-  final List<PCollection<T>> pcollections;
-
-  PCollectionList(Pipeline pipeline) {
-    this(pipeline, new ArrayList<PCollection<T>>());
-  }
-
-  PCollectionList(Pipeline pipeline, List<PCollection<T>> pcollections) {
-    this.pipeline = pipeline;
-    this.pcollections = Collections.unmodifiableList(pcollections);
-  }
-
-  @Override
-  public Pipeline getPipeline() {
-    return pipeline;
-  }
-
-  @Override
-  public Collection<? extends PValue> expand() {
-    return pcollections;
-  }
-
-  @Override
-  public void recordAsOutput(AppliedPTransform<?, ?, ?> transform) {
-    int i = 0;
-    for (PCollection<T> pc : pcollections) {
-      pc.recordAsOutput(transform, "out" + i);
-      i++;
-    }
-  }
-
-  @Override
-  public void finishSpecifying() {
-    for (PCollection<T> pc : pcollections) {
-      pc.finishSpecifying();
-    }
-  }
-
-  @Override
-  public void finishSpecifyingOutput() {
-    for (PCollection<T> pc : pcollections) {
-      pc.finishSpecifyingOutput();
-    }
-  }
-}

[30/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessExecutionContext.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessExecutionContext.java
deleted file mode 100644
index 43cd9eb..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessExecutionContext.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.TimerUpdate;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.TransformWatermarks;
-import com.google.cloud.dataflow.sdk.util.BaseExecutionContext;
-import com.google.cloud.dataflow.sdk.util.ExecutionContext;
-import com.google.cloud.dataflow.sdk.util.TimerInternals;
-import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler;
-import com.google.cloud.dataflow.sdk.util.state.CopyOnAccessInMemoryStateInternals;
-
-/**
- * Execution Context for the {@link InProcessPipelineRunner}.
- *
- * <p>This implementation is not thread safe. A new {@link InProcessExecutionContext} must be
- * created for each thread that requires it.
- */
-class InProcessExecutionContext
-    extends BaseExecutionContext<InProcessExecutionContext.InProcessStepContext> {
-  private final Clock clock;
-  private final Object key;
-  private final CopyOnAccessInMemoryStateInternals<Object> existingState;
-  private final TransformWatermarks watermarks;
-
-  public InProcessExecutionContext(Clock clock, Object key,
-      CopyOnAccessInMemoryStateInternals<Object> existingState, TransformWatermarks watermarks) {
-    this.clock = clock;
-    this.key = key;
-    this.existingState = existingState;
-    this.watermarks = watermarks;
-  }
-
-  @Override
-  protected InProcessStepContext createStepContext(
-      String stepName, String transformName, StateSampler stateSampler) {
-    return new InProcessStepContext(this, stepName, transformName);
-  }
-
-  /**
-   * Step Context for the {@link InProcessPipelineRunner}.
-   */
-  public class InProcessStepContext
-      extends com.google.cloud.dataflow.sdk.util.BaseExecutionContext.StepContext {
-    private CopyOnAccessInMemoryStateInternals<Object> stateInternals;
-    private InProcessTimerInternals timerInternals;
-
-    public InProcessStepContext(
-        ExecutionContext executionContext, String stepName, String transformName) {
-      super(executionContext, stepName, transformName);
-    }
-
-    @Override
-    public CopyOnAccessInMemoryStateInternals<Object> stateInternals() {
-      if (stateInternals == null) {
-        stateInternals = CopyOnAccessInMemoryStateInternals.withUnderlying(key, existingState);
-      }
-      return stateInternals;
-    }
-
-    @Override
-    public InProcessTimerInternals timerInternals() {
-      if (timerInternals == null) {
-        timerInternals =
-            InProcessTimerInternals.create(clock, watermarks, TimerUpdate.builder(key));
-      }
-      return timerInternals;
-    }
-
-    /**
-     * Commits the state of this step, and returns the committed state. If the step has not
-     * accessed any state, returns {@code null}.
-     */
-    public CopyOnAccessInMemoryStateInternals<?> commitState() {
-      if (stateInternals != null) {
-        return stateInternals.commit();
-      }
-      return null;
-    }
-
-    /**
-     * Gets the timer update of the {@link TimerInternals} of this {@link InProcessStepContext},
-     * which is empty if the {@link TimerInternals} were never accessed.
-     */
-    public TimerUpdate getTimerUpdate() {
-      if (timerInternals == null) {
-        return TimerUpdate.empty();
-      }
-      return timerInternals.getTimerUpdate();
-    }
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessExecutor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessExecutor.java
deleted file mode 100644
index 7b60bca..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessExecutor.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-
-import java.util.Collection;
-
-/**
- * An executor that schedules and executes {@link AppliedPTransform AppliedPTransforms} for both
- * source and intermediate {@link PTransform PTransforms}.
- */
-interface InProcessExecutor {
-  /**
-   * Starts this executor. The provided collection is the collection of root transforms to
-   * initially schedule.
-   *
-   * @param rootTransforms the root transforms to initially schedule
-   */
-  void start(Collection<AppliedPTransform<?, ?, ?>> rootTransforms);
-
-  /**
-   * Blocks until the job being executed enters a terminal state. A job is completed after all
-   * root {@link AppliedPTransform AppliedPTransforms} have completed, and all
-   * {@link CommittedBundle Bundles} have been consumed. Jobs may also terminate abnormally.
-   *
-   * @throws Throwable whenever an executor thread throws anything, transfers the throwable to the
-   *                   waiting thread and rethrows it
-   */
-  void awaitCompletion() throws Throwable;
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessPipelineOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessPipelineOptions.java
deleted file mode 100644
index 5ee0e88..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessPipelineOptions.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.options.ApplicationNameOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.Hidden;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Validation.Required;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-/**
- * Options that can be used to configure the {@link InProcessPipelineRunner}.
- */
-public interface InProcessPipelineOptions extends PipelineOptions, ApplicationNameOptions {
-  /**
-   * Gets the {@link ExecutorServiceFactory} to use to create instances of {@link ExecutorService}
-   * to execute {@link PTransform PTransforms}.
-   *
-   * <p>Note that {@link ExecutorService ExecutorServices} returned by the factory must ensure
-   * that they cannot enter a state in which they will not schedule additional pending work unless
-   * currently scheduled work completes, as this may cause the {@link Pipeline} to cease processing.
-   *
-   * <p>Defaults to a {@link CachedThreadPoolExecutorServiceFactory}, which produces instances of
-   * {@link Executors#newCachedThreadPool()}.
-   */
-  @JsonIgnore
-  @Required
-  @Hidden
-  @Default.InstanceFactory(CachedThreadPoolExecutorServiceFactory.class)
-  ExecutorServiceFactory getExecutorServiceFactory();
-
-  void setExecutorServiceFactory(ExecutorServiceFactory executorService);
-
-  /**
-   * Gets the {@link Clock} used by this pipeline. The clock is used in place of accessing the
-   * system time when time values are required by the evaluator.
-   */
-  @Default.InstanceFactory(NanosOffsetClock.Factory.class)
-  @JsonIgnore
-  @Required
-  @Hidden
-  @Description(
-      "The processing time source used by the pipeline. When the current time is "
-          + "needed by the evaluator, the result of clock#now() is used.")
-  Clock getClock();
-
-  void setClock(Clock clock);
-
-  @Default.Boolean(false)
-  @Description(
-      "If the pipeline should shut down producers which have reached the maximum "
-          + "representable watermark. If this is set to true, a pipeline in which all PTransforms "
-          + "have reached the maximum watermark will be shut down, even if there are unbounded "
-          + "sources that could produce additional (late) data. By default, if the pipeline "
-          + "contains any unbounded PCollections, it will run until explicitly shut down.")
-  boolean isShutdownUnboundedProducersWithMaxWatermark();
-
-  void setShutdownUnboundedProducersWithMaxWatermark(boolean shutdown);
-
-  @Default.Boolean(true)
-  @Description(
-      "If the pipeline should block awaiting completion of the pipeline. If set to true, "
-          + "a call to Pipeline#run() will block until all PTransforms are complete. Otherwise, "
-          + "the Pipeline will execute asynchronously. If set to false, the completion of the "
-          + "pipeline can be awaited on by use of InProcessPipelineResult#awaitCompletion().")
-  boolean isBlockOnRun();
-
-  void setBlockOnRun(boolean b);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessPipelineRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessPipelineRunner.java
deleted file mode 100644
index a1c8756..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessPipelineRunner.java
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.Pipeline.PipelineExecutionException;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.AggregatorPipelineExtractor;
-import com.google.cloud.dataflow.sdk.runners.AggregatorRetrievalException;
-import com.google.cloud.dataflow.sdk.runners.AggregatorValues;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.inprocess.GroupByKeyEvaluatorFactory.InProcessGroupByKey;
-import com.google.cloud.dataflow.sdk.runners.inprocess.GroupByKeyEvaluatorFactory.InProcessGroupByKeyOnly;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.View.CreatePCollectionView;
-import com.google.cloud.dataflow.sdk.util.InstanceBuilder;
-import com.google.cloud.dataflow.sdk.util.MapAggregatorValues;
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-import com.google.cloud.dataflow.sdk.util.UserCodeException;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.common.Counter;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.common.base.Throwables;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.ImmutableSet;
-
-import org.joda.time.Instant;
-
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.concurrent.ExecutorService;
-
-import javax.annotation.Nullable;
-
-/**
- * An In-Memory implementation of the Dataflow Programming Model. Supports Unbounded
- * {@link PCollection PCollections}.
- */
-@Experimental
-public class InProcessPipelineRunner
-    extends PipelineRunner<InProcessPipelineRunner.InProcessPipelineResult> {
-  /**
-   * The default set of transform overrides to use in the {@link InProcessPipelineRunner}, keyed by
-   * the class of the transform that is replaced.
-   *
-   * <p>A transform override must have a single-argument constructor that takes an instance of the
-   * type of transform it is overriding.
-   *
-   * <p>The map is immutable and the reference is {@code final}, so the set of overrides cannot be
-   * replaced after class initialization.
-   */
-  @SuppressWarnings("rawtypes")
-  private static final Map<Class<? extends PTransform>, Class<? extends PTransform>>
-      defaultTransformOverrides =
-          ImmutableMap.<Class<? extends PTransform>, Class<? extends PTransform>>builder()
-              .put(Create.Values.class, InProcessCreate.class)
-              .put(GroupByKey.class, InProcessGroupByKey.class)
-              .put(
-                  CreatePCollectionView.class,
-                  ViewEvaluatorFactory.InProcessCreatePCollectionView.class)
-              .build();
-
-  /**
-   * A mutable portion of a {@link PCollection} to which elements may still be added. Elements
-   * output to the bundle are executed, at some later point, by the
-   * {@link PTransform PTransforms} that consume the {@link PCollection} the bundle is a part of.
-   *
-   * @param <T> the type of elements that can be added to this bundle
-   */
-  public interface UncommittedBundle<T> {
-    /**
-     * Returns the {@link PCollection} that the elements of this {@link UncommittedBundle} belong
-     * to.
-     */
-    PCollection<T> getPCollection();
-
-    /**
-     * Outputs an element to this bundle.
-     *
-     * @param element the element to add to this bundle
-     * @return this bundle
-     */
-    UncommittedBundle<T> add(WindowedValue<T> element);
-
-    /**
-     * Commits this {@link UncommittedBundle}, returning an immutable {@link CommittedBundle} that
-     * contains every element added to it. After a call to commit, {@link #add(WindowedValue)}
-     * will throw an {@link IllegalStateException}.
-     *
-     * @param synchronizedProcessingTime the synchronized processing time at which this bundle was
-     *                                   committed
-     */
-    CommittedBundle<T> commit(Instant synchronizedProcessingTime);
-  }
-
-  /**
-   * An immutable portion of a {@link PCollection}. Elements are output to an
-   * {@link UncommittedBundle}, which will eventually be committed. Committed elements are
-   * executed, at some later point, by the {@link PTransform PTransforms} that consume the
-   * {@link PCollection} this bundle is a part of.
-   *
-   * @param <T> the type of elements contained within this bundle
-   */
-  public interface CommittedBundle<T> {
-    /**
-     * Returns the {@link PCollection} that the elements of this bundle belong to.
-     */
-    PCollection<T> getPCollection();
-
-    /**
-     * Returns whether this bundle is keyed. A bundle that is part of a {@link PCollection} that
-     * occurs after a {@link GroupByKey} is keyed by the result of the last {@link GroupByKey}.
-     */
-    boolean isKeyed();
-
-    /**
-     * Returns the key that was output in the most recent {@link GroupByKey} in the execution of
-     * this bundle. The key may be null.
-     */
-    @Nullable
-    Object getKey();
-
-    /**
-     * Returns an {@link Iterable} over all of the elements that have been added to this
-     * {@link CommittedBundle}.
-     */
-    Iterable<WindowedValue<T>> getElements();
-
-    /**
-     * Returns the processing time output watermark at the time the producing {@link PTransform}
-     * committed this bundle. Downstream synchronized processing time watermarks cannot progress
-     * past this point before consuming this bundle.
-     *
-     * <p>This value is no greater than the earliest incomplete processing time or synchronized
-     * processing time {@link TimerData timer} at the time this bundle was committed, including any
-     * timers that fired to produce this bundle.
-     */
-    Instant getSynchronizedProcessingOutputWatermark();
-  }
-
-  /**
-   * Writes the contents of a {@link PCollection} to a storage mechanism that can be read from
-   * while constructing a {@link PCollectionView}.
-   *
-   * @param <ElemT> the type of elements the input {@link PCollection} contains.
-   * @param <ViewT> the type of the PCollectionView this writer writes to.
-   */
-  public interface PCollectionViewWriter<ElemT, ViewT> {
-    /**
-     * Hands the provided elements to this writer.
-     */
-    void add(Iterable<WindowedValue<ElemT>> values);
-  }
-
-  ////////////////////////////////////////////////////////////////////////////////////////////////
-  private final InProcessPipelineOptions options;
-
-  /**
-   * Creates an {@link InProcessPipelineRunner} from the provided options, viewed as
-   * {@link InProcessPipelineOptions}.
-   */
-  public static InProcessPipelineRunner fromOptions(PipelineOptions options) {
-    InProcessPipelineOptions inProcessOptions = options.as(InProcessPipelineOptions.class);
-    return new InProcessPipelineRunner(inProcessOptions);
-  }
-
-  /**
-   * Creates a runner that uses the provided options. Private: instances are created through
-   * {@link #fromOptions(PipelineOptions)}.
-   */
-  private InProcessPipelineRunner(InProcessPipelineOptions options) {
-    this.options = options;
-  }
-
-  /**
-   * Returns the {@link PipelineOptions} this {@link InProcessPipelineRunner} was created with.
-   */
-  public InProcessPipelineOptions getPipelineOptions() {
-    return this.options;
-  }
-
-  /**
-   * Applies the provided transform to the input, substituting the replacement registered in the
-   * default transform overrides when one exists for the exact class of the transform.
-   */
-  @Override
-  public <OutputT extends POutput, InputT extends PInput> OutputT apply(
-      PTransform<InputT, OutputT> transform, InputT input) {
-    Class<?> overrideClass = defaultTransformOverrides.get(transform.getClass());
-    if (overrideClass == null) {
-      // No override is registered for this transform class; apply it unchanged.
-      return super.apply(transform, input);
-    }
-
-    // It is the responsibility of whoever constructs overrides to ensure this is type safe.
-    @SuppressWarnings("unchecked")
-    Class<PTransform<InputT, OutputT>> originalClass =
-        (Class<PTransform<InputT, OutputT>>) transform.getClass();
-    @SuppressWarnings("unchecked")
-    Class<PTransform<InputT, OutputT>> replacementClass =
-        (Class<PTransform<InputT, OutputT>>) overrideClass;
-
-    // The replacement is built from its single-argument constructor taking the original transform.
-    PTransform<InputT, OutputT> replacement =
-        InstanceBuilder.ofType(replacementClass).withArg(originalClass, transform).build();
-
-    // This overrides the contents of the apply method without changing the TransformTreeNode that
-    // is generated by the PCollection application.
-    return super.apply(replacement, input);
-  }
-
-  /**
-   * Executes the provided {@link Pipeline} using an executor service created for this run.
-   *
-   * <p>If {@link InProcessPipelineOptions#isBlockOnRun()} is true, this call blocks until the
-   * pipeline completes. A {@link UserCodeException} raised while waiting is rethrown as a
-   * {@link PipelineExecutionException} wrapping its cause; any other failure is rethrown via
-   * {@link Throwables#propagate(Throwable)}. Otherwise the result is returned once the executor
-   * has been started, and completion can be awaited with
-   * {@link InProcessPipelineResult#awaitCompletion()}.
-   */
-  @Override
-  public InProcessPipelineResult run(Pipeline pipeline) {
-    ConsumerTrackingPipelineVisitor consumerTrackingVisitor = new ConsumerTrackingPipelineVisitor();
-    pipeline.traverseTopologically(consumerTrackingVisitor);
-    for (PValue unfinalized : consumerTrackingVisitor.getUnfinalizedPValues()) {
-      unfinalized.finishSpecifying();
-    }
-    @SuppressWarnings("rawtypes")
-    KeyedPValueTrackingVisitor keyedPValueVisitor =
-        KeyedPValueTrackingVisitor.create(
-            ImmutableSet.<Class<? extends PTransform>>of(
-                GroupByKey.class, InProcessGroupByKeyOnly.class));
-    pipeline.traverseTopologically(keyedPValueVisitor);
-
-    InProcessEvaluationContext context =
-        InProcessEvaluationContext.create(
-            getPipelineOptions(),
-            consumerTrackingVisitor.getRootTransforms(),
-            consumerTrackingVisitor.getValueToConsumers(),
-            consumerTrackingVisitor.getStepNames(),
-            consumerTrackingVisitor.getViews());
-
-    // independent executor service for each run
-    ExecutorService executorService =
-        context.getPipelineOptions().getExecutorServiceFactory().create();
-    InProcessExecutor executor =
-        ExecutorServiceParallelExecutor.create(
-            executorService,
-            consumerTrackingVisitor.getValueToConsumers(),
-            keyedPValueVisitor.getKeyedPValues(),
-            TransformEvaluatorRegistry.defaultRegistry(),
-            context);
-    executor.start(consumerTrackingVisitor.getRootTransforms());
-
-    Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorSteps =
-        new AggregatorPipelineExtractor(pipeline).getAggregatorSteps();
-    InProcessPipelineResult result =
-        new InProcessPipelineResult(executor, context, aggregatorSteps);
-    if (options.isBlockOnRun()) {
-      try {
-        result.awaitCompletion();
-      } catch (UserCodeException userException) {
-        // Surface the exception thrown by user code rather than the wrapper around it.
-        throw new PipelineExecutionException(userException.getCause());
-      } catch (Throwable t) {
-        // propagate always throws; the explicit throw makes that visible to readers and the
-        // compiler.
-        throw Throwables.propagate(t);
-      }
-    }
-    return result;
-  }
-
-  /**
-   * The result of running a {@link Pipeline} with the {@link InProcessPipelineRunner}.
-   *
-   * <p>Exposes the current {@link State} of the execution, the values of the pipeline's
-   * {@link Aggregator Aggregators}, and a way to block until the execution has completed.
-   */
-  public static class InProcessPipelineResult implements PipelineResult {
-    private final InProcessExecutor executor;
-    private final InProcessEvaluationContext evaluationContext;
-    private final Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorSteps;
-    private State state;
-
-    private InProcessPipelineResult(
-        InProcessExecutor executor,
-        InProcessEvaluationContext evaluationContext,
-        Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorSteps) {
-      this.executor = executor;
-      this.evaluationContext = evaluationContext;
-      this.aggregatorSteps = aggregatorSteps;
-      // Only ever constructed after the executor has started.
-      this.state = State.RUNNING;
-    }
-
-    /**
-     * Returns the most recently recorded state of the pipeline. The state only leaves
-     * {@link com.google.cloud.dataflow.sdk.PipelineResult.State#RUNNING} as a result of a call to
-     * {@link #awaitCompletion()}.
-     */
-    @Override
-    public State getState() {
-      return state;
-    }
-
-    /**
-     * Returns the current values of the provided {@link Aggregator}, keyed by the full name of
-     * each step that uses it and for which a counter exists.
-     *
-     * <p>If the aggregator is not used by any step of the pipeline, the returned values are empty.
-     */
-    @Override
-    public <T> AggregatorValues<T> getAggregatorValues(Aggregator<?, T> aggregator)
-        throws AggregatorRetrievalException {
-      CounterSet counters = evaluationContext.getCounters();
-      Collection<PTransform<?, ?>> steps = aggregatorSteps.get(aggregator);
-      Map<String, T> stepValues = new HashMap<>();
-      if (steps == null) {
-        // No step registered this aggregator, so there is nothing to report for it.
-        return new MapAggregatorValues<>(stepValues);
-      }
-      for (AppliedPTransform<?, ?, ?> transform : evaluationContext.getSteps()) {
-        if (steps.contains(transform.getTransform())) {
-          String stepName =
-              String.format(
-                  "user-%s-%s", evaluationContext.getStepName(transform), aggregator.getName());
-          // The counter is looked up by a name derived from the aggregator, so it is expected to
-          // aggregate values of the aggregator's output type.
-          @SuppressWarnings("unchecked")
-          Counter<T> counter = (Counter<T>) counters.getExistingCounter(stepName);
-          if (counter != null) {
-            stepValues.put(transform.getFullName(), counter.getAggregate());
-          }
-        }
-      }
-      return new MapAggregatorValues<>(stepValues);
-    }
-
-    /**
-     * Blocks until the {@link Pipeline} execution represented by this
-     * {@link InProcessPipelineResult} is complete, returning the terminal state.
-     *
-     * <p>If the pipeline terminates abnormally by throwing an exception, this will rethrow the
-     * exception. Future calls to {@link #getState()} will return
-     * {@link com.google.cloud.dataflow.sdk.PipelineResult.State#FAILED}.
-     *
-     * <p>If the waiting thread is interrupted, its interrupt status is restored and the
-     * {@link InterruptedException} is rethrown; the recorded state is left unchanged.
-     *
-     * <p>NOTE: if the {@link Pipeline} contains an {@link IsBounded#UNBOUNDED unbounded}
-     * {@link PCollection}, and the {@link PipelineRunner} was created with
-     * {@link InProcessPipelineOptions#isShutdownUnboundedProducersWithMaxWatermark()} set to false,
-     * this method will never return.
-     *
-     * See also {@link InProcessExecutor#awaitCompletion()}.
-     */
-    public State awaitCompletion() throws Throwable {
-      if (!state.isTerminal()) {
-        try {
-          executor.awaitCompletion();
-          state = State.DONE;
-        } catch (InterruptedException e) {
-          Thread.currentThread().interrupt();
-          throw e;
-        } catch (Throwable t) {
-          state = State.FAILED;
-          throw t;
-        }
-      }
-      return state;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessSideInputContainer.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessSideInputContainer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessSideInputContainer.java
deleted file mode 100644
index 37c9fcf..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessSideInputContainer.java
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.util.PCollectionViewWindow;
-import com.google.cloud.dataflow.sdk.util.SideInputReader;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Throwables;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Sets;
-import com.google.common.util.concurrent.SettableFuture;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.ExecutionException;
-
-import javax.annotation.Nullable;
-
-/**
- * An in-process container for {@link PCollectionView PCollectionViews}, which provides methods for
- * constructing {@link SideInputReader SideInputReaders} which block until a side input is
- * available and writing to a {@link PCollectionView}.
- */
-class InProcessSideInputContainer {
-  private final InProcessEvaluationContext evaluationContext;
-  private final Collection<PCollectionView<?>> containedViews;
-  private final LoadingCache<PCollectionViewWindow<?>,
-      SettableFuture<Iterable<? extends WindowedValue<?>>>> viewByWindows;
-
-  /**
-   * Creates a new {@link InProcessSideInputContainer} holding the provided views and using the
-   * provided context. The contents of each view and window are tracked by a future that starts
-   * out unset.
-   */
-  public static InProcessSideInputContainer create(
-      InProcessEvaluationContext context, Collection<PCollectionView<?>> containedViews) {
-    LoadingCache<PCollectionViewWindow<?>, SettableFuture<Iterable<? extends WindowedValue<?>>>>
-        viewByWindows =
-            CacheBuilder.newBuilder()
-                .build(
-                    new CacheLoader<
-                        PCollectionViewWindow<?>,
-                        SettableFuture<Iterable<? extends WindowedValue<?>>>>() {
-                      @Override
-                      public SettableFuture<Iterable<? extends WindowedValue<?>>> load(
-                          PCollectionViewWindow<?> viewWindow) {
-                        // Entries are created on first access with a future that has no value.
-                        return SettableFuture.create();
-                      }
-                    });
-    return new InProcessSideInputContainer(context, containedViews, viewByWindows);
-  }
-
-  /**
-   * Creates a container over the provided views whose contents are stored in the provided cache.
-   * Private: instances are created through {@code create}.
-   */
-  private InProcessSideInputContainer(InProcessEvaluationContext context,
-      Collection<PCollectionView<?>> containedViews,
-      LoadingCache<PCollectionViewWindow<?>, SettableFuture<Iterable<? extends WindowedValue<?>>>>
-      viewByWindows) {
-    this.evaluationContext = context;
-    // Snapshot the views so later changes to the caller's collection are not observed.
-    this.containedViews = ImmutableSet.copyOf(containedViews);
-    this.viewByWindows = viewByWindows;
-  }
-
-  /**
-   * Returns a {@link SideInputReader} over this {@link InProcessSideInputContainer} that can read
-   * only the views in the provided argument. The returned reader offers no way to write to the
-   * container, but reads from it, so it reflects values written to this container later.
-   *
-   * @throws IllegalArgumentException if any of the provided views is not contained in this
-   *     container
-   */
-  public SideInputReader createReaderForViews(Collection<PCollectionView<?>> newContainedViews) {
-    if (containedViews.containsAll(newContainedViews)) {
-      return new SideInputContainerSideInputReader(newContainedViews);
-    }
-    // Report exactly which of the requested views this container does not know about.
-    Set<PCollectionView<?>> known = ImmutableSet.copyOf(containedViews);
-    Set<PCollectionView<?>> requested = ImmutableSet.copyOf(newContainedViews);
-    throw new IllegalArgumentException("Can't create a SideInputReader with unknown views "
-        + Sets.difference(requested, known));
-  }
-
-  /**
-   * Writes the provided values to the provided view.
-   *
-   * <p>The values are first grouped by every window they appear in. Then, for each window, the
-   * values replace the current contents of the {@link PCollectionView} in that window if their
-   * pane is later than the pane currently stored within this container.
-   *
-   * <p>The provided iterable is expected to contain only a single window and pane.
-   */
-  public void write(PCollectionView<?> view, Iterable<? extends WindowedValue<?>> values) {
-    for (Map.Entry<BoundedWindow, Collection<WindowedValue<?>>> entry :
-        indexValuesByWindow(values).entrySet()) {
-      BoundedWindow window = entry.getKey();
-      Collection<WindowedValue<?>> valuesInWindow = entry.getValue();
-      updatePCollectionViewWindowValues(view, window, valuesInWindow);
-    }
-  }
-
-  /**
-   * Groups the provided values by each {@link BoundedWindow window} in which they appear. A value
-   * that is in several windows is listed under each of them.
-   */
-  private Map<BoundedWindow, Collection<WindowedValue<?>>> indexValuesByWindow(
-      Iterable<? extends WindowedValue<?>> values) {
-    Map<BoundedWindow, Collection<WindowedValue<?>>> index = new HashMap<>();
-    for (WindowedValue<?> value : values) {
-      for (BoundedWindow window : value.getWindows()) {
-        if (!index.containsKey(window)) {
-          // First value seen for this window; start its collection.
-          index.put(window, new ArrayList<WindowedValue<?>>());
-        }
-        index.get(window).add(value);
-      }
-    }
-    return index;
-  }
-
-  /**
-   * Set the value of the {@link PCollectionView} in the {@link BoundedWindow} to be based on the
-   * specified values, if the values are part of a later pane than currently exist within the
-   * {@link PCollectionViewWindow}.
-   *
-   * <p>If no value has been set for the view and window yet, the provided values are set
-   * unconditionally. If the calling thread is interrupted, its interrupt status is restored and a
-   * future that has not been set yet is completed with an empty list. The cause of an
-   * {@link ExecutionException} is propagated to the caller.
-   */
-  private void updatePCollectionViewWindowValues(
-      PCollectionView<?> view, BoundedWindow window, Collection<WindowedValue<?>> windowValues) {
-    PCollectionViewWindow<?> windowedView = PCollectionViewWindow.of(view, window);
-    SettableFuture<Iterable<? extends WindowedValue<?>>> future = null;
-    try {
-      future = viewByWindows.get(windowedView);
-      if (future.isDone()) {
-        Iterator<? extends WindowedValue<?>> existingValues = future.get().iterator();
-        PaneInfo newPane = windowValues.iterator().next().getPane();
-        // The current value may have no elements, if no elements were produced for the window,
-        // but we are receiving late data. Otherwise the pane of the first existing element is
-        // compared against the pane of the first new element.
-        if (!existingValues.hasNext()
-            || newPane.getIndex() > existingValues.next().getPane().getIndex()) {
-          // A completed future cannot be given a new value, so drop the cache entry and set the
-          // values on the fresh future created by the next lookup.
-          viewByWindows.invalidate(windowedView);
-          viewByWindows.get(windowedView).set(windowValues);
-        }
-      } else {
-        future.set(windowValues);
-      }
-    } catch (InterruptedException e) {
-      Thread.currentThread().interrupt();
-      if (future != null && !future.isDone()) {
-        future.set(Collections.<WindowedValue<?>>emptyList());
-      }
-    } catch (ExecutionException e) {
-      Throwables.propagate(e.getCause());
-    }
-  }
-
-  /**
-   * A {@link SideInputReader} that reads from the enclosing container and is restricted to a
-   * fixed set of views.
-   */
-  private final class SideInputContainerSideInputReader implements SideInputReader {
-    private final Collection<PCollectionView<?>> readerViews;
-
-    private SideInputContainerSideInputReader(Collection<PCollectionView<?>> readerViews) {
-      this.readerViews = ImmutableSet.copyOf(readerViews);
-    }
-
-    /**
-     * Returns the contents of the provided view in the provided window, blocking until a value
-     * has been set for that view and window. A callback is scheduled with the evaluation context
-     * that sets the contents to empty; setting has no effect if a value was already set.
-     *
-     * <p>Returns null, with the interrupt status restored, if the calling thread is interrupted
-     * while waiting.
-     *
-     * @throws IllegalArgumentException if the view is not one of the views of this reader
-     */
-    @Override
-    @Nullable
-    public <T> T get(final PCollectionView<T> view, final BoundedWindow window) {
-      checkArgument(
-          readerViews.contains(view), "calling get(PCollectionView) with unknown view: " + view);
-      PCollectionViewWindow<T> viewWindow = PCollectionViewWindow.of(view, window);
-      try {
-        final SettableFuture<Iterable<? extends WindowedValue<?>>> contents =
-            viewByWindows.get(viewWindow);
-
-        WindowingStrategy<?, ?> strategy = view.getWindowingStrategyInternal();
-        Runnable setEmptyIfUnset =
-            new Runnable() {
-              @Override
-              public void run() {
-                // The requested window has closed without producing elements, so reflect that in
-                // the PCollectionView. If set has already been called, will do nothing.
-                contents.set(Collections.<WindowedValue<?>>emptyList());
-              }
-
-              @Override
-              public String toString() {
-                return MoreObjects.toStringHelper("InProcessSideInputContainerEmptyCallback")
-                    .add("view", view)
-                    .add("window", window)
-                    .toString();
-              }
-            };
-        evaluationContext.scheduleAfterOutputWouldBeProduced(
-            view, window, strategy, setEmptyIfUnset);
-
-        // Safe covariant cast
-        @SuppressWarnings("unchecked")
-        Iterable<WindowedValue<?>> elements = (Iterable<WindowedValue<?>>) contents.get();
-        return view.fromIterableInternal(elements);
-      } catch (InterruptedException e) {
-        Thread.currentThread().interrupt();
-        return null;
-      } catch (ExecutionException e) {
-        throw new RuntimeException(e);
-      }
-    }
-
-    /**
-     * Returns whether the provided view is one of the views of this reader.
-     */
-    @Override
-    public <T> boolean contains(PCollectionView<T> view) {
-      return readerViews.contains(view);
-    }
-
-    /**
-     * Returns whether this reader has no views.
-     */
-    @Override
-    public boolean isEmpty() {
-      return readerViews.isEmpty();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessTimerInternals.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessTimerInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessTimerInternals.java
deleted file mode 100644
index 06ba7b8..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessTimerInternals.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.TimerUpdate;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.TimerUpdate.TimerUpdateBuilder;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.TransformWatermarks;
-import com.google.cloud.dataflow.sdk.util.TimerInternals;
-
-import org.joda.time.Instant;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link TimerInternals} implementation in which all relevant data exists in memory. Timer
- * changes are accumulated in a {@link TimerUpdateBuilder}, and times are read from a
- * {@link Clock} and the {@link TransformWatermarks} of a transform.
- */
-public class InProcessTimerInternals implements TimerInternals {
-  private final Clock clock;
-  private final TransformWatermarks transformWatermarks;
-  private final TimerUpdateBuilder timerUpdates;
-
-  /**
-   * Creates an {@link InProcessTimerInternals} backed by the provided clock, watermarks, and
-   * builder.
-   */
-  public static InProcessTimerInternals create(
-      Clock clock, TransformWatermarks watermarks, TimerUpdateBuilder timerUpdateBuilder) {
-    return new InProcessTimerInternals(clock, watermarks, timerUpdateBuilder);
-  }
-
-  private InProcessTimerInternals(
-      Clock clock, TransformWatermarks watermarks, TimerUpdateBuilder timerUpdateBuilder) {
-    this.clock = clock;
-    this.transformWatermarks = watermarks;
-    this.timerUpdates = timerUpdateBuilder;
-  }
-
-  /**
-   * Records the provided timer as set in the underlying {@link TimerUpdateBuilder}.
-   */
-  @Override
-  public void setTimer(TimerData timerKey) {
-    this.timerUpdates.setTimer(timerKey);
-  }
-
-  /**
-   * Records the provided timer as deleted in the underlying {@link TimerUpdateBuilder}.
-   */
-  @Override
-  public void deleteTimer(TimerData timerKey) {
-    this.timerUpdates.deletedTimer(timerKey);
-  }
-
-  /**
-   * Builds a {@link TimerUpdate} from the timer changes recorded so far.
-   */
-  public TimerUpdate getTimerUpdate() {
-    return this.timerUpdates.build();
-  }
-
-  /**
-   * Returns the current time according to the processing time clock.
-   */
-  @Override
-  public Instant currentProcessingTime() {
-    return this.clock.now();
-  }
-
-  /**
-   * Returns the synchronized processing input time reported by the transform's watermarks.
-   */
-  @Override
-  @Nullable
-  public Instant currentSynchronizedProcessingTime() {
-    return this.transformWatermarks.getSynchronizedProcessingInputTime();
-  }
-
-  /**
-   * Returns the input watermark reported by the transform's watermarks.
-   */
-  @Override
-  @Nullable
-  public Instant currentInputWatermarkTime() {
-    return this.transformWatermarks.getInputWatermark();
-  }
-
-  /**
-   * Returns the output watermark reported by the transform's watermarks.
-   */
-  @Override
-  @Nullable
-  public Instant currentOutputWatermarkTime() {
-    return this.transformWatermarks.getOutputWatermark();
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessTransformResult.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessTransformResult.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessTransformResult.java
deleted file mode 100644
index 3f9e94a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessTransformResult.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.TimerUpdate;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.util.state.CopyOnAccessInMemoryStateInternals;
-
-import org.joda.time.Instant;
-
-import javax.annotation.Nullable;
-
-/**
- * The result of evaluating an {@link AppliedPTransform} with a {@link TransformEvaluator}.
- */
-public interface InProcessTransformResult {
-  /**
-   * Returns the {@link AppliedPTransform} that produced this result.
-   */
-  AppliedPTransform<?, ?, ?> getTransform();
-
-  /**
-   * Returns the {@link UncommittedBundle (uncommitted) Bundles} output by this transform. These
-   * will be committed by the evaluation context as part of completing this result.
-   */
-  Iterable<? extends UncommittedBundle<?>> getOutputBundles();
-
-  /**
-   * Returns the {@link CounterSet} used by this {@link PTransform}, or null if this transform did
-   * not use a {@link CounterSet}.
-   */
-  @Nullable CounterSet getCounters();
-
-  /**
-   * Returns the Watermark Hold for the transform at the time this result was produced.
-   *
-   * <p>If the transform does not set any watermark hold, returns
-   * {@link BoundedWindow#TIMESTAMP_MAX_VALUE}.
-   */
-  Instant getWatermarkHold();
-
-  /**
-   * Returns the State used by the transform.
-   *
-   * <p>If this evaluation did not access state, this may return null.
-   */
-  @Nullable CopyOnAccessInMemoryStateInternals<?> getState();
-
-  /**
-   * Returns the {@link TimerUpdate} that was produced as a result of this evaluation. If the
-   * evaluation was triggered due to the delivery of one or more timers, those timers must have
-   * been added to the builder that produced the update before it was built.
-   *
-   * <p>If this evaluation did not add or remove any timers, returns an empty TimerUpdate.
-   */
-  TimerUpdate getTimerUpdate();
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/KeyedPValueTrackingVisitor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/KeyedPValueTrackingVisitor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/KeyedPValueTrackingVisitor.java
deleted file mode 100644
index 23a8c0f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/KeyedPValueTrackingVisitor.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
-import com.google.cloud.dataflow.sdk.runners.TransformTreeNode;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.values.PValue;
-
-import java.util.HashSet;
-import java.util.Set;
-
-/**
- * A pipeline visitor that tracks all keyed {@link PValue PValues}. A {@link PValue} is keyed if it
- * is the result of a {@link PTransform} that produces keyed outputs. A {@link PTransform} that
- * produces keyed outputs is assumed to colocate output elements that share a key.
- *
- * <p>All {@link GroupByKey} transforms, or their runner-specific implementation primitive, produce
- * keyed output.
- */
-// TODO: Handle Key-preserving transforms when appropriate and more aggressively make PTransforms
-// unkeyed
-class KeyedPValueTrackingVisitor implements PipelineVisitor {
-  @SuppressWarnings("rawtypes")
-  private final Set<Class<? extends PTransform>> producesKeyedOutputs;
-  private final Set<PValue> keyedValues;
-  private boolean finalized;
-
-  public static KeyedPValueTrackingVisitor create(
-      @SuppressWarnings("rawtypes") Set<Class<? extends PTransform>> producesKeyedOutputs) {
-    return new KeyedPValueTrackingVisitor(producesKeyedOutputs);
-  }
-
-  private KeyedPValueTrackingVisitor(
-      @SuppressWarnings("rawtypes") Set<Class<? extends PTransform>> producesKeyedOutputs) {
-    this.producesKeyedOutputs = producesKeyedOutputs;
-    this.keyedValues = new HashSet<>();
-  }
-
-  @Override
-  public void enterCompositeTransform(TransformTreeNode node) {
-    checkState(
-        !finalized,
-        "Attempted to use a %s that has already been finalized on a pipeline (visiting node %s)",
-        KeyedPValueTrackingVisitor.class.getSimpleName(),
-        node);
-  }
-
-  @Override
-  public void leaveCompositeTransform(TransformTreeNode node) {
-    checkState(
-        !finalized,
-        "Attempted to use a %s that has already been finalized on a pipeline (visiting node %s)",
-        KeyedPValueTrackingVisitor.class.getSimpleName(),
-        node);
-    if (node.isRootNode()) {
-      finalized = true;
-    } else if (producesKeyedOutputs.contains(node.getTransform().getClass())) {
-      keyedValues.addAll(node.getExpandedOutputs());
-    }
-  }
-
-  @Override
-  public void visitTransform(TransformTreeNode node) {}
-
-  @Override
-  public void visitValue(PValue value, TransformTreeNode producer) {
-    if (producesKeyedOutputs.contains(producer.getTransform().getClass())) {
-      keyedValues.addAll(value.expand());
-    }
-  }
-
-  public Set<PValue> getKeyedPValues() {
-    checkState(
-        finalized, "can't call getKeyedPValues before a Pipeline has been completely traversed");
-    return keyedValues;
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/NanosOffsetClock.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/NanosOffsetClock.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/NanosOffsetClock.java
deleted file mode 100644
index 958e26d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/NanosOffsetClock.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-import org.joda.time.Instant;
-
-import java.util.concurrent.TimeUnit;
-
-/**
- * A {@link Clock} that uses {@link System#nanoTime()} to track the progress of time.
- */
-public class NanosOffsetClock implements Clock {
-  private final long baseMillis;
-  private final long nanosAtBaseMillis;
-
-  public static NanosOffsetClock create() {
-    return new NanosOffsetClock();
-  }
-
-  private NanosOffsetClock() {
-    baseMillis = System.currentTimeMillis();
-    nanosAtBaseMillis = System.nanoTime();
-  }
-
-  @Override
-  public Instant now() {
-    return new Instant(
-        baseMillis + (TimeUnit.MILLISECONDS.convert(
-            System.nanoTime() - nanosAtBaseMillis, TimeUnit.NANOSECONDS)));
-  }
-
-  /**
-   * Creates instances of {@link NanosOffsetClock}.
-   */
-  public static class Factory implements DefaultValueFactory<Clock> {
-    @Override
-    public Clock create(PipelineOptions options) {
-      return new NanosOffsetClock();
-    }
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoInProcessEvaluator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoInProcessEvaluator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoInProcessEvaluator.java
deleted file mode 100644
index 2a21e8c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoInProcessEvaluator.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessExecutionContext.InProcessStepContext;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.util.DoFnRunner;
-import com.google.cloud.dataflow.sdk.util.DoFnRunners.OutputManager;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.util.state.CopyOnAccessInMemoryStateInternals;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-class ParDoInProcessEvaluator<T> implements TransformEvaluator<T> {
-  private final DoFnRunner<T, ?> fnRunner;
-  private final AppliedPTransform<PCollection<T>, ?, ?> transform;
-  private final CounterSet counters;
-  private final Collection<UncommittedBundle<?>> outputBundles;
-  private final InProcessStepContext stepContext;
-
-  public ParDoInProcessEvaluator(
-      DoFnRunner<T, ?> fnRunner,
-      AppliedPTransform<PCollection<T>, ?, ?> transform,
-      CounterSet counters,
-      Collection<UncommittedBundle<?>> outputBundles,
-      InProcessStepContext stepContext) {
-    this.fnRunner = fnRunner;
-    this.transform = transform;
-    this.counters = counters;
-    this.outputBundles = outputBundles;
-    this.stepContext = stepContext;
-  }
-
-  @Override
-  public void processElement(WindowedValue<T> element) {
-    fnRunner.processElement(element);
-  }
-
-  @Override
-  public InProcessTransformResult finishBundle() {
-    fnRunner.finishBundle();
-    StepTransformResult.Builder resultBuilder;
-    CopyOnAccessInMemoryStateInternals<?> state = stepContext.commitState();
-    if (state != null) {
-      resultBuilder =
-          StepTransformResult.withHold(transform, state.getEarliestWatermarkHold())
-              .withState(state);
-    } else {
-      resultBuilder = StepTransformResult.withoutHold(transform);
-    }
-    return resultBuilder
-        .addOutput(outputBundles)
-        .withTimerUpdate(stepContext.getTimerUpdate())
-        .withCounters(counters)
-        .build();
-  }
-
-  static class BundleOutputManager implements OutputManager {
-    private final Map<TupleTag<?>, UncommittedBundle<?>> bundles;
-    private final Map<TupleTag<?>, List<?>> undeclaredOutputs;
-
-    public static BundleOutputManager create(Map<TupleTag<?>, UncommittedBundle<?>> outputBundles) {
-      return new BundleOutputManager(outputBundles);
-    }
-
-    private BundleOutputManager(Map<TupleTag<?>, UncommittedBundle<?>> bundles) {
-      this.bundles = bundles;
-      undeclaredOutputs = new HashMap<>();
-    }
-
-    @SuppressWarnings("unchecked")
-    @Override
-    public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
-      @SuppressWarnings("rawtypes")
-      UncommittedBundle bundle = bundles.get(tag);
-      if (bundle == null) {
-        List undeclaredContents = undeclaredOutputs.get(tag);
-        if (undeclaredContents == null) {
-          undeclaredContents = new ArrayList<T>();
-          undeclaredOutputs.put(tag, undeclaredContents);
-        }
-        undeclaredContents.add(output);
-      } else {
-        bundle.add(output);
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoMultiEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoMultiEvaluatorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoMultiEvaluatorFactory.java
deleted file mode 100644
index 659bdd2..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoMultiEvaluatorFactory.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessExecutionContext.InProcessStepContext;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.ParDoInProcessEvaluator.BundleOutputManager;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo.BoundMulti;
-import com.google.cloud.dataflow.sdk.util.DoFnRunner;
-import com.google.cloud.dataflow.sdk.util.DoFnRunners;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * The {@link InProcessPipelineRunner} {@link TransformEvaluatorFactory} for the
- * {@link BoundMulti} primitive {@link PTransform}.
- */
-class ParDoMultiEvaluatorFactory implements TransformEvaluatorFactory {
-  @Override
-  public <T> TransformEvaluator<T> forApplication(
-      AppliedPTransform<?, ?, ?> application,
-      CommittedBundle<?> inputBundle,
-      InProcessEvaluationContext evaluationContext) {
-    @SuppressWarnings({"cast", "unchecked", "rawtypes"})
-    TransformEvaluator<T> evaluator = (TransformEvaluator<T>) createMultiEvaluator(
-            (AppliedPTransform) application, inputBundle, evaluationContext);
-    return evaluator;
-  }
-
-  private static <InT, OuT> ParDoInProcessEvaluator<InT> createMultiEvaluator(
-      AppliedPTransform<PCollection<InT>, PCollectionTuple, BoundMulti<InT, OuT>> application,
-      CommittedBundle<InT> inputBundle,
-      InProcessEvaluationContext evaluationContext) {
-    PCollectionTuple output = application.getOutput();
-    Map<TupleTag<?>, PCollection<?>> outputs = output.getAll();
-    Map<TupleTag<?>, UncommittedBundle<?>> outputBundles = new HashMap<>();
-    for (Map.Entry<TupleTag<?>, PCollection<?>> outputEntry : outputs.entrySet()) {
-      outputBundles.put(
-          outputEntry.getKey(),
-          evaluationContext.createBundle(inputBundle, outputEntry.getValue()));
-    }
-    InProcessExecutionContext executionContext =
-        evaluationContext.getExecutionContext(application, inputBundle.getKey());
-    String stepName = evaluationContext.getStepName(application);
-    InProcessStepContext stepContext =
-        executionContext.getOrCreateStepContext(stepName, stepName, null);
-
-    CounterSet counters = evaluationContext.createCounterSet();
-
-    DoFn<InT, OuT> fn = application.getTransform().getFn();
-    DoFnRunner<InT, OuT> runner =
-        DoFnRunners.createDefault(
-            evaluationContext.getPipelineOptions(),
-            fn,
-            evaluationContext.createSideInputReader(application.getTransform().getSideInputs()),
-            BundleOutputManager.create(outputBundles),
-            application.getTransform().getMainOutputTag(),
-            application.getTransform().getSideOutputTags().getAll(),
-            stepContext,
-            counters.getAddCounterMutator(),
-            application.getInput().getWindowingStrategy());
-
-    runner.startBundle();
-
-    return new ParDoInProcessEvaluator<>(
-        runner, application, counters, outputBundles.values(), stepContext);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoSingleEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoSingleEvaluatorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoSingleEvaluatorFactory.java
deleted file mode 100644
index e9bc1f7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ParDoSingleEvaluatorFactory.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessExecutionContext.InProcessStepContext;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.ParDoInProcessEvaluator.BundleOutputManager;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo.Bound;
-import com.google.cloud.dataflow.sdk.util.DoFnRunner;
-import com.google.cloud.dataflow.sdk.util.DoFnRunners;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import java.util.Collections;
-
-/**
- * The {@link InProcessPipelineRunner} {@link TransformEvaluatorFactory} for the
- * {@link Bound ParDo.Bound} primitive {@link PTransform}.
- */
-class ParDoSingleEvaluatorFactory implements TransformEvaluatorFactory {
-  @Override
-  public <T> TransformEvaluator<T> forApplication(
-      final AppliedPTransform<?, ?, ?> application,
-      CommittedBundle<?> inputBundle,
-      InProcessEvaluationContext evaluationContext) {
-    @SuppressWarnings({"cast", "unchecked", "rawtypes"})
-    TransformEvaluator<T> evaluator = (TransformEvaluator<T>) createSingleEvaluator(
-            (AppliedPTransform) application, inputBundle, evaluationContext);
-    return evaluator;
-  }
-
-  private static <InputT, OutputT> ParDoInProcessEvaluator<InputT> createSingleEvaluator(
-      @SuppressWarnings("rawtypes") AppliedPTransform<PCollection<InputT>, PCollection<OutputT>,
-          Bound<InputT, OutputT>> application,
-      CommittedBundle<InputT> inputBundle, InProcessEvaluationContext evaluationContext) {
-    TupleTag<OutputT> mainOutputTag = new TupleTag<>("out");
-    UncommittedBundle<OutputT> outputBundle =
-        evaluationContext.createBundle(inputBundle, application.getOutput());
-
-    InProcessExecutionContext executionContext =
-        evaluationContext.getExecutionContext(application, inputBundle.getKey());
-    String stepName = evaluationContext.getStepName(application);
-    InProcessStepContext stepContext =
-        executionContext.getOrCreateStepContext(stepName, stepName, null);
-
-    CounterSet counters = evaluationContext.createCounterSet();
-
-    DoFnRunner<InputT, OutputT> runner =
-        DoFnRunners.createDefault(
-            evaluationContext.getPipelineOptions(),
-            application.getTransform().getFn(),
-            evaluationContext.createSideInputReader(application.getTransform().getSideInputs()),
-            BundleOutputManager.create(
-                Collections.<TupleTag<?>, UncommittedBundle<?>>singletonMap(
-                    mainOutputTag, outputBundle)),
-            mainOutputTag,
-            Collections.<TupleTag<?>>emptyList(),
-            stepContext,
-            counters.getAddCounterMutator(),
-            application.getInput().getWindowingStrategy());
-
-    runner.startBundle();
-    return new ParDoInProcessEvaluator<InputT>(
-        runner,
-        application,
-        counters,
-        Collections.<UncommittedBundle<?>>singleton(outputBundle),
-        stepContext);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/StepAndKey.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/StepAndKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/StepAndKey.java
deleted file mode 100644
index 1595572..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/StepAndKey.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.common.base.MoreObjects;
-
-import java.util.Objects;
-
-/**
- * A (Step, Key) pair. This is useful as a map key or cache key for things that are available
- * per-step in a keyed manner (e.g. State).
- */
-final class StepAndKey {
-  private final AppliedPTransform<?, ?, ?> step;
-  private final Object key;
-
-  /**
-   * Create a new {@link StepAndKey} with the provided step and key.
-   */
-  public static StepAndKey of(AppliedPTransform<?, ?, ?> step, Object key) {
-    return new StepAndKey(step, key);
-  }
-
-  private StepAndKey(AppliedPTransform<?, ?, ?> step, Object key) {
-    this.step = step;
-    this.key = key;
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(StepAndKey.class)
-        .add("step", step.getFullName())
-        .add("key", key)
-        .toString();
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(step, key);
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == this) {
-      return true;
-    } else if (!(other instanceof StepAndKey)) {
-      return false;
-    } else {
-      StepAndKey that = (StepAndKey) other;
-      return Objects.equals(this.step, that.step)
-          && Objects.equals(this.key, that.key);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/StepTransformResult.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/StepTransformResult.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/StepTransformResult.java
deleted file mode 100644
index 3c4ee29..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/StepTransformResult.java
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.TimerUpdate;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.util.state.CopyOnAccessInMemoryStateInternals;
-import com.google.common.base.MoreObjects;
-import com.google.common.collect.ImmutableList;
-
-import org.joda.time.Instant;
-
-import java.util.Collection;
-
-/**
- * An immutable {@link InProcessTransformResult}.
- */
-public class StepTransformResult implements InProcessTransformResult {
-  private final AppliedPTransform<?, ?, ?> transform;
-  private final Iterable<? extends UncommittedBundle<?>> bundles;
-  private final CopyOnAccessInMemoryStateInternals<?> state;
-  private final TimerUpdate timerUpdate;
-  private final CounterSet counters;
-  private final Instant watermarkHold;
-
-  private StepTransformResult(
-      AppliedPTransform<?, ?, ?> transform,
-      Iterable<? extends UncommittedBundle<?>> outputBundles,
-      CopyOnAccessInMemoryStateInternals<?> state,
-      TimerUpdate timerUpdate,
-      CounterSet counters,
-      Instant watermarkHold) {
-    this.transform = transform;
-    this.bundles = outputBundles;
-    this.state = state;
-    this.timerUpdate = timerUpdate;
-    this.counters = counters;
-    this.watermarkHold = watermarkHold;
-  }
-
-  @Override
-  public Iterable<? extends UncommittedBundle<?>> getOutputBundles() {
-    return bundles;
-  }
-
-  @Override
-  public CounterSet getCounters() {
-    return counters;
-  }
-
-  @Override
-  public AppliedPTransform<?, ?, ?> getTransform() {
-    return transform;
-  }
-
-  @Override
-  public Instant getWatermarkHold() {
-    return watermarkHold;
-  }
-
-  @Override
-  public CopyOnAccessInMemoryStateInternals<?> getState() {
-    return state;
-  }
-
-  @Override
-  public TimerUpdate getTimerUpdate() {
-    return timerUpdate;
-  }
-
-  public static Builder withHold(AppliedPTransform<?, ?, ?> transform, Instant watermarkHold) {
-    return new Builder(transform, watermarkHold);
-  }
-
-  public static Builder withoutHold(AppliedPTransform<?, ?, ?> transform) {
-    return new Builder(transform, BoundedWindow.TIMESTAMP_MAX_VALUE);
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(StepTransformResult.class)
-        .add("transform", transform)
-        .toString();
-  }
-
-  /**
-   * A builder for creating instances of {@link StepTransformResult}.
-   */
-  public static class Builder {
-    private final AppliedPTransform<?, ?, ?> transform;
-    private final ImmutableList.Builder<UncommittedBundle<?>> bundlesBuilder;
-    private CopyOnAccessInMemoryStateInternals<?> state;
-    private TimerUpdate timerUpdate;
-    private CounterSet counters;
-    private final Instant watermarkHold;
-
-    private Builder(AppliedPTransform<?, ?, ?> transform, Instant watermarkHold) {
-      this.transform = transform;
-      this.watermarkHold = watermarkHold;
-      this.bundlesBuilder = ImmutableList.builder();
-      this.timerUpdate = TimerUpdate.builder(null).build();
-    }
-
-    public StepTransformResult build() {
-      return new StepTransformResult(
-          transform,
-          bundlesBuilder.build(),
-          state,
-          timerUpdate,
-          counters,
-          watermarkHold);
-    }
-
-    public Builder withCounters(CounterSet counters) {
-      this.counters = counters;
-      return this;
-    }
-
-    public Builder withState(CopyOnAccessInMemoryStateInternals<?> state) {
-      this.state = state;
-      return this;
-    }
-
-    public Builder withTimerUpdate(TimerUpdate timerUpdate) {
-      this.timerUpdate = timerUpdate;
-      return this;
-    }
-
-    public Builder addOutput(
-        UncommittedBundle<?> outputBundle, UncommittedBundle<?>... outputBundles) {
-      bundlesBuilder.add(outputBundle);
-      bundlesBuilder.add(outputBundles);
-      return this;
-    }
-
-    public Builder addOutput(Collection<UncommittedBundle<?>> outputBundles) {
-      bundlesBuilder.addAll(outputBundles);
-      return this;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluator.java
deleted file mode 100644
index 270557d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluator.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-
-/**
- * An evaluator of a specific application of a transform. Will be used for at least one
- * {@link CommittedBundle}.
- *
- * @param <InputT> the type of elements that will be passed to {@link #processElement}
- */
-public interface TransformEvaluator<InputT> {
-  /**
-   * Process an element in the input {@link CommittedBundle}.
-   *
-   * @param element the element to process
-   */
-  void processElement(WindowedValue<InputT> element) throws Exception;
-
-  /**
-   * Finish processing the bundle of this {@link TransformEvaluator}.
-   *
-   * After {@link #finishBundle()} is called, the {@link TransformEvaluator} will not be reused,
-   * and no more elements will be processed.
-   *
-   * @return an {@link InProcessTransformResult} containing the results of this bundle evaluation.
-   */
-  InProcessTransformResult finishBundle() throws Exception;
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluatorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluatorFactory.java
deleted file mode 100644
index 860ddfe..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluatorFactory.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-
-import javax.annotation.Nullable;
-
-/**
- * A factory for creating instances of {@link TransformEvaluator} for the application of a
- * {@link PTransform}.
- */
-public interface TransformEvaluatorFactory {
-  /**
-   * Create a new {@link TransformEvaluator} for the application of the {@link PTransform}.
-   *
-   * Any work that must be done before input elements are processed (such as calling
-   * {@link DoFn#startBundle(DoFn.Context)}) must be done before the {@link TransformEvaluator} is
-   * made available to the caller.
-   *
-   * @throws Exception whenever constructing the underlying evaluator throws an exception
-   */
-  <InputT> TransformEvaluator<InputT> forApplication(
-      AppliedPTransform<?, ?, ?> application, @Nullable CommittedBundle<?> inputBundle,
-      InProcessEvaluationContext evaluationContext) throws Exception;
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluatorRegistry.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluatorRegistry.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluatorRegistry.java
deleted file mode 100644
index 0c8cb7e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformEvaluatorRegistry.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.io.Read;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.Flatten.FlattenPCollectionList;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.common.collect.ImmutableMap;
-
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link TransformEvaluatorFactory} that delegates to primitive {@link TransformEvaluatorFactory}
- * implementations based on the type of {@link PTransform} of the application.
- */
-class TransformEvaluatorRegistry implements TransformEvaluatorFactory {
-  public static TransformEvaluatorRegistry defaultRegistry() {
-    @SuppressWarnings("rawtypes")
-    ImmutableMap<Class<? extends PTransform>, TransformEvaluatorFactory> primitives =
-        ImmutableMap.<Class<? extends PTransform>, TransformEvaluatorFactory>builder()
-            .put(Read.Bounded.class, new BoundedReadEvaluatorFactory())
-            .put(Read.Unbounded.class, new UnboundedReadEvaluatorFactory())
-            .put(ParDo.Bound.class, new ParDoSingleEvaluatorFactory())
-            .put(ParDo.BoundMulti.class, new ParDoMultiEvaluatorFactory())
-            .put(
-                GroupByKeyEvaluatorFactory.InProcessGroupByKeyOnly.class,
-                new GroupByKeyEvaluatorFactory())
-            .put(FlattenPCollectionList.class, new FlattenEvaluatorFactory())
-            .put(ViewEvaluatorFactory.WriteView.class, new ViewEvaluatorFactory())
-            .build();
-    return new TransformEvaluatorRegistry(primitives);
-  }
-
-  // the TransformEvaluatorFactories can construct instances of all generic types of transform,
-  // so all instances of a primitive can be handled with the same evaluator factory.
-  @SuppressWarnings("rawtypes")
-  private final Map<Class<? extends PTransform>, TransformEvaluatorFactory> factories;
-
-  private TransformEvaluatorRegistry(
-      @SuppressWarnings("rawtypes")
-      Map<Class<? extends PTransform>, TransformEvaluatorFactory> factories) {
-    this.factories = factories;
-  }
-
-  @Override
-  public <InputT> TransformEvaluator<InputT> forApplication(
-      AppliedPTransform<?, ?, ?> application,
-      @Nullable CommittedBundle<?> inputBundle,
-      InProcessEvaluationContext evaluationContext)
-      throws Exception {
-    TransformEvaluatorFactory factory = factories.get(application.getTransform().getClass());
-    return factory.forApplication(application, inputBundle, evaluationContext);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutor.java
deleted file mode 100644
index d630749..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutor.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.common.base.Throwables;
-
-import java.util.concurrent.Callable;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link Callable} responsible for constructing a {@link TransformEvaluator} from a
- * {@link TransformEvaluatorFactory} and evaluating it on some bundle of input, and registering
- * the result using a registered {@link CompletionCallback}.
- *
- * <p>A {@link TransformExecutor} that is currently executing also provides access to the thread
- * that it is being executed on.
- */
-class TransformExecutor<T> implements Callable<InProcessTransformResult> {
-  public static <T> TransformExecutor<T> create(
-      TransformEvaluatorFactory factory,
-      InProcessEvaluationContext evaluationContext,
-      CommittedBundle<T> inputBundle,
-      AppliedPTransform<?, ?, ?> transform,
-      CompletionCallback completionCallback,
-      TransformExecutorService transformEvaluationState) {
-    return new TransformExecutor<>(
-        factory,
-        evaluationContext,
-        inputBundle,
-        transform,
-        completionCallback,
-        transformEvaluationState);
-  }
-
-  private final TransformEvaluatorFactory evaluatorFactory;
-  private final InProcessEvaluationContext evaluationContext;
-
-  /** The transform that will be evaluated. */
-  private final AppliedPTransform<?, ?, ?> transform;
-  /** The inputs this {@link TransformExecutor} will deliver to the transform. */
-  private final CommittedBundle<T> inputBundle;
-
-  private final CompletionCallback onComplete;
-  private final TransformExecutorService transformEvaluationState;
-
-  private Thread thread;
-
-  private TransformExecutor(
-      TransformEvaluatorFactory factory,
-      InProcessEvaluationContext evaluationContext,
-      CommittedBundle<T> inputBundle,
-      AppliedPTransform<?, ?, ?> transform,
-      CompletionCallback completionCallback,
-      TransformExecutorService transformEvaluationState) {
-    this.evaluatorFactory = factory;
-    this.evaluationContext = evaluationContext;
-
-    this.inputBundle = inputBundle;
-    this.transform = transform;
-
-    this.onComplete = completionCallback;
-
-    this.transformEvaluationState = transformEvaluationState;
-  }
-
-  @Override
-  public InProcessTransformResult call() {
-    this.thread = Thread.currentThread();
-    try {
-      TransformEvaluator<T> evaluator =
-          evaluatorFactory.forApplication(transform, inputBundle, evaluationContext);
-      if (inputBundle != null) {
-        for (WindowedValue<T> value : inputBundle.getElements()) {
-          evaluator.processElement(value);
-        }
-      }
-      InProcessTransformResult result = evaluator.finishBundle();
-      onComplete.handleResult(inputBundle, result);
-      return result;
-    } catch (Throwable t) {
-      onComplete.handleThrowable(inputBundle, t);
-      throw Throwables.propagate(t);
-    } finally {
-      this.thread = null;
-      transformEvaluationState.complete(this);
-    }
-  }
-
-  /**
-   * If this {@link TransformExecutor} is currently executing, return the thread it is executing in.
-   * Otherwise, return null.
-   */
-  @Nullable
-  public Thread getThread() {
-    return this.thread;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutorService.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutorService.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutorService.java
deleted file mode 100644
index 3f00da6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutorService.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-/**
- * Schedules and completes {@link TransformExecutor TransformExecutors}, controlling concurrency as
- * appropriate for the {@link StepAndKey} the executor exists for.
- */
-interface TransformExecutorService {
-  /**
-   * Schedule the provided work to be eventually executed.
-   */
-  void schedule(TransformExecutor<?> work);
-
-  /**
-   * Finish executing the provided work. This may cause additional
-   * {@link TransformExecutor TransformExecutors} to be evaluated.
-   */
-  void complete(TransformExecutor<?> completed);
-}
-

[11/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSet.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSet.java
deleted file mode 100644
index 96629b1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MergingActiveWindowSet.java
+++ /dev/null
@@ -1,544 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.MapCoder;
-import com.google.cloud.dataflow.sdk.coders.SetCoder;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.util.state.StateInternals;
-import com.google.cloud.dataflow.sdk.util.state.StateNamespaces;
-import com.google.cloud.dataflow.sdk.util.state.StateTag;
-import com.google.cloud.dataflow.sdk.util.state.StateTags;
-import com.google.cloud.dataflow.sdk.util.state.ValueState;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Iterables;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import javax.annotation.Nullable;
-
-/**
- * An {@link ActiveWindowSet} for merging {@link WindowFn} implementations.
- *
- * <p>The underlying notion of {@link MergingActiveWindowSet} is that of representing equivalence
- * classes of merged windows as a mapping from the merged "super-window" to a set of
- * <i>state address</i> windows in which some state has been persisted. The mapping need not
- * contain EPHEMERAL windows, because they are created and merged without any persistent state.
- * Each window must be a state address window for at most one window, so the mapping is
- * invertible.
- *
- * <p>The states of a non-expired window are treated as follows:
- *
- * <ul>
- *   <li><b>NEW</b>: a NEW window has an empty set of associated state address windows.</li>
- *   <li><b>ACTIVE</b>: an ACTIVE window will be associated with some nonempty set of state
- *       address windows. If the window has not merged, this will necessarily be the singleton set
- *       containing just itself, but it is not required that an ACTIVE window be amongst its
- *       state address windows.</li>
- *   <li><b>MERGED</b>: a MERGED window will be in the set of associated windows for some
- *       other window - that window is retrieved via {@link #representative} (this reverse
- *       association is implemented in O(1) time).</li>
- *   <li><b>EPHEMERAL</b>: EPHEMERAL windows are not persisted but are tracked transiently;
- *       an EPHEMERAL window must be registered with this {@link ActiveWindowSet} by a call
- *       to {@link #recordMerge} prior to any request for a {@link #representative}.</li>
- * </ul>
- *
- * <p>To illustrate why an ACTIVE window need not be amongst its own state address windows,
- * consider two active windows W1 and W2 that are merged to form W12. Further writes may be
- * applied to either of W1 or W2, since a read of W12 implies reading both W1 and W2 and merging
- * their results. Hence W12 need not have state directly associated with it.
- */
-public class MergingActiveWindowSet<W extends BoundedWindow> implements ActiveWindowSet<W> {
-  private final WindowFn<Object, W> windowFn;
-  private final Map<W, Set<W>> activeWindowToStateAddressWindows;
-
-  /**
-   * As above, but only for EPHEMERAL windows. Does not need to be persisted.
-   */
-  private final Map<W, Set<W>> activeWindowToEphemeralWindows;
-
-  /**
-   * A map from window to the ACTIVE window it has been merged into.
-   *
-   * <p>Does not need to be persisted.
-   *
-   * <ul>
-   * <li>Key window may be ACTIVE, MERGED or EPHEMERAL.
-   * <li>ACTIVE windows map to themselves.
-   * <li>If W1 maps to W2 then W2 is in {@link #activeWindowToStateAddressWindows}.
-   * <li>If W1 = W2 then W1 is ACTIVE. If W1 is in the state address window set for W2 then W1 is
-   * MERGED. Otherwise W1 is EPHEMERAL.
-   * </ul>
-   */
-  private final Map<W, W> windowToActiveWindow;
-
-  /**
-   * Deep clone of {@link #activeWindowToStateAddressWindows} as of last commit.
-   *
-   * <p>Used to avoid writing to state if no changes have been made during the work unit.
-   */
-  private final Map<W, Set<W>> originalActiveWindowToStateAddressWindows;
-
-  /**
-   * Handle representing our state in the backend.
-   */
-  private final ValueState<Map<W, Set<W>>> valueState;
-
-  public MergingActiveWindowSet(WindowFn<Object, W> windowFn, StateInternals<?> state) {
-    this.windowFn = windowFn;
-
-    StateTag<Object, ValueState<Map<W, Set<W>>>> mergeTreeAddr =
-        StateTags.makeSystemTagInternal(StateTags.value(
-            "tree", MapCoder.of(windowFn.windowCoder(), SetCoder.of(windowFn.windowCoder()))));
-    valueState = state.state(StateNamespaces.global(), mergeTreeAddr);
-    // Little use trying to prefetch this state since the ReduceFnRunner is stymied until it is
-    // available.
-    activeWindowToStateAddressWindows = emptyIfNull(valueState.read());
-    activeWindowToEphemeralWindows = new HashMap<>();
-    originalActiveWindowToStateAddressWindows = deepCopy(activeWindowToStateAddressWindows);
-    windowToActiveWindow = invert(activeWindowToStateAddressWindows);
-  }
-
-  @Override
-  public void removeEphemeralWindows() {
-    for (Map.Entry<W, Set<W>> entry : activeWindowToEphemeralWindows.entrySet()) {
-      for (W ephemeral : entry.getValue()) {
-        windowToActiveWindow.remove(ephemeral);
-      }
-    }
-    activeWindowToEphemeralWindows.clear();
-  }
-
-  @Override
-  public void persist() {
-    if (activeWindowToStateAddressWindows.isEmpty()) {
-      // Force all persistent state to disappear.
-      valueState.clear();
-      return;
-    }
-    if (activeWindowToStateAddressWindows.equals(originalActiveWindowToStateAddressWindows)) {
-      // No change.
-      return;
-    }
-    // All NEW windows must have been accounted for.
-    for (Map.Entry<W, Set<W>> entry : activeWindowToStateAddressWindows.entrySet()) {
-      Preconditions.checkState(
-          !entry.getValue().isEmpty(), "Cannot persist NEW window %s", entry.getKey());
-    }
-    // Should be no EPHEMERAL windows.
-    Preconditions.checkState(
-        activeWindowToEphemeralWindows.isEmpty(), "Unexpected EPHEMERAL windows before persist");
-
-    valueState.write(activeWindowToStateAddressWindows);
-    // No need to update originalActiveWindowToStateAddressWindows since this object is about to
-    // become garbage.
-  }
-
-  @Override
-  @Nullable
-  public W representative(W window) {
-    return windowToActiveWindow.get(window);
-  }
-
-  @Override
-  public Set<W> getActiveWindows() {
-    return activeWindowToStateAddressWindows.keySet();
-  }
-
-  @Override
-  public boolean isActive(W window) {
-    return activeWindowToStateAddressWindows.containsKey(window);
-  }
-
-  @Override
-  public void addNew(W window) {
-    if (!windowToActiveWindow.containsKey(window)) {
-      activeWindowToStateAddressWindows.put(window, new LinkedHashSet<W>());
-    }
-  }
-
-  @Override
-  public void addActive(W window) {
-    if (!windowToActiveWindow.containsKey(window)) {
-      Set<W> stateAddressWindows = new LinkedHashSet<>();
-      stateAddressWindows.add(window);
-      activeWindowToStateAddressWindows.put(window, stateAddressWindows);
-      windowToActiveWindow.put(window, window);
-    }
-  }
-
-  @Override
-  public void remove(W window) {
-    Set<W> stateAddressWindows = activeWindowToStateAddressWindows.get(window);
-    if (stateAddressWindows == null) {
-      // Window is no longer active.
-      return;
-    }
-    for (W stateAddressWindow : stateAddressWindows) {
-      windowToActiveWindow.remove(stateAddressWindow);
-    }
-    activeWindowToStateAddressWindows.remove(window);
-    Set<W> ephemeralWindows = activeWindowToEphemeralWindows.get(window);
-    if (ephemeralWindows != null) {
-      for (W ephemeralWindow : ephemeralWindows) {
-        windowToActiveWindow.remove(ephemeralWindow);
-      }
-      activeWindowToEphemeralWindows.remove(window);
-    }
-    windowToActiveWindow.remove(window);
-  }
-
-  private class MergeContextImpl extends WindowFn<Object, W>.MergeContext {
-    private MergeCallback<W> mergeCallback;
-    private final List<Collection<W>> allToBeMerged;
-    private final List<Collection<W>> allActiveToBeMerged;
-    private final List<W> allMergeResults;
-    private final Set<W> seen;
-
-    public MergeContextImpl(MergeCallback<W> mergeCallback) {
-      windowFn.super();
-      this.mergeCallback = mergeCallback;
-      allToBeMerged = new ArrayList<>();
-      allActiveToBeMerged = new ArrayList<>();
-      allMergeResults = new ArrayList<>();
-      seen = new HashSet<>();
-    }
-
-    @Override
-    public Collection<W> windows() {
-      return activeWindowToStateAddressWindows.keySet();
-    }
-
-    @Override
-    public void merge(Collection<W> toBeMerged, W mergeResult) throws Exception {
-      // The arguments have come from userland.
-      Preconditions.checkNotNull(toBeMerged);
-      Preconditions.checkNotNull(mergeResult);
-      List<W> copyOfToBeMerged = new ArrayList<>(toBeMerged.size());
-      List<W> activeToBeMerged = new ArrayList<>(toBeMerged.size());
-      boolean includesMergeResult = false;
-      for (W window : toBeMerged) {
-        Preconditions.checkNotNull(window);
-        Preconditions.checkState(
-            isActive(window), "Expecting merge window %s to be active", window);
-        if (window.equals(mergeResult)) {
-          includesMergeResult = true;
-        }
-        boolean notDup = seen.add(window);
-        Preconditions.checkState(
-            notDup, "Expecting merge window %s to appear in at most one merge set", window);
-        copyOfToBeMerged.add(window);
-        if (!activeWindowToStateAddressWindows.get(window).isEmpty()) {
-          activeToBeMerged.add(window);
-        }
-      }
-      if (!includesMergeResult) {
-        Preconditions.checkState(
-            !isActive(mergeResult), "Expecting result window %s to be new", mergeResult);
-      }
-      allToBeMerged.add(copyOfToBeMerged);
-      allActiveToBeMerged.add(activeToBeMerged);
-      allMergeResults.add(mergeResult);
-    }
-
-    public void recordMerges() throws Exception {
-      for (int i = 0; i < allToBeMerged.size(); i++) {
-        mergeCallback.prefetchOnMerge(
-            allToBeMerged.get(i), allActiveToBeMerged.get(i), allMergeResults.get(i));
-      }
-      for (int i = 0; i < allToBeMerged.size(); i++) {
-        mergeCallback.onMerge(
-            allToBeMerged.get(i), allActiveToBeMerged.get(i), allMergeResults.get(i));
-        recordMerge(allToBeMerged.get(i), allMergeResults.get(i));
-      }
-      allToBeMerged.clear();
-      allActiveToBeMerged.clear();
-      allMergeResults.clear();
-      seen.clear();
-    }
-  }
-
-  @Override
-  public void merge(MergeCallback<W> mergeCallback) throws Exception {
-    MergeContextImpl context = new MergeContextImpl(mergeCallback);
-
-    // See what the window function does with the NEW and already ACTIVE windows.
-    // Entering userland.
-    windowFn.mergeWindows(context);
-
-    // Actually do the merging and invoke the callbacks.
-    context.recordMerges();
-
-    // Any remaining NEW windows should become implicitly ACTIVE.
-    for (Map.Entry<W, Set<W>> entry : activeWindowToStateAddressWindows.entrySet()) {
-      if (entry.getValue().isEmpty()) {
-        // This window was NEW but since it survived merging must now become ACTIVE.
-        W window = entry.getKey();
-        entry.getValue().add(window);
-        windowToActiveWindow.put(window, window);
-      }
-    }
-  }
-
-  /**
-   * A {@link WindowFn#mergeWindows} call has determined that {@code toBeMerged} (which must
-   * all be ACTIVE) should be considered equivalent to {@code mergeResult} (which is either a
-   * member of {@code toBeMerged} or is a new window). Make the corresponding change in
-   * the active window set.
-   */
-  private void recordMerge(Collection<W> toBeMerged, W mergeResult) throws Exception {
-    Set<W> newStateAddressWindows = new LinkedHashSet<>();
-    Set<W> existingStateAddressWindows = activeWindowToStateAddressWindows.get(mergeResult);
-    if (existingStateAddressWindows != null) {
-      // Preserve all the existing state address windows for mergeResult.
-      newStateAddressWindows.addAll(existingStateAddressWindows);
-    }
-
-    Set<W> newEphemeralWindows = new HashSet<>();
-    Set<W> existingEphemeralWindows = activeWindowToEphemeralWindows.get(mergeResult);
-    if (existingEphemeralWindows != null) {
-      // Preserve all the existing EPHEMERAL windows for mergeResult.
-      newEphemeralWindows.addAll(existingEphemeralWindows);
-    }
-
-    for (W other : toBeMerged) {
-      Set<W> otherStateAddressWindows = activeWindowToStateAddressWindows.get(other);
-      Preconditions.checkState(otherStateAddressWindows != null, "Window %s is not ACTIVE", other);
-
-      for (W otherStateAddressWindow : otherStateAddressWindows) {
-        // Since otherStateAddressWindow equiv other AND other equiv mergeResult
-        // THEN otherStateAddressWindow equiv mergeResult.
-        newStateAddressWindows.add(otherStateAddressWindow);
-        windowToActiveWindow.put(otherStateAddressWindow, mergeResult);
-      }
-      activeWindowToStateAddressWindows.remove(other);
-
-      Set<W> otherEphemeralWindows = activeWindowToEphemeralWindows.get(other);
-      if (otherEphemeralWindows != null) {
-        for (W otherEphemeral : otherEphemeralWindows) {
-          // Since otherEphemeral equiv other AND other equiv mergeResult
-          // THEN otherEphemeral equiv mergeResult.
-          newEphemeralWindows.add(otherEphemeral);
-          windowToActiveWindow.put(otherEphemeral, mergeResult);
-        }
-      }
-      activeWindowToEphemeralWindows.remove(other);
-
-      // Now other equiv mergeResult.
-      if (otherStateAddressWindows.contains(other)) {
-        // Other was ACTIVE and is now known to be MERGED.
-      } else if (otherStateAddressWindows.isEmpty()) {
-        // Other was NEW thus has no state. It is now EPHEMERAL.
-        newEphemeralWindows.add(other);
-      } else if (other.equals(mergeResult)) {
-        // Other was ACTIVE, was never used to store elements, but is still ACTIVE.
-        // Leave it as active.
-      } else {
-        // Other was ACTIVE, was never used to store elements, and is no longer considered ACTIVE.
-        // It is now EPHEMERAL.
-        newEphemeralWindows.add(other);
-      }
-      windowToActiveWindow.put(other, mergeResult);
-    }
-
-    if (newStateAddressWindows.isEmpty()) {
-      // If newStateAddressWindows is empty then toBeMerged must have only contained EPHEMERAL
-      // windows. Promote mergeResult to be active now.
-      newStateAddressWindows.add(mergeResult);
-    }
-    windowToActiveWindow.put(mergeResult, mergeResult);
-
-    activeWindowToStateAddressWindows.put(mergeResult, newStateAddressWindows);
-    if (!newEphemeralWindows.isEmpty()) {
-      activeWindowToEphemeralWindows.put(mergeResult, newEphemeralWindows);
-    }
-
-    merged(mergeResult);
-  }
-
-  @Override
-  public void merged(W window) {
-    Set<W> stateAddressWindows = activeWindowToStateAddressWindows.get(window);
-    Preconditions.checkState(stateAddressWindows != null, "Window %s is not ACTIVE", window);
-    W first = Iterables.getFirst(stateAddressWindows, null);
-    stateAddressWindows.clear();
-    stateAddressWindows.add(first);
-  }
-
-  /**
-   * Return the state address windows for ACTIVE {@code window}
-   * from which all associated state should
-   * be read and merged.
-   */
-  @Override
-  public Set<W> readStateAddresses(W window) {
-    Set<W> stateAddressWindows = activeWindowToStateAddressWindows.get(window);
-    Preconditions.checkState(stateAddressWindows != null, "Window %s is not ACTIVE", window);
-    return stateAddressWindows;
-  }
-
-  /**
-   * Return the state address window of ACTIVE {@code window} into which all new state should be
-   * written.
-   */
-  @Override
-  public W writeStateAddress(W window) {
-    Set<W> stateAddressWindows = activeWindowToStateAddressWindows.get(window);
-    Preconditions.checkState(stateAddressWindows != null, "Window %s is not ACTIVE", window);
-    W result = Iterables.getFirst(stateAddressWindows, null);
-    Preconditions.checkState(result != null, "Window %s is still NEW", window);
-    return result;
-  }
-
-  @Override
-  public W mergedWriteStateAddress(Collection<W> toBeMerged, W mergeResult) {
-    Set<W> stateAddressWindows = activeWindowToStateAddressWindows.get(mergeResult);
-    if (stateAddressWindows != null && !stateAddressWindows.isEmpty()) {
-      return Iterables.getFirst(stateAddressWindows, null);
-    }
-    for (W mergedWindow : toBeMerged) {
-      stateAddressWindows = activeWindowToStateAddressWindows.get(mergedWindow);
-      if (stateAddressWindows != null && !stateAddressWindows.isEmpty()) {
-        return Iterables.getFirst(stateAddressWindows, null);
-      }
-    }
-    return mergeResult;
-  }
-
-  @VisibleForTesting
-  public void checkInvariants() {
-    Set<W> knownStateAddressWindows = new HashSet<>();
-    for (Map.Entry<W, Set<W>> entry : activeWindowToStateAddressWindows.entrySet()) {
-      W active = entry.getKey();
-      Preconditions.checkState(!entry.getValue().isEmpty(),
-          "Unexpected empty state address window set for ACTIVE window %s", active);
-      for (W stateAddressWindow : entry.getValue()) {
-        Preconditions.checkState(knownStateAddressWindows.add(stateAddressWindow),
-            "%s is in more than one state address window set", stateAddressWindow);
-        Preconditions.checkState(active.equals(windowToActiveWindow.get(stateAddressWindow)),
-            "%s should have %s as its ACTIVE window", stateAddressWindow, active);
-      }
-    }
-    for (Map.Entry<W, Set<W>> entry : activeWindowToEphemeralWindows.entrySet()) {
-      W active = entry.getKey();
-      Preconditions.checkState(activeWindowToStateAddressWindows.containsKey(active),
-          "%s must be ACTIVE window", active);
-      Preconditions.checkState(
-          !entry.getValue().isEmpty(), "Unexpected empty EPHEMERAL set for %s", active);
-      for (W ephemeralWindow : entry.getValue()) {
-        Preconditions.checkState(knownStateAddressWindows.add(ephemeralWindow),
-            "%s is EPHEMERAL/state address of more than one ACTIVE window", ephemeralWindow);
-        Preconditions.checkState(active.equals(windowToActiveWindow.get(ephemeralWindow)),
-            "%s should have %s as its ACTIVE window", ephemeralWindow, active);
-      }
-    }
-    for (Map.Entry<W, W> entry : windowToActiveWindow.entrySet()) {
-      Preconditions.checkState(activeWindowToStateAddressWindows.containsKey(entry.getValue()),
-          "%s should be ACTIVE since representative for %s", entry.getValue(), entry.getKey());
-    }
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder();
-    sb.append("MergingActiveWindowSet {\n");
-    for (Map.Entry<W, Set<W>> entry : activeWindowToStateAddressWindows.entrySet()) {
-      W active = entry.getKey();
-      Set<W> stateAddressWindows = entry.getValue();
-      if (stateAddressWindows.isEmpty()) {
-        sb.append("  NEW ");
-        sb.append(active);
-        sb.append('\n');
-      } else {
-        sb.append("  ACTIVE ");
-        sb.append(active);
-        sb.append(":\n");
-        for (W stateAddressWindow : stateAddressWindows) {
-          if (stateAddressWindow.equals(active)) {
-            sb.append("    ACTIVE ");
-          } else {
-            sb.append("    MERGED ");
-          }
-          sb.append(stateAddressWindow);
-          sb.append("\n");
-          W active2 = windowToActiveWindow.get(stateAddressWindow);
-          Preconditions.checkState(active2.equals(active));
-        }
-        Set<W> ephemeralWindows = activeWindowToEphemeralWindows.get(active);
-        if (ephemeralWindows != null) {
-          for (W ephemeralWindow : ephemeralWindows) {
-            sb.append("    EPHEMERAL ");
-            sb.append(ephemeralWindow);
-            sb.append('\n');
-          }
-        }
-      }
-    }
-    sb.append("}");
-    return sb.toString();
-  }
-
-  // ======================================================================
-
-  /**
-   * Replace null {@code multimap} with empty map, and replace null entries in {@code multimap} with
-   * empty sets.
-   */
-  private static <W> Map<W, Set<W>> emptyIfNull(@Nullable Map<W, Set<W>> multimap) {
-    if (multimap == null) {
-      return new HashMap<>();
-    } else {
-      for (Map.Entry<W, Set<W>> entry : multimap.entrySet()) {
-        if (entry.getValue() == null) {
-          entry.setValue(new LinkedHashSet<W>());
-        }
-      }
-      return multimap;
-    }
-  }
-
-  /** Return a deep copy of {@code multimap}. */
-  private static <W> Map<W, Set<W>> deepCopy(Map<W, Set<W>> multimap) {
-    Map<W, Set<W>> newMultimap = new HashMap<>();
-    for (Map.Entry<W, Set<W>> entry : multimap.entrySet()) {
-      newMultimap.put(entry.getKey(), new LinkedHashSet<>(entry.getValue()));
-    }
-    return newMultimap;
-  }
-
-  /** Return inversion of {@code multimap}, which must be invertible. */
-  private static <W> Map<W, W> invert(Map<W, Set<W>> multimap) {
-    Map<W, W> result = new HashMap<>();
-    for (Map.Entry<W, Set<W>> entry : multimap.entrySet()) {
-      W active = entry.getKey();
-      for (W target : entry.getValue()) {
-        W previous = result.put(target, active);
-        Preconditions.checkState(previous == null,
-            "Window %s has both %s and %s as representatives", target, previous, active);
-      }
-    }
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MimeTypes.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MimeTypes.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MimeTypes.java
deleted file mode 100644
index 489d183..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MimeTypes.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-/** Constants representing various mime types. */
-public class MimeTypes {
-  public static final String TEXT = "text/plain";
-  public static final String BINARY = "application/octet-stream";
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MonitoringUtil.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MonitoringUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MonitoringUtil.java
deleted file mode 100644
index d450187..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MonitoringUtil.java
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudTime;
-
-import com.google.api.services.dataflow.Dataflow;
-import com.google.api.services.dataflow.Dataflow.Projects.Jobs.Messages;
-import com.google.api.services.dataflow.model.JobMessage;
-import com.google.api.services.dataflow.model.ListJobMessagesResponse;
-import com.google.cloud.dataflow.sdk.PipelineResult.State;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.common.base.MoreObjects;
-import com.google.common.collect.ImmutableMap;
-
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * A helper class for monitoring jobs submitted to the service.
- */
-public final class MonitoringUtil {
-
-  private static final String GCLOUD_DATAFLOW_PREFIX = "gcloud alpha dataflow";
-  private static final String ENDPOINT_OVERRIDE_ENV_VAR =
-      "CLOUDSDK_API_ENDPOINT_OVERRIDES_DATAFLOW";
-
-  private static final Map<String, State> DATAFLOW_STATE_TO_JOB_STATE =
-      ImmutableMap
-          .<String, State>builder()
-          .put("JOB_STATE_UNKNOWN", State.UNKNOWN)
-          .put("JOB_STATE_STOPPED", State.STOPPED)
-          .put("JOB_STATE_RUNNING", State.RUNNING)
-          .put("JOB_STATE_DONE", State.DONE)
-          .put("JOB_STATE_FAILED", State.FAILED)
-          .put("JOB_STATE_CANCELLED", State.CANCELLED)
-          .put("JOB_STATE_UPDATED", State.UPDATED)
-          .build();
-
-  private String projectId;
-  private Messages messagesClient;
-
-  /**
-   * An interface that can be used for defining callbacks to receive a list
-   * of JobMessages containing monitoring information.
-   */
-  public interface JobMessagesHandler {
-    /** Process the rows. */
-    void process(List<JobMessage> messages);
-  }
-
-  /** A handler that prints monitoring messages to a stream. */
-  public static class PrintHandler implements JobMessagesHandler {
-    private PrintStream out;
-
-    /**
-     * Construct the handler.
-     *
-     * @param stream The stream to write the messages to.
-     */
-    public PrintHandler(PrintStream stream) {
-      out = stream;
-    }
-
-    @Override
-    public void process(List<JobMessage> messages) {
-      for (JobMessage message : messages) {
-        if (message.getMessageText() == null || message.getMessageText().isEmpty()) {
-          continue;
-        }
-        String importanceString = null;
-        if (message.getMessageImportance() == null) {
-          continue;
-        } else if (message.getMessageImportance().equals("JOB_MESSAGE_ERROR")) {
-          importanceString = "Error:   ";
-        } else if (message.getMessageImportance().equals("JOB_MESSAGE_WARNING")) {
-          importanceString = "Warning: ";
-        } else if (message.getMessageImportance().equals("JOB_MESSAGE_BASIC")) {
-          importanceString = "Basic:  ";
-        } else if (message.getMessageImportance().equals("JOB_MESSAGE_DETAILED")) {
-          importanceString = "Detail:  ";
-        } else {
-          // TODO: Remove filtering here once getJobMessages supports minimum
-          // importance.
-          continue;
-        }
-        @Nullable Instant time = TimeUtil.fromCloudTime(message.getTime());
-        if (time == null) {
-          out.print("UNKNOWN TIMESTAMP: ");
-        } else {
-          out.print(time + ": ");
-        }
-        if (importanceString != null) {
-          out.print(importanceString);
-        }
-        out.println(message.getMessageText());
-      }
-      out.flush();
-    }
-  }
-
-  /** Construct a helper for monitoring. */
-  public MonitoringUtil(String projectId, Dataflow dataflow) {
-    this(projectId, dataflow.projects().jobs().messages());
-  }
-
-  // @VisibleForTesting
-  MonitoringUtil(String projectId, Messages messagesClient) {
-    this.projectId = projectId;
-    this.messagesClient = messagesClient;
-  }
-
-  /**
-   * Comparator for sorting rows in increasing order based on timestamp.
-   */
-  public static class TimeStampComparator implements Comparator<JobMessage> {
-    @Override
-    public int compare(JobMessage o1, JobMessage o2) {
-      @Nullable Instant t1 = fromCloudTime(o1.getTime());
-      if (t1 == null) {
-        return -1;
-      }
-      @Nullable Instant t2 = fromCloudTime(o2.getTime());
-      if (t2 == null) {
-        return 1;
-      }
-      return t1.compareTo(t2);
-    }
-  }
-
-  /**
-   * Return job messages sorted in ascending order by timestamp.
-   * @param jobId The id of the job to get the messages for.
-   * @param startTimestampMs Return only those messages with a
-   *   timestamp greater than this value.
-   * @return collection of messages
-   * @throws IOException
-   */
-  public ArrayList<JobMessage> getJobMessages(
-      String jobId, long startTimestampMs) throws IOException {
-    // TODO: Allow filtering messages by importance
-    Instant startTimestamp = new Instant(startTimestampMs);
-    ArrayList<JobMessage> allMessages = new ArrayList<>();
-    String pageToken = null;
-    while (true) {
-      Messages.List listRequest = messagesClient.list(projectId, jobId);
-      if (pageToken != null) {
-        listRequest.setPageToken(pageToken);
-      }
-      ListJobMessagesResponse response = listRequest.execute();
-
-      if (response == null || response.getJobMessages() == null) {
-        return allMessages;
-      }
-
-      for (JobMessage m : response.getJobMessages()) {
-        @Nullable Instant timestamp = fromCloudTime(m.getTime());
-        if (timestamp == null) {
-          continue;
-        }
-        if (timestamp.isAfter(startTimestamp)) {
-          allMessages.add(m);
-        }
-      }
-
-      if (response.getNextPageToken() == null) {
-        break;
-      } else {
-        pageToken = response.getNextPageToken();
-      }
-    }
-
-    Collections.sort(allMessages, new TimeStampComparator());
-    return allMessages;
-  }
-
-  public static String getJobMonitoringPageURL(String projectName, String jobId) {
-    try {
-      // Project name is allowed in place of the project id: the user will be redirected to a URL
-      // that has the project name replaced with project id.
-      return String.format(
-          "https://console.developers.google.com/project/%s/dataflow/job/%s",
-          URLEncoder.encode(projectName, "UTF-8"),
-          URLEncoder.encode(jobId, "UTF-8"));
-    } catch (UnsupportedEncodingException e) {
-      // Should never happen.
-      throw new AssertionError("UTF-8 encoding is not supported by the environment", e);
-    }
-  }
-
-  public static String getGcloudCancelCommand(DataflowPipelineOptions options, String jobId) {
-
-    // If using a different Dataflow API than default, prefix command with an API override.
-    String dataflowApiOverridePrefix = "";
-    String apiUrl = options.getDataflowClient().getBaseUrl();
-    if (!apiUrl.equals(Dataflow.DEFAULT_BASE_URL)) {
-      dataflowApiOverridePrefix = String.format("%s=%s ", ENDPOINT_OVERRIDE_ENV_VAR, apiUrl);
-    }
-
-    // Assemble cancel command from optional prefix and project/job parameters.
-    return String.format("%s%s jobs --project=%s cancel %s",
-        dataflowApiOverridePrefix, GCLOUD_DATAFLOW_PREFIX, options.getProject(), jobId);
-  }
-
-  public static State toState(String stateName) {
-    return MoreObjects.firstNonNull(DATAFLOW_STATE_TO_JOB_STATE.get(stateName),
-        State.UNKNOWN);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MutationDetector.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MutationDetector.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MutationDetector.java
deleted file mode 100644
index 51e65ab..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MutationDetector.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-/**
- * An object for detecting illegal mutations.
- *
- * <p>The {@link AutoCloseable} aspect of this interface allows use in a try-with-resources
- * style, where the implementing class may choose to perform a final mutation check upon
- * {@link #close()}.
- */
-public interface MutationDetector extends AutoCloseable {
-  /**
-   * @throws IllegalMutationException if illegal mutations are detected.
-   */
-  void verifyUnmodified();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MutationDetectors.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MutationDetectors.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MutationDetectors.java
deleted file mode 100644
index 412e3eb..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MutationDetectors.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.common.base.Throwables;
-
-import java.util.Arrays;
-import java.util.Objects;
-
-/**
- * Static methods for creating and working with {@link MutationDetector}.
- */
-public class MutationDetectors {
-
-  private MutationDetectors() {}
-
-  /**
-     * Creates a new {@code MutationDetector} for the provided {@code value} that uses the provided
-     * {@link Coder} to perform deep copies and comparisons by serializing and deserializing values.
-     *
-     * <p>It is permissible for {@code value} to be {@code null}. Since {@code null} is immutable,
-     * the mutation check will always succeed.
-     */
-  public static <T> MutationDetector forValueWithCoder(T value, Coder<T> coder)
-      throws CoderException {
-    if (value == null) {
-      return noopMutationDetector();
-    } else {
-      return new CodedValueMutationDetector<>(value, coder);
-    }
-  }
-
-  /**
-   * Creates a new {@code MutationDetector} that always succeeds.
-   *
-   * <p>This is useful, for example, for providing a very efficient mutation detector for a value
-   * which is already immutable by design.
-   */
-  public static MutationDetector noopMutationDetector() {
-    return new NoopMutationDetector();
-  }
-
-  /**
-   * A {@link MutationDetector} for {@code null}, which is immutable.
-   */
-  private static class NoopMutationDetector implements MutationDetector {
-
-    @Override
-    public void verifyUnmodified() { }
-
-    @Override
-    public void close() { }
-  }
-
-  /**
-   * Given a value of type {@code T} and a {@link Coder} for that type, provides facilities to save
-   * check that the value has not changed.
-   *
-   * @param <T> the type of values checked for mutation
-   */
-  private static class CodedValueMutationDetector<T> implements MutationDetector {
-
-    private final Coder<T> coder;
-
-    /**
-     * A saved pointer to an in-memory value provided upon construction, which we will check for
-     * forbidden mutations.
-     */
-    private final T possiblyModifiedObject;
-
-    /**
-     * A saved encoded copy of the same value as {@link #possiblyModifiedObject}. Naturally, it
-     * will not change if {@link #possiblyModifiedObject} is mutated.
-     */
-    private final byte[] encodedOriginalObject;
-
-    /**
-     * The object decoded from {@link #encodedOriginalObject}. It will be used during every call to
-     * {@link #verifyUnmodified}, which could be called many times throughout the lifetime of this
-     * {@link CodedValueMutationDetector}.
-     */
-    private final T clonedOriginalObject;
-
-    /**
-     * Create a mutation detector for the provided {@code value}, using the provided {@link Coder}
-     * for cloning and checking serialized forms for equality.
-     */
-    public CodedValueMutationDetector(T value, Coder<T> coder) throws CoderException {
-      this.coder = coder;
-      this.possiblyModifiedObject = value;
-      this.encodedOriginalObject = CoderUtils.encodeToByteArray(coder, value);
-      this.clonedOriginalObject = CoderUtils.decodeFromByteArray(coder, encodedOriginalObject);
-    }
-
-    @Override
-    public void verifyUnmodified() {
-      try {
-        verifyUnmodifiedThrowingCheckedExceptions();
-      } catch (CoderException exn) {
-        Throwables.propagate(exn);
-      }
-    }
-
-    private void verifyUnmodifiedThrowingCheckedExceptions() throws CoderException {
-      // If either object believes they are equal, we trust that and short-circuit deeper checks.
-      if (Objects.equals(possiblyModifiedObject, clonedOriginalObject)
-          || Objects.equals(clonedOriginalObject, possiblyModifiedObject)) {
-        return;
-      }
-
-      // Since retainedObject is in general an instance of a subclass of T, when it is cloned to
-      // clonedObject using a Coder<T>, the two will generally be equivalent viewed as a T, but in
-      // general neither retainedObject.equals(clonedObject) nor clonedObject.equals(retainedObject)
-      // will hold.
-      //
-      // For example, CoderUtils.clone(IterableCoder<Integer>, IterableSubclass<Integer>) will
-      // produce an ArrayList<Integer> with the same contents as the IterableSubclass, but the
-      // latter will quite reasonably not consider itself equivalent to an ArrayList (and vice
-      // versa).
-      //
-      // To enable a reasonable comparison, we clone retainedObject again here, converting it to
-      // the same sort of T that the Coder<T> output when it created clonedObject.
-      T clonedPossiblyModifiedObject = CoderUtils.clone(coder, possiblyModifiedObject);
-
-      // If deepEquals() then we trust the equals implementation.
-      // This deliberately allows fields to escape this check.
-      if (Objects.deepEquals(clonedPossiblyModifiedObject, clonedOriginalObject)) {
-        return;
-      }
-
-      // If not deepEquals(), the class may just have a poor equals() implementation.
-      // So we next try checking their serialized forms. We re-serialize instead of checking
-      // encodedObject, because the Coder may treat it differently.
-      //
-      // For example, an unbounded Iterable will be encoded in an unbounded way, but decoded into an
-      // ArrayList, which will then be re-encoded in a bounded format. So we really do need to
-      // encode-decode-encode retainedObject.
-      if (Arrays.equals(
-          CoderUtils.encodeToByteArray(coder, clonedOriginalObject),
-          CoderUtils.encodeToByteArray(coder, clonedPossiblyModifiedObject))) {
-        return;
-      }
-
-      // If we got here, then they are not deepEquals() and do not have deepEquals() encodings.
-      // Even if there is some conceptual sense in which the objects are equivalent, it has not
-      // been adequately expressed in code.
-      illegalMutation(clonedOriginalObject, clonedPossiblyModifiedObject);
-    }
-
-    private void illegalMutation(T previousValue, T newValue) throws CoderException {
-      throw new IllegalMutationException(
-          String.format("Value %s mutated illegally, new value was %s."
-              + " Encoding was %s, now %s.",
-              previousValue, newValue,
-              CoderUtils.encodeToBase64(coder, previousValue),
-              CoderUtils.encodeToBase64(coder, newValue)),
-          previousValue, newValue);
-    }
-
-    @Override
-    public void close() {
-      verifyUnmodified();
-    }
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java
deleted file mode 100644
index 1270f01..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonEmptyPanes.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.VarLongCoder;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode;
-import com.google.cloud.dataflow.sdk.util.state.AccumulatorCombiningState;
-import com.google.cloud.dataflow.sdk.util.state.MergingStateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.ReadableState;
-import com.google.cloud.dataflow.sdk.util.state.StateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateMerging;
-import com.google.cloud.dataflow.sdk.util.state.StateTag;
-import com.google.cloud.dataflow.sdk.util.state.StateTags;
-
-/**
- * Tracks which windows have non-empty panes. Specifically, which windows have new elements since
- * their last triggering.
- *
- * @param <W> The kind of windows being tracked.
- */
-public abstract class NonEmptyPanes<K, W extends BoundedWindow> {
-
-  static <K, W extends BoundedWindow> NonEmptyPanes<K, W> create(
-      WindowingStrategy<?, W> strategy, ReduceFn<K, ?, ?, W> reduceFn) {
-    if (strategy.getMode() == AccumulationMode.DISCARDING_FIRED_PANES) {
-      return new DiscardingModeNonEmptyPanes<>(reduceFn);
-    } else {
-      return new GeneralNonEmptyPanes<>();
-    }
-  }
-
-  /**
-   * Record that some content has been added to the window in {@code context}, and therefore the
-   * current pane is not empty.
-   */
-  public abstract void recordContent(StateAccessor<K> context);
-
-  /**
-   * Record that the given pane is empty.
-   */
-  public abstract void clearPane(StateAccessor<K> state);
-
-  /**
-   * Return true if the current pane for the window in {@code context} is empty.
-   */
-  public abstract ReadableState<Boolean> isEmpty(StateAccessor<K> context);
-
-  /**
-   * Prefetch in preparation for merging.
-   */
-  public abstract void prefetchOnMerge(MergingStateAccessor<K, W> state);
-
-  /**
-   * Eagerly merge backing state.
-   */
-  public abstract void onMerge(MergingStateAccessor<K, W> context);
-
-  /**
-   * An implementation of {@code NonEmptyPanes} optimized for use with discarding mode. Uses the
-   * presence of data in the accumulation buffer to record non-empty panes.
-   */
-  private static class DiscardingModeNonEmptyPanes<K, W extends BoundedWindow>
-      extends NonEmptyPanes<K, W> {
-
-    private ReduceFn<K, ?, ?, W> reduceFn;
-
-    private DiscardingModeNonEmptyPanes(ReduceFn<K, ?, ?, W> reduceFn) {
-      this.reduceFn = reduceFn;
-    }
-
-    @Override
-    public ReadableState<Boolean> isEmpty(StateAccessor<K> state) {
-      return reduceFn.isEmpty(state);
-    }
-
-    @Override
-    public void recordContent(StateAccessor<K> state) {
-      // Nothing to do -- the reduceFn is tracking contents
-    }
-
-    @Override
-    public void clearPane(StateAccessor<K> state) {
-      // Nothing to do -- the reduceFn is tracking contents
-    }
-
-    @Override
-    public void prefetchOnMerge(MergingStateAccessor<K, W> state) {
-      // Nothing to do -- the reduceFn is tracking contents
-    }
-
-    @Override
-    public void onMerge(MergingStateAccessor<K, W> context) {
-      // Nothing to do -- the reduceFn is tracking contents
-    }
-  }
-
-  /**
-   * An implementation of {@code NonEmptyPanes} for general use.
-   */
-  private static class GeneralNonEmptyPanes<K, W extends BoundedWindow>
-      extends NonEmptyPanes<K, W> {
-
-    private static final StateTag<Object, AccumulatorCombiningState<Long, long[], Long>>
-        PANE_ADDITIONS_TAG =
-        StateTags.makeSystemTagInternal(StateTags.combiningValueFromInputInternal(
-            "count", VarLongCoder.of(), new Sum.SumLongFn()));
-
-    @Override
-    public void recordContent(StateAccessor<K> state) {
-      state.access(PANE_ADDITIONS_TAG).add(1L);
-    }
-
-    @Override
-    public void clearPane(StateAccessor<K> state) {
-      state.access(PANE_ADDITIONS_TAG).clear();
-    }
-
-    @Override
-    public ReadableState<Boolean> isEmpty(StateAccessor<K> state) {
-      return state.access(PANE_ADDITIONS_TAG).isEmpty();
-    }
-
-    @Override
-    public void prefetchOnMerge(MergingStateAccessor<K, W> state) {
-      StateMerging.prefetchCombiningValues(state, PANE_ADDITIONS_TAG);
-    }
-
-    @Override
-    public void onMerge(MergingStateAccessor<K, W> context) {
-      StateMerging.mergeCombiningValues(context, PANE_ADDITIONS_TAG);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonMergingActiveWindowSet.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonMergingActiveWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonMergingActiveWindowSet.java
deleted file mode 100644
index cb7f9b0..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NonMergingActiveWindowSet.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.common.collect.ImmutableSet;
-
-import java.util.Collection;
-import java.util.Set;
-
-/**
- * Implementation of {@link ActiveWindowSet} used with {@link WindowFn WindowFns} that don't support
- * merging.
- *
- * @param <W> the types of windows being managed
- */
-public class NonMergingActiveWindowSet<W extends BoundedWindow> implements ActiveWindowSet<W> {
-  @Override
-  public void removeEphemeralWindows() {}
-
-  @Override
-  public void persist() {}
-
-  @Override
-  public W representative(W window) {
-    // Always represented by itself.
-    return window;
-  }
-
-  @Override
-  public Set<W> getActiveWindows() {
-    // Only supported when merging.
-    throw new java.lang.UnsupportedOperationException();
-  }
-
-  @Override
-  public boolean isActive(W window) {
-    // Windows should never disappear, since we don't support merging.
-    return true;
-  }
-
-  @Override
-  public void addNew(W window) {}
-
-  @Override
-  public void addActive(W window) {}
-
-  @Override
-  public void remove(W window) {}
-
-  @Override
-  public void merge(MergeCallback<W> mergeCallback) throws Exception {}
-
-  @Override
-  public void merged(W window) {}
-
-  @Override
-  public Set<W> readStateAddresses(W window) {
-    return ImmutableSet.of(window);
-  }
-
-  @Override
-  public W writeStateAddress(W window) {
-    return window;
-  }
-
-  @Override
-  public W mergedWriteStateAddress(Collection<W> toBeMerged, W mergeResult) {
-    return mergeResult;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopCredentialFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopCredentialFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopCredentialFactory.java
deleted file mode 100644
index 9ef4c2e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopCredentialFactory.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.auth.oauth2.Credential;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-import java.io.IOException;
-import java.security.GeneralSecurityException;
-
-/**
- * Construct an oauth credential to be used by the SDK and the SDK workers.
- * Always returns a null Credential object.
- */
-public class NoopCredentialFactory implements CredentialFactory {
-  public static NoopCredentialFactory fromOptions(PipelineOptions options) {
-    return new NoopCredentialFactory();
-  }
-
-  @Override
-  public Credential getCredential() throws IOException, GeneralSecurityException {
-    return null;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopPathValidator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopPathValidator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopPathValidator.java
deleted file mode 100644
index 00abbb1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NoopPathValidator.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-/**
- * Noop implementation of {@link PathValidator}. All paths are allowed and returned unchanged.
- */
-public class NoopPathValidator implements PathValidator {
-
-  private NoopPathValidator() {
-  }
-
-  public static PathValidator fromOptions(
-      @SuppressWarnings("unused") PipelineOptions options) {
-    return new NoopPathValidator();
-  }
-
-  @Override
-  public String validateInputFilePatternSupported(String filepattern) {
-    return filepattern;
-  }
-
-  @Override
-  public String validateOutputFilePrefixSupported(String filePrefix) {
-    return filePrefix;
-  }
-
-  @Override
-  public String verifyPath(String path) {
-    return path;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NullSideInputReader.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NullSideInputReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NullSideInputReader.java
deleted file mode 100644
index 0fc2646..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/NullSideInputReader.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.common.collect.Sets;
-
-import java.util.Collections;
-import java.util.Set;
-
-/**
- * A {@link SideInputReader} representing a well-defined set of views, but not storing
- * any values for them. Used to check if a side input is present when the data itself
- * comes from elsewhere.
- */
-public class NullSideInputReader implements SideInputReader {
-
-  private Set<PCollectionView<?>> views;
-
-  public static NullSideInputReader empty() {
-    return new NullSideInputReader(Collections.<PCollectionView<?>>emptySet());
-  }
-
-  public static NullSideInputReader of(Iterable<? extends PCollectionView<?>> views) {
-    return new NullSideInputReader(views);
-  }
-
-  private NullSideInputReader(Iterable<? extends PCollectionView<?>> views) {
-    this.views = Sets.newHashSet(views);
-  }
-
-  @Override
-  public <T> T get(PCollectionView<T> view, BoundedWindow window) {
-    throw new IllegalArgumentException("cannot call NullSideInputReader.get()");
-  }
-
-  @Override
-  public boolean isEmpty() {
-    return views.isEmpty();
-  }
-
-  @Override
-  public <T> boolean contains(PCollectionView<T> view) {
-    return views.contains(view);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/OutputReference.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/OutputReference.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/OutputReference.java
deleted file mode 100644
index 096c996..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/OutputReference.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.api.client.util.Preconditions.checkNotNull;
-
-import com.google.api.client.json.GenericJson;
-import com.google.api.client.util.Key;
-
-/**
- * A representation used by {@link com.google.api.services.dataflow.model.Step}s
- * to reference the output of other {@code Step}s.
- */
-public final class OutputReference extends GenericJson {
-  @Key("@type")
-  public final String type = "OutputReference";
-
-  @Key("step_name")
-  private final String stepName;
-
-  @Key("output_name")
-  private final String outputName;
-
-  public OutputReference(String stepName, String outputName) {
-    this.stepName = checkNotNull(stepName);
-    this.outputName = checkNotNull(outputName);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViewWindow.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViewWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViewWindow.java
deleted file mode 100644
index 7cf636e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViewWindow.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-
-import java.util.Objects;
-
-/**
- * A pair of a {@link PCollectionView} and a {@link BoundedWindow}, which can
- * be thought of as the window "of" the view. This is a value class for use, e.g.,
- * as a compound cache key.
- *
- * @param <T> the type of the underlying PCollectionView
- */
-public final class PCollectionViewWindow<T> {
-
-  private final PCollectionView<T> view;
-  private final BoundedWindow window;
-
-  private PCollectionViewWindow(PCollectionView<T> view, BoundedWindow window) {
-    this.view = view;
-    this.window = window;
-  }
-
-  public static <T> PCollectionViewWindow<T> of(PCollectionView<T> view, BoundedWindow window) {
-    return new PCollectionViewWindow<>(view, window);
-  }
-
-  public PCollectionView<T> getView() {
-    return view;
-  }
-
-  public BoundedWindow getWindow() {
-    return window;
-  }
-
-  @Override
-  public boolean equals(Object otherObject) {
-    if (!(otherObject instanceof PCollectionViewWindow)) {
-      return false;
-    }
-    @SuppressWarnings("unchecked")
-    PCollectionViewWindow<T> other = (PCollectionViewWindow<T>) otherObject;
-    return getView().equals(other.getView()) && getWindow().equals(other.getWindow());
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(getView(), getWindow());
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViews.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViews.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViews.java
deleted file mode 100644
index 7e73547..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PCollectionViews.java
+++ /dev/null
@@ -1,426 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindows;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PValueBase;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.common.base.Function;
-import com.google.common.base.MoreObjects;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Multimap;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.NoSuchElementException;
-import java.util.Objects;
-
-import javax.annotation.Nullable;
-
-/**
- * Implementations of {@link PCollectionView} shared across the SDK.
- *
- * <p>For internal use only, subject to change.
- */
-public class PCollectionViews {
-
-  /**
-   * Returns a {@code PCollectionView<T>} capable of processing elements encoded using the provided
-   * {@link Coder} and windowed using the provided {@link WindowingStrategy}.
-   *
-   * <p>If {@code hasDefault} is {@code true}, then the view will take on the value
-   * {@code defaultValue} for any empty windows.
-   */
-  public static <T, W extends BoundedWindow> PCollectionView<T> singletonView(
-      Pipeline pipeline,
-      WindowingStrategy<?, W> windowingStrategy,
-      boolean hasDefault,
-      T defaultValue,
-      Coder<T> valueCoder) {
-    return new SingletonPCollectionView<>(
-        pipeline, windowingStrategy, hasDefault, defaultValue, valueCoder);
-  }
-
-  /**
-   * Returns a {@code PCollectionView<Iterable<T>>} capable of processing elements encoded using the
-   * provided {@link Coder} and windowed using the provided {@link WindowingStrategy}.
-   */
-  public static <T, W extends BoundedWindow> PCollectionView<Iterable<T>> iterableView(
-      Pipeline pipeline,
-      WindowingStrategy<?, W> windowingStrategy,
-      Coder<T> valueCoder) {
-    return new IterablePCollectionView<>(pipeline, windowingStrategy, valueCoder);
-  }
-
-  /**
-   * Returns a {@code PCollectionView<List<T>>} capable of processing elements encoded using the
-   * provided {@link Coder} and windowed using the provided {@link WindowingStrategy}.
-   */
-  public static <T, W extends BoundedWindow> PCollectionView<List<T>> listView(
-      Pipeline pipeline,
-      WindowingStrategy<?, W> windowingStrategy,
-      Coder<T> valueCoder) {
-    return new ListPCollectionView<>(pipeline, windowingStrategy, valueCoder);
-  }
-
-  /**
-   * Returns a {@code PCollectionView<Map<K, V>>} capable of processing elements encoded using the
-   * provided {@link Coder} and windowed using the provided {@link WindowingStrategy}.
-   */
-  public static <K, V, W extends BoundedWindow> PCollectionView<Map<K, V>> mapView(
-      Pipeline pipeline,
-      WindowingStrategy<?, W> windowingStrategy,
-      Coder<KV<K, V>> valueCoder) {
-
-    return new MapPCollectionView<K, V, W>(pipeline, windowingStrategy, valueCoder);
-  }
-
-  /**
-   * Returns a {@code PCollectionView<Map<K, Iterable<V>>>} capable of processing elements encoded
-   * using the provided {@link Coder} and windowed using the provided {@link WindowingStrategy}.
-   */
-  public static <K, V, W extends BoundedWindow> PCollectionView<Map<K, Iterable<V>>> multimapView(
-      Pipeline pipeline,
-      WindowingStrategy<?, W> windowingStrategy,
-      Coder<KV<K, V>> valueCoder) {
-    return new MultimapPCollectionView<K, V, W>(pipeline, windowingStrategy, valueCoder);
-  }
-
-  /**
-   * Implementation of conversion of singleton {@code Iterable<WindowedValue<T>>} to {@code T}.
-   *
-   * <p>For internal use only.
-   *
-   * <p>Instantiate via {@link PCollectionViews#singletonView}.
-   */
-  public static class SingletonPCollectionView<T, W extends BoundedWindow>
-     extends PCollectionViewBase<T, T, W> {
-    @Nullable private byte[] encodedDefaultValue;
-    @Nullable private transient T defaultValue;
-    @Nullable private Coder<T> valueCoder;
-    private boolean hasDefault;
-
-    private SingletonPCollectionView(
-        Pipeline pipeline, WindowingStrategy<?, W> windowingStrategy,
-        boolean hasDefault, T defaultValue, Coder<T> valueCoder) {
-      super(pipeline, windowingStrategy, valueCoder);
-      this.hasDefault = hasDefault;
-      this.defaultValue = defaultValue;
-      this.valueCoder = valueCoder;
-      if (hasDefault) {
-        try {
-          this.encodedDefaultValue = CoderUtils.encodeToByteArray(valueCoder, defaultValue);
-        } catch (IOException e) {
-          throw new RuntimeException("Unexpected IOException: ", e);
-        }
-      }
-    }
-
-    /**
-     * Returns the default value that was specified.
-     *
-     * <p>For internal use only.
-     *
-     * @throws NoSuchElementException if no default was specified.
-     */
-    public T getDefaultValue() {
-      if (!hasDefault) {
-        throw new NoSuchElementException("Empty PCollection accessed as a singleton view.");
-      }
-      // Lazily decode the default value once
-      synchronized (this) {
-        if (encodedDefaultValue != null) {
-          try {
-            defaultValue = CoderUtils.decodeFromByteArray(valueCoder, encodedDefaultValue);
-            encodedDefaultValue = null;
-          } catch (IOException e) {
-            throw new RuntimeException("Unexpected IOException: ", e);
-          }
-        }
-      }
-      return defaultValue;
-    }
-
-    @Override
-    protected T fromElements(Iterable<WindowedValue<T>> contents) {
-      try {
-        return Iterables.getOnlyElement(contents).getValue();
-      } catch (NoSuchElementException exc) {
-        return getDefaultValue();
-      } catch (IllegalArgumentException exc) {
-        throw new IllegalArgumentException(
-            "PCollection with more than one element "
-            + "accessed as a singleton view.");
-      }
-    }
-  }
-
-  /**
-   * Implementation of conversion {@code Iterable<WindowedValue<T>>} to {@code Iterable<T>}.
-   *
-   * <p>For internal use only.
-   *
-   * <p>Instantiate via {@link PCollectionViews#iterableView}.
-   */
-  public static class IterablePCollectionView<T, W extends BoundedWindow>
-      extends PCollectionViewBase<T, Iterable<T>, W> {
-    private IterablePCollectionView(
-        Pipeline pipeline, WindowingStrategy<?, W> windowingStrategy, Coder<T> valueCoder) {
-      super(pipeline, windowingStrategy, valueCoder);
-    }
-
-    @Override
-    protected Iterable<T> fromElements(Iterable<WindowedValue<T>> contents) {
-      return Iterables.unmodifiableIterable(
-          Iterables.transform(contents, new Function<WindowedValue<T>, T>() {
-        @SuppressWarnings("unchecked")
-        @Override
-        public T apply(WindowedValue<T> input) {
-          return input.getValue();
-        }
-      }));
-    }
-  }
-
-  /**
-   * Implementation of conversion {@code Iterable<WindowedValue<T>>} to {@code List<T>}.
-   *
-   * <p>For internal use only.
-   *
-   * <p>Instantiate via {@link PCollectionViews#listView}.
-   */
-  public static class ListPCollectionView<T, W extends BoundedWindow>
-      extends PCollectionViewBase<T, List<T>, W> {
-    private ListPCollectionView(
-        Pipeline pipeline, WindowingStrategy<?, W> windowingStrategy, Coder<T> valueCoder) {
-      super(pipeline, windowingStrategy, valueCoder);
-    }
-
-    @Override
-    protected List<T> fromElements(Iterable<WindowedValue<T>> contents) {
-      return ImmutableList.copyOf(
-          Iterables.transform(contents, new Function<WindowedValue<T>, T>() {
-            @SuppressWarnings("unchecked")
-            @Override
-            public T apply(WindowedValue<T> input) {
-              return input.getValue();
-            }
-          }));
-    }
-  }
-
-  /**
-   * Implementation of conversion {@code Iterable<WindowedValue<KV<K, V>>>}
-   * to {@code Map<K, Iterable<V>>}.
-   *
-   * <p>For internal use only.
-   */
-  public static class MultimapPCollectionView<K, V, W extends BoundedWindow>
-      extends PCollectionViewBase<KV<K, V>, Map<K, Iterable<V>>, W> {
-    private MultimapPCollectionView(
-        Pipeline pipeline,
-        WindowingStrategy<?, W> windowingStrategy,
-        Coder<KV<K, V>> valueCoder) {
-      super(pipeline, windowingStrategy, valueCoder);
-    }
-
-    @Override
-    protected Map<K, Iterable<V>> fromElements(Iterable<WindowedValue<KV<K, V>>> elements) {
-      Multimap<K, V> multimap = HashMultimap.create();
-      for (WindowedValue<KV<K, V>> elem : elements) {
-        KV<K, V> kv = elem.getValue();
-        multimap.put(kv.getKey(), kv.getValue());
-      }
-      // Safe covariant cast that Java cannot express without rawtypes, even with unchecked casts
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      Map<K, Iterable<V>> resultMap = (Map) multimap.asMap();
-      return Collections.unmodifiableMap(resultMap);
-    }
-  }
-
-  /**
-   * Implementation of conversion {@code Iterable<WindowedValue<KV<K, V>>>} with
-   * one value per key to {@code Map<K, V>}.
-   *
-   * <p>For internal use only.
-   */
-  public static class MapPCollectionView<K, V, W extends BoundedWindow>
-      extends PCollectionViewBase<KV<K, V>, Map<K, V>, W> {
-    private MapPCollectionView(
-        Pipeline pipeline,
-        WindowingStrategy<?, W> windowingStrategy,
-        Coder<KV<K, V>> valueCoder) {
-      super(pipeline, windowingStrategy, valueCoder);
-    }
-
-    /**
-     * Input iterable must actually be {@code Iterable<WindowedValue<KV<K, V>>>}.
-     */
-    @Override
-    protected Map<K, V> fromElements(Iterable<WindowedValue<KV<K, V>>> elements) {
-      Map<K, V> map = new HashMap<>();
-      for (WindowedValue<KV<K, V>> elem : elements) {
-        KV<K, V> kv = elem.getValue();
-        if (map.containsKey(kv.getKey())) {
-          throw new IllegalArgumentException("Duplicate values for " + kv.getKey());
-        }
-        map.put(kv.getKey(), kv.getValue());
-      }
-      return Collections.unmodifiableMap(map);
-    }
-  }
-
-  /**
-   * A base class for {@link PCollectionView} implementations, with additional type parameters
-   * that are not visible at pipeline assembly time when the view is used as a side input.
-   */
-  private abstract static class PCollectionViewBase<ElemT, ViewT, W extends BoundedWindow>
-      extends PValueBase
-      implements PCollectionView<ViewT> {
-    /** A unique tag for the view, typed according to the elements underlying the view. */
-    private TupleTag<Iterable<WindowedValue<ElemT>>> tag;
-
-    /** The windowing strategy for the PCollection underlying the view. */
-    private WindowingStrategy<?, W> windowingStrategy;
-
-    /** The coder for the elements underlying the view. */
-    private Coder<Iterable<WindowedValue<ElemT>>> coder;
-
-    /**
-     * Implement this to complete the implementation. It is a conversion function from
-     * all of the elements of the underlying {@link PCollection} to the value of the view.
-     */
-    protected abstract ViewT fromElements(Iterable<WindowedValue<ElemT>> elements);
-
-    /**
-     * Call this constructor to initialize the fields for which this base class provides
-     * boilerplate accessors.
-     */
-    protected PCollectionViewBase(
-        Pipeline pipeline,
-        TupleTag<Iterable<WindowedValue<ElemT>>> tag,
-        WindowingStrategy<?, W> windowingStrategy,
-        Coder<ElemT> valueCoder) {
-      super(pipeline);
-      if (windowingStrategy.getWindowFn() instanceof InvalidWindows) {
-        throw new IllegalArgumentException("WindowFn of PCollectionView cannot be InvalidWindows");
-      }
-      this.tag = tag;
-      this.windowingStrategy = windowingStrategy;
-      this.coder =
-          IterableCoder.of(WindowedValue.getFullCoder(
-              valueCoder, windowingStrategy.getWindowFn().windowCoder()));
-    }
-
-    /**
-     * Call this constructor to initialize the fields for which this base class provides
-     * boilerplate accessors, with an auto-generated tag.
-     */
-    protected PCollectionViewBase(
-        Pipeline pipeline,
-        WindowingStrategy<?, W> windowingStrategy,
-        Coder<ElemT> valueCoder) {
-      this(pipeline, new TupleTag<Iterable<WindowedValue<ElemT>>>(), windowingStrategy, valueCoder);
-    }
-
-    /**
-     * For serialization only. Do not use directly. Subclasses should call from their own
-     * protected no-argument constructor.
-     */
-    @SuppressWarnings("unused")  // used for serialization
-    protected PCollectionViewBase() {
-      super();
-    }
-
-    @Override
-    public ViewT fromIterableInternal(Iterable<WindowedValue<?>> elements) {
-      // Safe cast: it is required that the rest of the SDK maintain the invariant
-      // that a PCollectionView is only provided an iterable for the elements of an
-      // appropriately typed PCollection.
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      Iterable<WindowedValue<ElemT>> typedElements = (Iterable) elements;
-      return fromElements(typedElements);
-    }
-
-    /**
-     * Returns a unique {@link TupleTag} identifying this {@link PCollectionView}.
-     *
-     * <p>For internal use only by runner implementors.
-     */
-    @Override
-    public TupleTag<Iterable<WindowedValue<?>>> getTagInternal() {
-      // Safe cast: It is required that the rest of the SDK maintain the invariant that
-      // this tag is only used to access the contents of an appropriately typed underlying
-      // PCollection
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      TupleTag<Iterable<WindowedValue<?>>> untypedTag = (TupleTag) tag;
-      return untypedTag;
-    }
-
-    /**
-     * Returns the {@link WindowingStrategy} of this {@link PCollectionView}, which should
-     * be that of the underlying {@link PCollection}.
-     *
-     * <p>For internal use only by runner implementors.
-     */
-    @Override
-    public WindowingStrategy<?, ?> getWindowingStrategyInternal() {
-      return windowingStrategy;
-    }
-
-    @Override
-    public Coder<Iterable<WindowedValue<?>>> getCoderInternal() {
-      // Safe cast: It is required that the rest of the SDK only use this untyped coder
-      // for the elements of an appropriately typed underlying PCollection.
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      Coder<Iterable<WindowedValue<?>>> untypedCoder = (Coder) coder;
-      return untypedCoder;
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(tag);
-    }
-
-    @Override
-    public boolean equals(Object other) {
-      if (!(other instanceof PCollectionView) || other == null) {
-        return false;
-      }
-      @SuppressWarnings("unchecked")
-      PCollectionView<?> otherView = (PCollectionView<?>) other;
-      return tag.equals(otherView.getTagInternal());
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(this).add("tag", tag).toString();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java
deleted file mode 100644
index 5b87b5c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PTuple.java
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import java.util.Collections;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-/**
- * A {@code PTuple} is an immutable tuple of
- * heterogeneously-typed values, "keyed" by {@link TupleTag}s.
- *
- * <p>PTuples can be created and accessed as follows:
- * <pre> {@code
- * String v1 = ...;
- * Integer v2 = ...;
- * Iterable<String> v3 = ...;
- *
- * // Create TupleTags for each of the values to put in the
- * // PTuple (the type of the TupleTag enables tracking the
- * // static type of each of the values in the PTuple):
- * TupleTag<String> tag1 = new TupleTag<>();
- * TupleTag<Integer> tag2 = new TupleTag<>();
- * TupleTag<Iterable<String>> tag3 = new TupleTag<>();
- *
- * // Create a PTuple with three values:
- * PTuple povs =
- *     PTuple.of(tag1, v1)
- *         .and(tag2, v2)
- *         .and(tag3, v3);
- *
- * // Create an empty PTuple (no Pipeline is needed;
- * // empty() takes no arguments):
- * PTuple povs2 = PTuple.empty();
- *
- * // Get values out of a PTuple, using the same tags
- * // that were used to put them in:
- * Integer vX = povs.get(tag2);
- * String vY = povs.get(tag1);
- * Iterable<String> vZ = povs.get(tag3);
- *
- * // Get a map of all values in a PTuple:
- * Map<TupleTag<?>, ?> allVs = povs.getAll();
- * } </pre>
- */
-public class PTuple {
-  /**
-   * Returns an empty PTuple.
-   *
-   * <p>Longer PTuples can be created by calling
-   * {@link #and} on the result.
-   */
-  public static PTuple empty() {
-    return new PTuple();
-  }
-
-  /**
-   * Returns a singleton PTuple containing the given
-   * value keyed by the given TupleTag.
-   *
-   * <p>Longer PTuples can be created by calling
-   * {@link #and} on the result.
-   */
-  public static <V> PTuple of(TupleTag<V> tag, V value) {
-    return empty().and(tag, value);
-  }
-
-  /**
-   * Returns a new PTuple that has all the values and
-   * tags of this PTuple plus the given value and tag.
-   *
-   * <p>The given TupleTag should not already be mapped to a
-   * value in this PTuple.
-   */
-  public <V> PTuple and(TupleTag<V> tag, V value) {
-    Map<TupleTag<?>, Object> newMap = new LinkedHashMap<TupleTag<?>, Object>();
-    newMap.putAll(valueMap);
-    newMap.put(tag, value);
-    return new PTuple(newMap);
-  }
-
-  /**
-   * Returns whether this PTuple contains a value with
-   * the given tag.
-   */
-  public <V> boolean has(TupleTag<V> tag) {
-    return valueMap.containsKey(tag);
-  }
-
-  /**
-   * Returns true if this {@code PTuple} is empty.
-   */
-  public boolean isEmpty() {
-    return valueMap.isEmpty();
-  }
-
-  /**
-   * Returns the value with the given tag in this
-   * PTuple.  Throws IllegalArgumentException if there is no
-   * such value, i.e., {@code !has(tag)}.
-   */
-  public <V> V get(TupleTag<V> tag) {
-    if (!has(tag)) {
-      throw new IllegalArgumentException(
-          "TupleTag not found in this PTuple");
-    }
-    @SuppressWarnings("unchecked")
-    V value = (V) valueMap.get(tag);
-    return value;
-  }
-
-  /**
-   * Returns an immutable Map from TupleTag to corresponding
-   * value, for all the members of this PTuple.
-   */
-  public Map<TupleTag<?>, ?> getAll() {
-    return valueMap;
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal details below here.
-
-  private final Map<TupleTag<?>, ?> valueMap;
-
-  @SuppressWarnings("rawtypes")
-  private PTuple() {
-    this(new LinkedHashMap());
-  }
-
-  private PTuple(Map<TupleTag<?>, ?> valueMap) {
-    this.valueMap = Collections.unmodifiableMap(valueMap);
-  }
-
-  /**
-   * Returns a PTuple with each of the given tags mapping
-   * to the corresponding value.
-   *
-   * <p>For internal use only.
-   */
-  public static PTuple ofInternal(Map<TupleTag<?>, ?> valueMap) {
-    return new PTuple(valueMap);
-  }
-}

[06/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ZipFiles.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ZipFiles.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ZipFiles.java
deleted file mode 100644
index 773b65f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ZipFiles.java
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.common.collect.FluentIterable;
-import com.google.common.collect.Iterators;
-import com.google.common.io.ByteSource;
-import com.google.common.io.CharSource;
-import com.google.common.io.Closer;
-import com.google.common.io.Files;
-
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipFile;
-import java.util.zip.ZipOutputStream;
-
-/**
- * Functions for zipping a directory (including a subdirectory) into a ZIP-file
- * or unzipping it again.
- */
-public final class ZipFiles {
-  private ZipFiles() {}
-
-  /**
-   * Returns a new {@link ByteSource} for reading the contents of the given
-   * entry in the given zip file.
-   */
-  static ByteSource asByteSource(ZipFile file, ZipEntry entry) {
-    return new ZipEntryByteSource(file, entry);
-  }
-
-  /**
-   * Returns a new {@link CharSource} for reading the contents of the given
-   * entry in the given zip file as text using the given charset.
-   */
-  static CharSource asCharSource(
-      ZipFile file, ZipEntry entry, Charset charset) {
-    return asByteSource(file, entry).asCharSource(charset);
-  }
-
-  private static final class ZipEntryByteSource extends ByteSource {
-
-    private final ZipFile file;
-    private final ZipEntry entry;
-
-    ZipEntryByteSource(ZipFile file, ZipEntry entry) {
-      this.file = checkNotNull(file);
-      this.entry = checkNotNull(entry);
-    }
-
-    @Override
-    public InputStream openStream() throws IOException {
-      return file.getInputStream(entry);
-    }
-
-    // TODO: implement size() to try calling entry.getSize()?
-
-    @Override
-    public String toString() {
-      return "ZipFiles.asByteSource(" + file + ", " + entry + ")";
-    }
-  }
-
-  /**
-   * Returns a {@link FluentIterable} of all the entries in the given zip file.
-   */
-  // unmodifiable Iterator<? extends ZipEntry> can be safely cast
-  // to Iterator<ZipEntry>
-  @SuppressWarnings("unchecked")
-  static FluentIterable<ZipEntry> entries(final ZipFile file) {
-    checkNotNull(file);
-    return new FluentIterable<ZipEntry>() {
-      @Override
-      public Iterator<ZipEntry> iterator() {
-        return (Iterator<ZipEntry>) Iterators.forEnumeration(file.entries());
-      }
-    };
-  }
-
-  /**
-   * Unzips the zip file specified by the path and creates the directory structure <i>inside</i>
-   * the target directory. Refuses to unzip files that refer to a parent directory, for security
-   * reasons.
-   *
-   * @param zipFile the source zip-file to unzip
-   * @param targetDirectory the directory to unzip to. If the zip-file contains
-   *     any subdirectories, they will be created within our target directory.
-   * @throws IOException the unzipping failed, e.g. because the output was not writable, the {@code
-   *     zipFile} was not readable, or contains an illegal entry (contains "..", pointing outside
-   *     the target directory)
-   * @throws IllegalArgumentException the target directory is not a valid directory (e.g. does not
-   *     exist, or is a file instead of a directory)
-   */
-  static void unzipFile(
-      File zipFile,
-      File targetDirectory) throws IOException {
-    checkNotNull(zipFile);
-    checkNotNull(targetDirectory);
-    checkArgument(
-        targetDirectory.isDirectory(),
-        "%s is not a valid directory",
-        targetDirectory.getAbsolutePath());
-    final ZipFile zipFileObj = new ZipFile(zipFile);
-    try {
-      for (ZipEntry entry : entries(zipFileObj)) {
-        checkName(entry.getName());
-        File targetFile = new File(targetDirectory, entry.getName());
-        if (entry.isDirectory()) {
-          if (!targetFile.isDirectory() && !targetFile.mkdirs()) {
-            throw new IOException(
-                "Failed to create directory: " + targetFile.getAbsolutePath());
-          }
-        } else {
-          File parentFile = targetFile.getParentFile();
-          if (!parentFile.isDirectory()) {
-            if (!parentFile.mkdirs()) {
-              throw new IOException(
-                  "Failed to create directory: "
-                  + parentFile.getAbsolutePath());
-            }
-          }
-          // Write the file to the destination.
-          asByteSource(zipFileObj, entry).copyTo(Files.asByteSink(targetFile));
-        }
-      }
-    } finally {
-      zipFileObj.close();
-    }
-  }
-
-  /**
-   * Checks that the given entry name is legal for unzipping: if it contains
-   * ".." as a name element, it could cause the entry to be unzipped outside
-   * the directory we're unzipping to.
-   *
-   * @throws IOException if the name is illegal
-   */
-  private static void checkName(String name) throws IOException {
-    // First just check whether the entry name string contains "..".
-    // This should weed out the vast majority of entries, which will not
-    // contain "..".
-    if (name.contains("..")) {
-      // If the string does contain "..", break it down into its actual name
-      // elements to ensure it actually contains ".." as a name, not just a
-      // name like "foo..bar" or even "foo..", which should be fine.
-      File file = new File(name);
-      while (file != null) {
-        if (file.getName().equals("..")) {
-          throw new IOException("Cannot unzip file containing an entry with "
-              + "\"..\" in the name: " + name);
-        }
-        file = file.getParentFile();
-      }
-    }
-  }
-
-  /**
-   * Zips an entire directory specified by the path.
-   *
-   * @param sourceDirectory the directory to read from. This directory and all
-   *     subdirectories will be added to the zip-file. The path within the zip
-   *     file is relative to the directory given as parameter, not absolute.
-   * @param zipFile the zip-file to write to.
-   * @throws IOException the zipping failed, e.g. because the input was not
-   *     readable.
-   */
-  static void zipDirectory(
-      File sourceDirectory,
-      File zipFile) throws IOException {
-    checkNotNull(sourceDirectory);
-    checkNotNull(zipFile);
-    checkArgument(
-        sourceDirectory.isDirectory(),
-        "%s is not a valid directory",
-        sourceDirectory.getAbsolutePath());
-    checkArgument(
-        !zipFile.exists(),
-        "%s does already exist, files are not being overwritten",
-        zipFile.getAbsolutePath());
-    Closer closer = Closer.create();
-    try {
-      OutputStream outputStream = closer.register(new BufferedOutputStream(
-          new FileOutputStream(zipFile)));
-      zipDirectory(sourceDirectory, outputStream);
-    } catch (Throwable t) {
-      throw closer.rethrow(t);
-    } finally {
-      closer.close();
-    }
-  }
-
-  /**
-   * Zips an entire directory specified by the path.
-   *
-   * @param sourceDirectory the directory to read from. This directory and all
-   *     subdirectories will be added to the zip-file. The path within the zip
-   *     file is relative to the directory given as parameter, not absolute.
-   * @param outputStream the stream to write the zip-file to. This method does not close
-   *     outputStream.
-   * @throws IOException the zipping failed, e.g. because the input was not
-   *     readable.
-   */
-  static void zipDirectory(
-      File sourceDirectory,
-      OutputStream outputStream) throws IOException {
-    checkNotNull(sourceDirectory);
-    checkNotNull(outputStream);
-    checkArgument(
-        sourceDirectory.isDirectory(),
-        "%s is not a valid directory",
-        sourceDirectory.getAbsolutePath());
-    ZipOutputStream zos = new ZipOutputStream(outputStream);
-    for (File file : sourceDirectory.listFiles()) {
-      zipDirectoryInternal(file, "", zos);
-    }
-    zos.finish();
-  }
-
-  /**
-   * Private helper function for zipping files. This one goes recursively
-   * through the input directory and all of its subdirectories and adds the
-   * single zip entries.
-   *
-   * @param inputFile the file or directory to be added to the zip file
-   * @param directoryName the string-representation of the parent directory
-   *     name. Might be an empty name, or a name containing multiple directory
-   *     names separated by "/". The directory name must be a valid name
-   *     according to the file system limitations. The directory name should be
-   *     empty or should end in "/".
-   * @param zos the zipstream to write to
-   * @throws IOException the zipping failed, e.g. because the output was not
-   *     writeable.
-   */
-  private static void zipDirectoryInternal(
-      File inputFile,
-      String directoryName,
-      ZipOutputStream zos) throws IOException {
-    String entryName = directoryName + inputFile.getName();
-    if (inputFile.isDirectory()) {
-      entryName += "/";
-
-      // We are hitting a sub-directory. Recursively add children to zip in deterministic,
-      // sorted order.
-      File[] childFiles = inputFile.listFiles();
-      if (childFiles.length > 0) {
-        Arrays.sort(childFiles);
-        // loop through the directory content, and zip the files
-        for (File file : childFiles) {
-          zipDirectoryInternal(file, entryName, zos);
-        }
-
-        // Since this directory has children, exit now without creating a zipentry specific to
-        // this directory. The entry for a non-empty directory is incompatible with certain
-        // implementations of unzip.
-        return;
-      }
-    }
-
-    // Put the zip-entry for this file or empty directory into the zipoutputstream.
-    ZipEntry entry = new ZipEntry(entryName);
-    entry.setTime(inputFile.lastModified());
-    zos.putNextEntry(entry);
-
-    // Copy file contents into zipoutput stream.
-    if (inputFile.isFile()) {
-      Files.asByteSource(inputFile).copyTo(zos);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java
deleted file mode 100644
index 2c1985c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Counter.java
+++ /dev/null
@@ -1,1103 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util.common;
-
-import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.AND;
-import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.MEAN;
-import static com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind.OR;
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.util.concurrent.AtomicDouble;
-
-import java.util.Objects;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.atomic.AtomicReference;
-
-import javax.annotation.Nullable;
-
-/**
- * A Counter enables the aggregation of a stream of values over time.  The
- * cumulative aggregate value is updated as new values are added, or it can be
- * reset to a new value.  Multiple kinds of aggregation are supported depending
- * on the type of the counter.
- *
- * <p>Counters compare using value equality of their name, kind, and
- * cumulative value.  Equal counters should have equal toString()s.
- *
- * @param <T> the type of values aggregated by this counter
- */
-public abstract class Counter<T> {
-  /**
-   * Possible kinds of counter aggregation.
-   */
-  public static enum AggregationKind {
-
-    /**
-     * Computes the sum of all added values.
-     * Applicable to {@link Integer}, {@link Long}, and {@link Double} values.
-     */
-    SUM,
-
-    /**
-     * Computes the maximum value of all added values.
-     * Applicable to {@link Integer}, {@link Long}, and {@link Double} values.
-     */
-    MAX,
-
-    /**
-     * Computes the minimum value of all added values.
-     * Applicable to {@link Integer}, {@link Long}, and {@link Double} values.
-     */
-    MIN,
-
-    /**
-     * Computes the arithmetic mean of all added values.  Applicable to
-     * {@link Integer}, {@link Long}, and {@link Double} values.
-     */
-    MEAN,
-
-    /**
-     * Computes boolean AND over all added values.
-     * Applicable only to {@link Boolean} values.
-     */
-    AND,
-
-    /**
-     * Computes boolean OR over all added values. Applicable only to
-     * {@link Boolean} values.
-     */
-    OR
-    // TODO: consider adding VECTOR_SUM, HISTOGRAM, KV_SET, PRODUCT, TOP.
-  }
-
-  /**
-   * Constructs a new {@link Counter} that aggregates {@link Integer} values
-   * according to the desired aggregation kind. The supported aggregation kinds
-   * are {@link AggregationKind#SUM}, {@link AggregationKind#MIN},
-   * {@link AggregationKind#MAX}, and {@link AggregationKind#MEAN}.
-   * This is a convenience wrapper over a
-   * {@link Counter} implementation that aggregates {@link Long} values. This is
-   * useful when the application handles (boxed) {@link Integer} values that
-   * are not readily convertible to the (boxed) {@link Long} values otherwise
-   * expected by the {@link Counter} implementation aggregating {@link Long}
-   * values.
-   *
-   * @param name the name of the new counter
-   * @param kind the new counter's aggregation kind
-   * @return the newly constructed Counter
-   * @throws IllegalArgumentException if the aggregation kind is not supported
-   */
-  public static Counter<Integer> ints(String name, AggregationKind kind) {
-    return new IntegerCounter(name, kind);
-  }
-
-  /**
-   * Constructs a new {@link Counter} that aggregates {@link Long} values
-   * according to the desired aggregation kind. The supported aggregation kinds
-   * are {@link AggregationKind#SUM}, {@link AggregationKind#MIN},
-   * {@link AggregationKind#MAX}, and {@link AggregationKind#MEAN}.
-   *
-   * @param name the name of the new counter
-   * @param kind the new counter's aggregation kind
-   * @return the newly constructed Counter
-   * @throws IllegalArgumentException if the aggregation kind is not supported
-   */
-  public static Counter<Long> longs(String name, AggregationKind kind) {
-    return new LongCounter(name, kind);
-  }
-
-  /**
-   * Constructs a new {@link Counter} that aggregates {@link Double} values
-   * according to the desired aggregation kind. The supported aggregation kinds
-   * are {@link AggregationKind#SUM}, {@link AggregationKind#MIN},
-   * {@link AggregationKind#MAX}, and {@link AggregationKind#MEAN}.
-   *
-   * @param name the name of the new counter
-   * @param kind the new counter's aggregation kind
-   * @return the newly constructed Counter
-   * @throws IllegalArgumentException if the aggregation kind is not supported
-   */
-  public static Counter<Double> doubles(String name, AggregationKind kind) {
-    return new DoubleCounter(name, kind);
-  }
-
-  /**
-   * Constructs a new {@link Counter} that aggregates {@link Boolean} values
-   * according to the desired aggregation kind. The only supported aggregation
-   * kinds are {@link AggregationKind#AND} and {@link AggregationKind#OR}.
-   *
-   * @param name the name of the new counter
-   * @param kind the new counter's aggregation kind
-   * @return the newly constructed Counter
-   * @throws IllegalArgumentException if the aggregation kind is not supported
-   */
-  public static Counter<Boolean> booleans(String name, AggregationKind kind) {
-    return new BooleanCounter(name, kind);
-  }
-
-  /**
-   * Constructs a new {@link Counter} that aggregates {@link String} values
-   * according to the desired aggregation kind. The only supported aggregation
-   * kinds are {@link AggregationKind#MIN} and {@link AggregationKind#MAX}.
-   *
-   * @param name the name of the new counter
-   * @param kind the new counter's aggregation kind
-   * @return the newly constructed Counter
-   * @throws IllegalArgumentException if the aggregation kind is not supported
-   */
-  @SuppressWarnings("unused")
-  private static Counter<String> strings(String name, AggregationKind kind) {
-    return new StringCounter(name, kind);
-  }
-
-
-  //////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Adds a new value to the aggregation stream. Returns this (to allow method
-   * chaining).
-   */
-  public abstract Counter<T> addValue(T value);
-
-  /**
-   * Resets the aggregation stream to this new value. This aggregator must not
-   * be a MEAN aggregator. Returns this (to allow method chaining).
-   */
-  public abstract Counter<T> resetToValue(T value);
-
-  /**
-   * Resets the aggregation stream to this new value. Returns this (to allow
-   * method chaining). The value of elementCount must be non-negative, and this
-   * aggregator must be a MEAN aggregator.
-   */
-  public abstract Counter<T> resetMeanToValue(long elementCount, T value);
-
-  /**
-   * Resets the counter's delta value to have no values accumulated and returns
-   * the value of the delta prior to the reset.
-   *
-   * @return the aggregate delta at the time this method is called
-   */
-  public abstract T getAndResetDelta();
-
-  /**
-   * Resets the counter's delta value to have no values accumulated and returns
-   * the value of the delta prior to the reset, for a MEAN counter.
-   *
-   * @return the mean delta at the time this method is called
-   */
-  public abstract CounterMean<T> getAndResetMeanDelta();
-
-  /**
-   * Returns the counter's name.
-   */
-  public String getName() {
-    return name;
-  }
-
-  /**
-   * Returns the counter's aggregation kind.
-   */
-  public AggregationKind getKind() {
-    return kind;
-  }
-
-  /**
-   * Returns the counter's type.
-   */
-  public Class<?> getType() {
-    return new TypeDescriptor<T>(getClass()) {}.getRawType();
-  }
-
-  /**
-   * Returns the aggregated value, or the sum for MEAN aggregation, either
-   * total or, if delta, since the last update extraction or resetDelta.
-   */
-  public abstract T getAggregate();
-
-  /**
-   * The mean value of a {@code Counter}, represented as an aggregate value and
-   * a count.
-   *
-   * @param <T> the type of the aggregate
-   */
-  public static interface CounterMean<T> {
-    /**
-     * Gets the aggregate value of this {@code CounterMean}.
-     */
-    T getAggregate();
-
-    /**
-     * Gets the count of this {@code CounterMean}.
-     */
-    long getCount();
-  }
-
-  /**
-   * Returns the mean in the form of a CounterMean, or null if this is not a
-   * MEAN counter.
-   */
-  @Nullable
-  public abstract CounterMean<T> getMean();
-
-  /**
-   * Returns a string representation of the Counter. Useful for debugging logs.
-   * Example return value: "ElementCount:SUM(15)".
-   */
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder();
-    sb.append(getName());
-    sb.append(":");
-    sb.append(getKind());
-    sb.append("(");
-    switch (kind) {
-      case SUM:
-      case MAX:
-      case MIN:
-      case AND:
-      case OR:
-        sb.append(getAggregate());
-        break;
-      case MEAN:
-        sb.append(getMean());
-        break;
-      default:
-        throw illegalArgumentException();
-    }
-    sb.append(")");
-
-    return sb.toString();
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (this == o) {
-      return true;
-    } else if (o instanceof Counter) {
-      Counter<?> that = (Counter<?>) o;
-      if (this.name.equals(that.name) && this.kind == that.kind
-          && this.getClass().equals(that.getClass())) {
-        if (kind == MEAN) {
-          CounterMean<T> thisMean = this.getMean();
-          CounterMean<?> thatMean = that.getMean();
-          return thisMean == thatMean
-              || (Objects.equals(thisMean.getAggregate(), thatMean.getAggregate())
-                     && thisMean.getCount() == thatMean.getCount());
-        } else {
-          return Objects.equals(this.getAggregate(), that.getAggregate());
-        }
-      }
-    }
-    return false;
-  }
-
-  @Override
-  public int hashCode() {
-    if (kind == MEAN) {
-      CounterMean<T> mean = getMean();
-      return Objects.hash(getClass(), name, kind, mean.getAggregate(), mean.getCount());
-    } else {
-      return Objects.hash(getClass(), name, kind, getAggregate());
-    }
-  }
-
-  /**
-   * Returns whether this Counter is compatible with that Counter.  If
-   * so, they can be merged into a single Counter.
-   */
-  public boolean isCompatibleWith(Counter<?> that) {
-    return this.name.equals(that.name)
-        && this.kind == that.kind
-        && this.getClass().equals(that.getClass());
-  }
-
-  /**
-   * Merges this counter with the provided counter, returning this counter with the combined value
-   * of both counters. This may reset the delta of this counter.
-   *
-   * @throws IllegalArgumentException if the provided Counter is not compatible with this Counter
-   */
-  public abstract Counter<T> merge(Counter<T> that);
-
-  //////////////////////////////////////////////////////////////////////////////
-
-  /** The name of this counter. */
-  protected final String name;
-
-  /** The kind of aggregation function to apply to this counter. */
-  protected final AggregationKind kind;
-
-  protected Counter(String name, AggregationKind kind) {
-    this.name = name;
-    this.kind = kind;
-  }
-
-  //////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Implements a {@link Counter} for {@link Long} values.
-   */
-  private static class LongCounter extends Counter<Long> {
-    private final AtomicLong aggregate;
-    private final AtomicLong deltaAggregate;
-    private final AtomicReference<LongCounterMean> mean;
-    private final AtomicReference<LongCounterMean> deltaMean;
-
-    /** Initializes a new {@link Counter} for {@link Long} values. */
-    private LongCounter(String name, AggregationKind kind) {
-      super(name, kind);
-      switch (kind) {
-        case MEAN:
-          mean = new AtomicReference<>();
-          deltaMean = new AtomicReference<>();
-          getAndResetMeanDelta();
-          mean.set(deltaMean.get());
-          aggregate = deltaAggregate = null;
-          break;
-        case SUM:
-        case MAX:
-        case MIN:
-          aggregate = new AtomicLong();
-          deltaAggregate = new AtomicLong();
-          getAndResetDelta();
-          aggregate.set(deltaAggregate.get());
-          mean = deltaMean = null;
-          break;
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    public LongCounter addValue(Long value) {
-      switch (kind) {
-        case SUM:
-          aggregate.addAndGet(value);
-          deltaAggregate.addAndGet(value);
-          break;
-        case MEAN:
-          addToMeanAndSet(value, mean);
-          addToMeanAndSet(value, deltaMean);
-          break;
-        case MAX:
-          maxAndSet(value, aggregate);
-          maxAndSet(value, deltaAggregate);
-          break;
-        case MIN:
-          minAndSet(value, aggregate);
-          minAndSet(value, deltaAggregate);
-          break;
-        default:
-          throw illegalArgumentException();
-      }
-      return this;
-    }
-
-    private void minAndSet(Long value, AtomicLong target) {
-      long current;
-      long update;
-      do {
-        current = target.get();
-        update = Math.min(value, current);
-      } while (update < current && !target.compareAndSet(current, update));
-    }
-
-    private void maxAndSet(Long value, AtomicLong target) {
-      long current;
-      long update;
-      do {
-        current = target.get();
-        update = Math.max(value, current);
-      } while (update > current && !target.compareAndSet(current, update));
-    }
-
-    private void addToMeanAndSet(Long value, AtomicReference<LongCounterMean> target) {
-      LongCounterMean current;
-      LongCounterMean update;
-      do {
-        current = target.get();
-        update = new LongCounterMean(current.getAggregate() + value, current.getCount() + 1L);
-      } while (!target.compareAndSet(current, update));
-    }
-
-    @Override
-    public Long getAggregate() {
-      if (kind != MEAN) {
-        return aggregate.get();
-      } else {
-        return getMean().getAggregate();
-      }
-    }
-
-    @Override
-    public Long getAndResetDelta() {
-      switch (kind) {
-        case SUM:
-          return deltaAggregate.getAndSet(0L);
-        case MAX:
-          return deltaAggregate.getAndSet(Long.MIN_VALUE);
-        case MIN:
-          return deltaAggregate.getAndSet(Long.MAX_VALUE);
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    public Counter<Long> resetToValue(Long value) {
-      if (kind == MEAN) {
-        throw illegalArgumentException();
-      }
-      aggregate.set(value);
-      deltaAggregate.set(value);
-      return this;
-    }
-
-    @Override
-    public Counter<Long> resetMeanToValue(long elementCount, Long value) {
-      if (kind != MEAN) {
-        throw illegalArgumentException();
-      }
-      if (elementCount < 0) {
-        throw new IllegalArgumentException("elementCount must be non-negative");
-      }
-      LongCounterMean counterMean = new LongCounterMean(value, elementCount);
-      mean.set(counterMean);
-      deltaMean.set(counterMean);
-      return this;
-    }
-
-    @Override
-    public CounterMean<Long> getAndResetMeanDelta() {
-      if (kind != MEAN) {
-        throw illegalArgumentException();
-      }
-      return deltaMean.getAndSet(new LongCounterMean(0L, 0L));
-    }
-
-    @Override
-    @Nullable
-    public CounterMean<Long> getMean() {
-      if (kind != MEAN) {
-        throw illegalArgumentException();
-      }
-      return mean.get();
-    }
-
-    @Override
-    public Counter<Long> merge(Counter<Long> that) {
-      checkArgument(this.isCompatibleWith(that), "Counters %s and %s are incompatible", this, that);
-      switch (kind) {
-        case SUM:
-        case MIN:
-        case MAX:
-          return addValue(that.getAggregate());
-        case MEAN:
-          CounterMean<Long> thisCounterMean = this.getMean();
-          CounterMean<Long> thatCounterMean = that.getMean();
-          return resetMeanToValue(
-              thisCounterMean.getCount() + thatCounterMean.getCount(),
-              thisCounterMean.getAggregate() + thatCounterMean.getAggregate());
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    private static class LongCounterMean implements CounterMean<Long> {
-      private final long aggregate;
-      private final long count;
-
-      public LongCounterMean(long aggregate, long count) {
-        this.aggregate = aggregate;
-        this.count = count;
-      }
-
-      @Override
-      public Long getAggregate() {
-        return aggregate;
-      }
-
-      @Override
-      public long getCount() {
-        return count;
-      }
-
-      @Override
-      public String toString() {
-        return aggregate + "/" + count;
-      }
-    }
-  }
-
-  /**
-   * Implements a {@link Counter} for {@link Double} values.
-   */
-  private static class DoubleCounter extends Counter<Double> {
-    AtomicDouble aggregate;
-    AtomicDouble deltaAggregate;
-    AtomicReference<DoubleCounterMean> mean;
-    AtomicReference<DoubleCounterMean> deltaMean;
-
-    /** Initializes a new {@link Counter} for {@link Double} values. */
-    private DoubleCounter(String name, AggregationKind kind) {
-      super(name, kind);
-      switch (kind) {
-        case MEAN:
-          aggregate = deltaAggregate = null;
-          mean = new AtomicReference<>();
-          deltaMean = new AtomicReference<>();
-          getAndResetMeanDelta();
-          mean.set(deltaMean.get());
-          break;
-        case SUM:
-        case MAX:
-        case MIN:
-          mean = deltaMean = null;
-          aggregate = new AtomicDouble();
-          deltaAggregate = new AtomicDouble();
-          getAndResetDelta();
-          aggregate.set(deltaAggregate.get());
-          break;
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    public DoubleCounter addValue(Double value) {
-      switch (kind) {
-        case SUM:
-          aggregate.addAndGet(value);
-          deltaAggregate.addAndGet(value);
-          break;
-        case MEAN:
-          addToMeanAndSet(value, mean);
-          addToMeanAndSet(value, deltaMean);
-          break;
-        case MAX:
-          maxAndSet(value, aggregate);
-          maxAndSet(value, deltaAggregate);
-          break;
-        case MIN:
-          minAndSet(value, aggregate);
-          minAndSet(value, deltaAggregate);
-          break;
-        default:
-          throw illegalArgumentException();
-      }
-      return this;
-    }
-
-    private void addToMeanAndSet(Double value, AtomicReference<DoubleCounterMean> target) {
-      DoubleCounterMean current;
-      DoubleCounterMean update;
-      do {
-        current = target.get();
-        update = new DoubleCounterMean(current.getAggregate() + value, current.getCount() + 1);
-      } while (!target.compareAndSet(current, update));
-    }
-
-    private void maxAndSet(Double value, AtomicDouble target) {
-      double current;
-      double update;
-      do {
-        current = target.get();
-        update = Math.max(current, value);
-      } while (update > current && !target.compareAndSet(current, update));
-    }
-
-    private void minAndSet(Double value, AtomicDouble target) {
-      double current;
-      double update;
-      do {
-        current = target.get();
-        update = Math.min(current, value);
-      } while (update < current && !target.compareAndSet(current, update));
-    }
-
-    @Override
-    public Double getAndResetDelta() { // returns the delta and swaps in the kind's start value
-      switch (kind) {
-        case SUM:
-          return deltaAggregate.getAndSet(0.0);
-        case MAX:
-          return deltaAggregate.getAndSet(Double.NEGATIVE_INFINITY); // any later value is larger
-        case MIN:
-          return deltaAggregate.getAndSet(Double.POSITIVE_INFINITY); // any later value is smaller
-        default:
-          throw illegalArgumentException(); // includes MEAN, which uses getAndResetMeanDelta()
-      }
-    }
-
-    @Override
-    public Counter<Double> resetToValue(Double value) {
-      if (kind == MEAN) {
-        throw illegalArgumentException(); // MEAN counters are reset via resetMeanToValue()
-      }
-      aggregate.set(value);
-      deltaAggregate.set(value); // the delta is overwritten with the same value as the total
-      return this;
-    }
-
-    @Override
-    public Counter<Double> resetMeanToValue(long elementCount, Double value) {
-      if (kind != MEAN) {
-        throw illegalArgumentException(); // only MEAN counters carry a sum/count pair
-      }
-      if (elementCount < 0) {
-        throw new IllegalArgumentException("elementCount must be non-negative");
-      }
-      DoubleCounterMean counterMean = new DoubleCounterMean(value, elementCount); // (sum, count)
-      mean.set(counterMean);
-      deltaMean.set(counterMean); // total and delta share the same immutable instance
-      return this;
-    }
-
-    @Override
-    public CounterMean<Double> getAndResetMeanDelta() {
-      if (kind != MEAN) {
-        throw illegalArgumentException();
-      }
-      return deltaMean.getAndSet(new DoubleCounterMean(0.0, 0L)); // swap in an empty 0.0/0 delta
-    }
-
-    @Override
-    public Double getAggregate() {
-      if (kind != MEAN) {
-        return aggregate.get();
-      } else {
-        return getMean().getAggregate(); // for MEAN this is the running sum, not sum / count
-      }
-    }
-
-    @Override
-    @Nullable
-    public CounterMean<Double> getMean() {
-      if (kind != MEAN) {
-        throw illegalArgumentException(); // non-MEAN kinds throw here rather than return null
-      }
-      return mean.get();
-    }
-
-    @Override
-    public Counter<Double> merge(Counter<Double> that) {
-      checkArgument(this.isCompatibleWith(that), "Counters %s and %s are incompatible", this, that);
-      switch (kind) {
-        case SUM:
-        case MIN:
-        case MAX:
-          return addValue(that.getAggregate()); // the other total is folded in like one more value
-        case MEAN:
-          CounterMean<Double> thisCounterMean = this.getMean();
-          CounterMean<Double> thatCounterMean = that.getMean();
-          return resetMeanToValue( // NOTE(review): this also sets deltaMean to the merged total
-              thisCounterMean.getCount() + thatCounterMean.getCount(),
-              thisCounterMean.getAggregate() + thatCounterMean.getAggregate());
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    private static class DoubleCounterMean implements CounterMean<Double> { // immutable sum/count
-      private final double aggregate; // sum of the values, not the quotient
-      private final long count; // number of values behind the sum
-
-      public DoubleCounterMean(double aggregate, long count) {
-        this.aggregate = aggregate;
-        this.count = count;
-      }
-
-      @Override
-      public Double getAggregate() {
-        return aggregate; // boxed on return
-      }
-
-      @Override
-      public long getCount() {
-        return count;
-      }
-
-      @Override
-      public String toString() {
-        return aggregate + "/" + count; // rendered as sum/count, e.g. 3.0/2
-      }
-    }
-  }
-
-  /**
-   * Implements a {@link Counter} for {@link Boolean} values, supporting AND and OR aggregation.
-   */
-  private static class BooleanCounter extends Counter<Boolean> {
-    private final AtomicBoolean aggregate;
-    private final AtomicBoolean deltaAggregate;
-
-    /** Initializes a new {@link Counter} for {@link Boolean} values; kind must be AND or OR. */
-    private BooleanCounter(String name, AggregationKind kind) {
-      super(name, kind);
-      aggregate = new AtomicBoolean();
-      deltaAggregate = new AtomicBoolean();
-      getAndResetDelta(); // seeds the delta (true for AND, false for OR); throws for other kinds
-      aggregate.set(deltaAggregate.get()); // the total starts from the same value
-    }
-
-    @Override
-    public BooleanCounter addValue(Boolean value) {
-      if (kind.equals(AND) && !value) { // a false input forces an AND counter to false
-        aggregate.set(value);
-        deltaAggregate.set(value);
-      } else if (kind.equals(OR) && value) { // a true input forces an OR counter to true
-        aggregate.set(value);
-        deltaAggregate.set(value);
-      } // any other input leaves both values untouched
-      return this;
-    }
-
-    @Override
-    public Boolean getAndResetDelta() {
-      switch (kind) {
-        case AND:
-          return deltaAggregate.getAndSet(true); // true is the neutral start value for AND
-        case OR:
-          return deltaAggregate.getAndSet(false); // false is the neutral start value for OR
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    public Counter<Boolean> resetToValue(Boolean value) {
-      aggregate.set(value);
-      deltaAggregate.set(value); // the delta is overwritten with the same value as the total
-      return this;
-    }
-
-    @Override
-    public Counter<Boolean> resetMeanToValue(long elementCount, Boolean value) {
-      throw illegalArgumentException(); // this class keeps no mean state
-    }
-
-    @Override
-    public CounterMean<Boolean> getAndResetMeanDelta() {
-      throw illegalArgumentException(); // this class keeps no mean state
-    }
-
-    @Override
-    public Boolean getAggregate() {
-      return aggregate.get();
-    }
-
-    @Override
-    @Nullable
-    public CounterMean<Boolean> getMean() {
-      throw illegalArgumentException(); // always throws; never returns null
-    }
-
-    @Override
-    public Counter<Boolean> merge(Counter<Boolean> that) {
-      checkArgument(this.isCompatibleWith(that), "Counters %s and %s are incompatible", this, that);
-      return addValue(that.getAggregate()); // the other total is folded in like one more value
-    }
-  }
-
-  /**
-   * Implements a {@link Counter} for {@link String} values; currently every operation throws.
-   */
-  private static class StringCounter extends Counter<String> {
-    /** Initializes a new {@link Counter} for {@link String} values. */
-    private StringCounter(String name, AggregationKind kind) {
-      super(name, kind);
-      // TODO: Support MIN, MAX of Strings.
-      throw illegalArgumentException(); // unconditional, so construction always fails for now
-    }
-
-    @Override
-    public StringCounter addValue(String value) {
-      switch (kind) { // no cases yet; every kind reaches default
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    public Counter<String> resetToValue(String value) {
-      switch (kind) { // no cases yet; every kind reaches default
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    public Counter<String> resetMeanToValue(long elementCount, String value) {
-      switch (kind) { // no cases yet; every kind reaches default
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    public String getAndResetDelta() {
-      switch (kind) { // no cases yet; every kind reaches default
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    public CounterMean<String> getAndResetMeanDelta() {
-      switch (kind) { // no cases yet; every kind reaches default
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    public String getAggregate() {
-      switch (kind) { // no cases yet; every kind reaches default
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    @Nullable
-    public CounterMean<String> getMean() {
-      switch (kind) { // no cases yet; every kind reaches default
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    public Counter<String> merge(Counter<String> that) {
-      checkArgument(this.isCompatibleWith(that), "Counters %s and %s are incompatible", this, that);
-      switch (kind) { // no cases yet; every kind reaches default
-        default:
-          throw illegalArgumentException();
-      }
-    }
-  }
-
-  /**
-   * Implements a {@link Counter} for {@link Integer} values (SUM, MEAN, MAX and MIN kinds).
-   */
-  private static class IntegerCounter extends Counter<Integer> {
-    private final AtomicInteger aggregate; // null when kind is MEAN
-    private final AtomicInteger deltaAggregate; // null when kind is MEAN
-    private final AtomicReference<IntegerCounterMean> mean; // null unless kind is MEAN
-    private final AtomicReference<IntegerCounterMean> deltaMean; // null unless kind is MEAN
-
-    /** Initializes a new {@link Counter} for {@link Integer} values. */
-    private IntegerCounter(String name, AggregationKind kind) {
-      super(name, kind);
-      switch (kind) {
-        case MEAN:
-          aggregate = deltaAggregate = null;
-          mean = new AtomicReference<>();
-          deltaMean = new AtomicReference<>();
-          getAndResetMeanDelta(); // seeds deltaMean with an empty 0/0 mean
-          mean.set(deltaMean.get()); // the total starts from that same empty instance
-          break;
-        case SUM:
-        case MAX:
-        case MIN:
-          mean = deltaMean = null;
-          aggregate = new AtomicInteger();
-          deltaAggregate = new AtomicInteger();
-          getAndResetDelta(); // seeds the delta with the kind's start value
-          aggregate.set(deltaAggregate.get()); // the total starts from the same value
-          break;
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    @Override
-    public IntegerCounter addValue(Integer value) { // folds value into both total and delta state
-      switch (kind) {
-        case SUM:
-          aggregate.getAndAdd(value); // int addition; wraps around on overflow
-          deltaAggregate.getAndAdd(value);
-          break;
-        case MEAN:
-          addToMeanAndSet(value, mean); // adds value to the sum and bumps the count by one
-          addToMeanAndSet(value, deltaMean);
-          break;
-        case MAX:
-          maxAndSet(value, aggregate); // only writes when value exceeds the stored maximum
-          maxAndSet(value, deltaAggregate);
-          break;
-        case MIN:
-          minAndSet(value, aggregate); // only writes when value is below the stored minimum
-          minAndSet(value, deltaAggregate);
-          break;
-        default:
-          throw illegalArgumentException();
-      }
-      return this;
-    }
-
-    private void addToMeanAndSet(int value, AtomicReference<IntegerCounterMean> target) {
-      IntegerCounterMean current;
-      IntegerCounterMean update;
-      do { // compare-and-set retry loop; the sum is an int and wraps around on overflow
-        current = target.get();
-        update = new IntegerCounterMean(current.getAggregate() + value, current.getCount() + 1);
-      } while (!target.compareAndSet(current, update)); // retry if target changed since get()
-    }
-
-    private void maxAndSet(int value, AtomicInteger target) { // raise target to value if larger
-      int current;
-      int update;
-      do {
-        current = target.get();
-        update = Math.max(value, current);
-      } while (update > current && !target.compareAndSet(current, update)); // retry lost CAS only
-    }
-
-    private void minAndSet(int value, AtomicInteger target) { // lower target to value if smaller
-      int current;
-      int update;
-      do {
-        current = target.get();
-        update = Math.min(value, current);
-      } while (update < current && !target.compareAndSet(current, update)); // retry lost CAS only
-    }
-
-    @Override
-    public Integer getAndResetDelta() { // returns the delta and swaps in the kind's start value
-      switch (kind) {
-        case SUM:
-          return deltaAggregate.getAndSet(0);
-        case MAX:
-          return deltaAggregate.getAndSet(Integer.MIN_VALUE); // no later value can be smaller
-        case MIN:
-          return deltaAggregate.getAndSet(Integer.MAX_VALUE); // no later value can be larger
-        default:
-          throw illegalArgumentException(); // includes MEAN, which uses getAndResetMeanDelta()
-      }
-    }
-
-    @Override
-    public Counter<Integer> resetToValue(Integer value) {
-      if (kind == MEAN) {
-        throw illegalArgumentException(); // MEAN counters are reset via resetMeanToValue()
-      }
-      aggregate.set(value);
-      deltaAggregate.set(value); // the delta is overwritten with the same value as the total
-      return this;
-    }
-
-    @Override
-    public Counter<Integer> resetMeanToValue(long elementCount, Integer value) {
-      if (kind != MEAN) {
-        throw illegalArgumentException(); // only MEAN counters carry a sum/count pair
-      }
-      if (elementCount < 0) {
-        throw new IllegalArgumentException("elementCount must be non-negative");
-      }
-      IntegerCounterMean counterMean = new IntegerCounterMean(value, elementCount); // (sum, count)
-      mean.set(counterMean);
-      deltaMean.set(counterMean); // total and delta share the same immutable instance
-      return this;
-    }
-
-    @Override
-    public CounterMean<Integer> getAndResetMeanDelta() {
-      if (kind != MEAN) {
-        throw illegalArgumentException();
-      }
-      return deltaMean.getAndSet(new IntegerCounterMean(0, 0L)); // swap in an empty 0/0 delta
-    }
-
-    @Override
-    public Integer getAggregate() {
-      if (kind != MEAN) {
-        return aggregate.get();
-      } else {
-        return getMean().getAggregate(); // for MEAN this is the running sum, not sum / count
-      }
-    }
-
-    @Override
-    @Nullable
-    public CounterMean<Integer> getMean() {
-      if (kind != MEAN) {
-        throw illegalArgumentException(); // non-MEAN kinds throw here rather than return null
-      }
-      return mean.get();
-    }
-
-    @Override
-    public Counter<Integer> merge(Counter<Integer> that) {
-      checkArgument(this.isCompatibleWith(that), "Counters %s and %s are incompatible", this, that);
-      switch (kind) {
-        case SUM:
-        case MIN:
-        case MAX:
-          return addValue(that.getAggregate()); // the other total is folded in like one more value
-        case MEAN:
-          CounterMean<Integer> thisCounterMean = this.getMean();
-          CounterMean<Integer> thatCounterMean = that.getMean();
-          return resetMeanToValue( // NOTE(review): this also sets deltaMean to the merged total
-              thisCounterMean.getCount() + thatCounterMean.getCount(),
-              thisCounterMean.getAggregate() + thatCounterMean.getAggregate());
-        default:
-          throw illegalArgumentException();
-      }
-    }
-
-    private static class IntegerCounterMean implements CounterMean<Integer> { // immutable sum/count
-      private final int aggregate; // sum of the values, not the quotient
-      private final long count; // number of values behind the sum
-
-      public IntegerCounterMean(int aggregate, long count) {
-        this.aggregate = aggregate;
-        this.count = count;
-      }
-
-      @Override
-      public Integer getAggregate() {
-        return aggregate; // boxed on return
-      }
-
-      @Override
-      public long getCount() {
-        return count;
-      }
-
-      @Override
-      public String toString() {
-        return aggregate + "/" + count; // rendered as sum/count, e.g. 3/2
-      }
-    }
-  }
-
-  //////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Constructs (but does not throw) an {@link IllegalArgumentException} explaining that this
-   * {@link Counter}'s aggregation kind is not supported by its value type.
-   */
-  protected IllegalArgumentException illegalArgumentException() {
-    return new IllegalArgumentException("Cannot compute " + kind
-        + " aggregation over " + getType().getSimpleName() + " values.");
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterProvider.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterProvider.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterProvider.java
deleted file mode 100644
index ba53f80..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterProvider.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util.common;
-
-/**
- * A counter provider can provide {@link Counter} instances.
- *
- * @param <T> the input type of the counter.
- */
-public interface CounterProvider<T> {
-  Counter<T> getCounter(String name); // presumably the counter called name; contract not shown here
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterSet.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterSet.java
deleted file mode 100644
index 9e9638f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/CounterSet.java
+++ /dev/null
@@ -1,177 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util.common;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import java.util.AbstractSet;
-import java.util.HashMap;
-import java.util.Iterator;
-
-/**
- * A CounterSet maintains a set of {@link Counter}s, at most one per counter name.
- *
- * <p>Thread-safe (methods touching the map are synchronized; iterators are live, unguarded).
- */
-public class CounterSet extends AbstractSet<Counter<?>> {
-
-  /** Registered counters, keyed by counter name. */
-  private final HashMap<String, Counter<?>> counters = new HashMap<>();
-
-  private final AddCounterMutator addCounterMutator = new AddCounterMutator();
-
-  /**
-   * Constructs a CounterSet containing the given Counters.
-   */
-  public CounterSet(Counter<?>... counters) {
-    for (Counter<?> counter : counters) {
-      addNewCounter(counter); // throws on a null entry or a repeated name
-    }
-  }
-
-  /**
-   * Returns an object that supports adding additional counters into
-   * this CounterSet.
-   */
-  public AddCounterMutator getAddCounterMutator() {
-    return addCounterMutator;
-  }
-
-  /**
-   * Adds a new counter, throwing an exception if a counter of the
-   * same name already exists.
-   */
-  public void addNewCounter(Counter<?> counter) {
-    if (!addCounter(counter)) { // also false for a null counter, which gets the same message
-      throw new IllegalArgumentException(
-          "Counter " + counter + " duplicates an existing counter in " + this);
-    }
-  }
-
-  /**
-   * Adds the given Counter to this CounterSet.
-   *
-   * <p>If a counter with the same name already exists, it will be
-   * reused, as long as it is compatible.
-   *
-   * @return the Counter that was reused, or added
-   * @throws IllegalArgumentException if a counter with the same
-   * name but an incompatible kind had already been added
-   */
-  public synchronized <T> Counter<T> addOrReuseCounter(Counter<T> counter) {
-    Counter<?> oldCounter = counters.get(counter.getName());
-    if (oldCounter == null) {
-      // A new counter.
-      counters.put(counter.getName(), counter);
-      return counter;
-    }
-    if (counter.isCompatibleWith(oldCounter)) {
-      // Return the existing counter; the argument itself is not stored.
-      @SuppressWarnings("unchecked")
-      Counter<T> compatibleCounter = (Counter<T>) oldCounter;
-      return compatibleCounter;
-    }
-    throw new IllegalArgumentException(
-        "Counter " + counter + " duplicates incompatible counter "
-        + oldCounter + " in " + this);
-  }
-
-  /**
-   * Adds a counter. Returns {@code true} if the counter was added to the set
-   * and {@code false} if the given counter was {@code null} or a counter with
-   * the same name already existed in the set.
-   *
-   * @param counter to register
-   */
-  public boolean addCounter(Counter<?> counter) {
-    return add(counter);
-  }
-
-  /**
-   * Returns the Counter with the given name in this CounterSet;
-   * returns null if no such Counter exists.
-   */
-  public synchronized Counter<?> getExistingCounter(String name) {
-    return counters.get(name);
-  }
-
-  @Override
-  public synchronized Iterator<Counter<?>> iterator() {
-    return counters.values().iterator(); // live view of the map; iterating it holds no lock
-  }
-
-  @Override
-  public synchronized int size() {
-    return counters.size();
-  }
-
-  @Override
-  public synchronized boolean add(Counter<?> e) {
-    if (null == e) {
-      return false;
-    }
-    if (counters.containsKey(e.getName())) { // uniqueness is by name only
-      return false;
-    }
-    counters.put(e.getName(), e);
-    return true;
-  }
-
-  public synchronized void merge(CounterSet that) { // folds every counter of that into this set
-    for (Counter<?> theirCounter : that) {
-      Counter<?> myCounter = counters.get(theirCounter.getName());
-      if (myCounter != null) {
-        mergeCounters(myCounter, theirCounter);
-      } else {
-        addCounter(theirCounter); // stores the very same instance, not a copy
-      }
-    }
-  }
-
-  private <T> void mergeCounters(Counter<T> mine, Counter<?> theirCounter) {
-    checkArgument(
-        mine.isCompatibleWith(theirCounter),
-        "Can't merge CounterSets containing incompatible counters with the same name: "
-            + "%s (existing) and %s (merged)",
-        mine,
-        theirCounter);
-    @SuppressWarnings("unchecked")
-    Counter<T> theirs = (Counter<T>) theirCounter; // presumably safe after the check above
-    mine.merge(theirs);
-  }
-
-  /**
-   * A nested class that supports adding additional counters into the
-   * enclosing CounterSet. This is useful as a mutator, hiding other
-   * public methods of the CounterSet.
-   */
-  public class AddCounterMutator {
-    /**
-     * Adds the given Counter into the enclosing CounterSet.
-     *
-     * <p>If a counter with the same name already exists, it will be
-     * reused, as long as it is compatible.
-     *
-     * @return the Counter that was reused, or added
-     * @throws IllegalArgumentException if a counter with the same
-     * name but an incompatible kind had already been added
-     */
-    public <T> Counter<T> addCounter(Counter<T> counter) {
-      return addOrReuseCounter(counter);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservable.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservable.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservable.java
deleted file mode 100644
index fee6737..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservable.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util.common;
-
-/**
- * An interface for things that allow observing the size in bytes of
- * encoded values of type {@code T}.
- *
- * @param <T> the type of the values being observed
- */
-public interface ElementByteSizeObservable<T> {
-  /**
-   * Returns whether {@link #registerByteSizeObserver} is cheap enough
-   * to call for every element, that is, if this
-   * {@code ElementByteSizeObservable} can calculate the byte size of
-   * the given value in roughly constant time (or lazily).
-   */
-  public boolean isRegisterByteSizeObserverCheap(T value);
-
-  /**
-   * Notifies the given {@code ElementByteSizeObserver} about the byte size
-   * of the given value as encoded by this {@code ElementByteSizeObservable}.
-   */
-  public void registerByteSizeObserver(T value,
-                                       ElementByteSizeObserver observer)
-      throws Exception; // NOTE(review): broad checked exception; failure modes not shown here
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterable.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterable.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterable.java
deleted file mode 100644
index 591d2be..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterable.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util.common;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Observer;
-
-/**
- * An abstract class used for iterables that notify observers about size in
- * bytes of their elements, as they are being iterated over.
- *
- * @param <V> the type of elements returned by this iterable
- * @param <InputT> the type of iterator returned by this iterable
- */
-public abstract class ElementByteSizeObservableIterable<
-    V, InputT extends ElementByteSizeObservableIterator<V>>
-    implements Iterable<V> {
-  private List<Observer> observers = new ArrayList<>(); // pending until the next iterator() call
-
-  /**
-   * Derived classes override this method to return an iterator for this
-   * iterable.
-   */
-  protected abstract InputT createIterator();
-
-  /**
-   * Adds an observer, which will observe the iterator returned by
-   * the next call to the iterator() method. Later calls to iterator()
-   * won't be observed unless an observer is added again.
-   */
-  public void addObserver(Observer observer) {
-    observers.add(observer);
-  }
-
-  /**
-   * Returns a new iterator for this iterable. Any observers registered through
-   * addObserver() since the last call are attached to it and then forgotten.
-   */
-  @Override
-  public InputT iterator() {
-    InputT iterator = createIterator();
-    for (Observer observer : observers) {
-      iterator.addObserver(observer);
-    }
-    observers.clear(); // each registration applies to one iterator only
-    return iterator;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterator.java
deleted file mode 100644
index c094900..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObservableIterator.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util.common;
-
-import java.util.Iterator;
-import java.util.Observable;
-
-/**
- * An abstract class used for iterators that notify observers about size in
- * bytes of their elements, as they are being iterated over. The subclasses
- * need to implement the standard Iterator interface and call method
- * notifyValueReturned() for each element read and/or iterated over.
- *
- * @param <V> value type
- */
-public abstract class ElementByteSizeObservableIterator<V>
-    extends Observable implements Iterator<V> {
-  protected final void notifyValueReturned(long byteSize) {
-    setChanged(); // without this flag Observable.notifyObservers() does nothing
-    notifyObservers(byteSize); // the long is boxed, so observers receive a Long
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObserver.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObserver.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObserver.java
deleted file mode 100644
index 6c764d9..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ElementByteSizeObserver.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util.common;
-
-import java.util.Observable;
-import java.util.Observer;
-
-/**
- * An observer that gets notified when additional bytes are read
- * and/or used. It adds all bytes into a local counter. When the
- * observer gets advanced via the advance() call, it adds the total byte
- * count to the specified counter, and prepares for the next element.
- */
-public class ElementByteSizeObserver implements Observer {
-  private final Counter<Long> counter;
-  private boolean isLazy = false;
-  private long totalSize = 0; // scaled bytes seen for the current element
-  private double scalingFactor = 1.0; // kept across advance(); only setScalingFactor changes it
-
-  public ElementByteSizeObserver(Counter<Long> counter) {
-    this.counter = counter;
-  }
-
-  /**
-   * Sets byte counting for the current element as lazy. That is, the
-   * observer will get notified of the element's byte count only as
-   * element's pieces are being processed or iterated over.
-   */
-  public void setLazy() {
-    isLazy = true; // cleared again by advance()
-  }
-
-  /**
-   * Returns whether byte counting for the current element is lazy, that is,
-   * whether the observer gets notified of the element's byte count only as
-   * element's pieces are being processed or iterated over.
-   */
-  public boolean getIsLazy() {
-    return isLazy;
-  }
-
-  /**
-   * Updates the observer with a context specified, but without an instance of
-   * the Observable.
-   */
-  public void update(Object obj) {
-    update(null, obj);
-  }
-
-  /**
-   * Sets a multiplier to use on observed sizes.
-   */
-  public void setScalingFactor(double scalingFactor) {
-    this.scalingFactor = scalingFactor;
-  }
-
-  @Override
-  public void update(Observable obs, Object obj) { // obs is not used
-    if (obj instanceof Long) {
-      totalSize += scalingFactor * (Long) obj; // += narrows the double product back to long
-    } else if (obj instanceof Integer) {
-      totalSize += scalingFactor * (Integer) obj; // += narrows the double product back to long
-    } else {
-      throw new AssertionError("unexpected parameter object"); // also reached when obj is null
-    }
-  }
-
-  /**
-   * Advances the observer to the next element. Adds the current total byte
-   * size to the counter, and prepares the observer for the next element.
-   */
-  public void advance() {
-    counter.addValue(totalSize);
-
-    totalSize = 0;
-    isLazy = false;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java
deleted file mode 100644
index 0948747..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/PeekingReiterator.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util.common;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkState;
-
-import java.util.NoSuchElementException;
-
-/**
- * A {@link Reiterator} that supports one-element lookahead during iteration.
- *
- * @param <T> the type of elements returned by this iterator
- */
-public final class PeekingReiterator<T> implements Reiterator<T> {
-  private T nextElement;
-  private boolean nextElementComputed;
-  private final Reiterator<T> iterator;
-
-  public PeekingReiterator(Reiterator<T> iterator) {
-    this.iterator = checkNotNull(iterator);
-  }
-
-  PeekingReiterator(PeekingReiterator<T> it) {
-    this.iterator = checkNotNull(checkNotNull(it).iterator.copy());
-    this.nextElement = it.nextElement;
-    this.nextElementComputed = it.nextElementComputed;
-  }
-
-  @Override
-  public boolean hasNext() {
-    computeNext();
-    return nextElementComputed;
-  }
-
-  @Override
-  public T next() {
-    T result = peek();
-    nextElementComputed = false;
-    return result;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * <p>If {@link #peek} is called, {@code remove} is disallowed until
-   * {@link #next} has been subsequently called.
-   */
-  @Override
-  public void remove() {
-    checkState(!nextElementComputed,
-        "After peek(), remove() is disallowed until next() is called");
-    iterator.remove();
-  }
-
-  @Override
-  public PeekingReiterator<T> copy() {
-    return new PeekingReiterator<>(this);
-  }
-
-  /**
-   * Returns the element that would be returned by {@link #next}, without
-   * actually consuming the element.
-   * @throws NoSuchElementException if there is no next element
-   */
-  public T peek() {
-    computeNext();
-    if (!nextElementComputed) {
-      throw new NoSuchElementException();
-    }
-    return nextElement;
-  }
-
-  private void computeNext() {
-    if (nextElementComputed) {
-      return;
-    }
-    if (!iterator.hasNext()) {
-      return;
-    }
-    nextElement = iterator.next();
-    nextElementComputed = true;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpers.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpers.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpers.java
deleted file mode 100644
index f87242f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/ReflectHelpers.java
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.common;
-
-import static java.util.Arrays.asList;
-
-import com.google.common.base.Function;
-import com.google.common.base.Joiner;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.FluentIterable;
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Queues;
-
-import java.lang.reflect.GenericArrayType;
-import java.lang.reflect.Method;
-import java.lang.reflect.ParameterizedType;
-import java.lang.reflect.Type;
-import java.lang.reflect.TypeVariable;
-import java.lang.reflect.WildcardType;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.LinkedHashSet;
-import java.util.Queue;
-
-import javax.annotation.Nullable;
-
-/**
- * Utilities for working with {@link Class Classes} and {@link Method Methods}.
- */
-public class ReflectHelpers {
-
-  private static final Joiner COMMA_SEPARATOR = Joiner.on(", ");
-
-  /** A {@link Function} that turns a method into a simple method signature. */
-  public static final Function<Method, String> METHOD_FORMATTER = new Function<Method, String>() {
-    @Override
-    public String apply(Method input) {
-      String parameterTypes = FluentIterable.from(asList(input.getParameterTypes()))
-          .transform(CLASS_SIMPLE_NAME)
-          .join(COMMA_SEPARATOR);
-      return String.format("%s(%s)",
-          input.getName(),
-          parameterTypes);
-    }
-  };
-
-  /** A {@link Function} that turns a method into the declaring class + method signature. */
-  public static final Function<Method, String> CLASS_AND_METHOD_FORMATTER =
-      new Function<Method, String>() {
-    @Override
-    public String apply(Method input) {
-      return String.format("%s#%s",
-          CLASS_NAME.apply(input.getDeclaringClass()),
-          METHOD_FORMATTER.apply(input));
-    }
-  };
-
-  /** A {@link Function} that returns the class's name, as given by {@link Class#getName()}. */
-  public static final Function<Class<?>, String> CLASS_NAME =
-      new Function<Class<?>, String>() {
-    @Override
-    public String apply(Class<?> input) {
-      return input.getName();
-    }
-  };
-
-  /** A {@link Function} that returns the class's simple name. */
-  public static final Function<Class<?>, String> CLASS_SIMPLE_NAME =
-      new Function<Class<?>, String>() {
-    @Override
-    public String apply(Class<?> input) {
-      return input.getSimpleName();
-    }
-  };
-
-  /** A {@link Function} that formats types. */
-  public static final Function<Type, String> TYPE_SIMPLE_DESCRIPTION =
-      new Function<Type, String>() {
-    @Override
-    @Nullable
-    public String apply(@Nullable Type input) {
-      StringBuilder builder = new StringBuilder();
-      format(builder, input);
-      return builder.toString();
-    }
-
-    private void format(StringBuilder builder, Type t) {
-      if (t instanceof Class) {
-        formatClass(builder, (Class<?>) t);
-      } else if (t instanceof TypeVariable) {
-        formatTypeVariable(builder, (TypeVariable<?>) t);
-      } else if (t instanceof WildcardType) {
-        formatWildcardType(builder, (WildcardType) t);
-      } else if (t instanceof ParameterizedType) {
-        formatParameterizedType(builder, (ParameterizedType) t);
-      } else if (t instanceof GenericArrayType) {
-        formatGenericArrayType(builder, (GenericArrayType) t);
-      } else {
-        builder.append(t.toString());
-      }
-    }
-
-    private void formatClass(StringBuilder builder, Class<?> clazz) {
-      builder.append(clazz.getSimpleName());
-    }
-
-    private void formatTypeVariable(StringBuilder builder, TypeVariable<?> t) {
-      builder.append(t.getName());
-    }
-
-    private void formatWildcardType(StringBuilder builder, WildcardType t) {
-      builder.append("?");
-      for (Type lowerBound : t.getLowerBounds()) {
-        builder.append(" super ");
-        format(builder, lowerBound);
-      }
-      for (Type upperBound : t.getUpperBounds()) {
-        if (!Object.class.equals(upperBound)) {
-          builder.append(" extends ");
-          format(builder, upperBound);
-        }
-      }
-    }
-
-    private void formatParameterizedType(StringBuilder builder, ParameterizedType t) {
-      format(builder, t.getRawType());
-      builder.append('<');
-      COMMA_SEPARATOR.appendTo(builder,
-          FluentIterable.from(asList(t.getActualTypeArguments()))
-          .transform(TYPE_SIMPLE_DESCRIPTION));
-      builder.append('>');
-    }
-
-    private void formatGenericArrayType(StringBuilder builder, GenericArrayType t) {
-      format(builder, t.getGenericComponentType());
-      builder.append("[]");
-    }
-  };
-
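-  /**
-   * Returns the interfaces directly implemented by the given clazz and, transitively, their superinterfaces.
-   * @param clazz the class (or interface) to inspect; must not be null
-   * @return the collected interfaces, without duplicates, in the order they were discovered
-   */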
-  public static FluentIterable<Class<?>> getClosureOfInterfaces(Class<?> clazz) {
-    Preconditions.checkNotNull(clazz);
-    Queue<Class<?>> interfacesToProcess = Queues.newArrayDeque();
-    Collections.addAll(interfacesToProcess, clazz.getInterfaces());
-
-    LinkedHashSet<Class<?>> interfaces = new LinkedHashSet<>();
-    while (!interfacesToProcess.isEmpty()) {
-      Class<?> current = interfacesToProcess.remove();
-      if (interfaces.add(current)) {
-        Collections.addAll(interfacesToProcess, current.getInterfaces());
-      }
-    }
-    return FluentIterable.from(interfaces);
-  }
-
-  /**
-   * Returns all the methods visible from the provided interfaces.
-   *
-   * @param interfaces The interfaces to use when searching for all their methods.
-   * @return An iterable of {@link Method}s which interfaces expose.
-   */
-  public static Iterable<Method> getClosureOfMethodsOnInterfaces(
-      Iterable<? extends Class<?>> interfaces) {
-    return FluentIterable.from(interfaces).transformAndConcat(
-        new Function<Class<?>, Iterable<Method>>() {
-          @Override
-          public Iterable<Method> apply(Class<?> input) {
-            return getClosureOfMethodsOnInterface(input);
-          }
-    });
-  }
-
-  /**
-   * Returns all the methods visible from {@code iface}.
-   *
-   * @param iface The interface to use when searching for all its methods.
-   * @return An iterable of {@link Method}s which {@code iface} exposes.
-   */
-  public static Iterable<Method> getClosureOfMethodsOnInterface(Class<?> iface) {
-    Preconditions.checkNotNull(iface);
-    Preconditions.checkArgument(iface.isInterface());
-    ImmutableSet.Builder<Method> builder = ImmutableSet.builder();
-    Queue<Class<?>> interfacesToProcess = Queues.newArrayDeque();
-    interfacesToProcess.add(iface);
-    while (!interfacesToProcess.isEmpty()) {
-      Class<?> current = interfacesToProcess.remove();
-      builder.add(current.getMethods());
-      interfacesToProcess.addAll(Arrays.asList(current.getInterfaces()));
-    }
-    return builder.build();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterable.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterable.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterable.java
deleted file mode 100644
index 01c5775..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterable.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util.common;
-
-/**
- * An {@link Iterable} that returns {@link Reiterator} iterators.
- *
- * @param <T> the type of elements returned by the iterator
- */
-public interface Reiterable<T> extends Iterable<T> {
-  @Override
-  public Reiterator<T> iterator();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterator.java
deleted file mode 100644
index dd8036d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/Reiterator.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util.common;
-
-import java.util.Iterator;
-
-/**
- * An {@link Iterator} with the ability to copy its iteration state.
- *
- * @param <T> the type of elements returned by this iterator
- */
-public interface Reiterator<T> extends Iterator<T> {
-  /**
-   * Returns a copy of the current {@link Reiterator}.  The copy's iteration
-   * state is logically independent of the current iterator; each may be
-   * advanced without affecting the other.
-   *
-   * <p>The returned {@code Reiterator} is not guaranteed to return
-   * referentially identical iteration results as the original
-   * {@link Reiterator}, although {@link Object#equals} will typically return
-   * true for the corresponding elements of each if the original source is
-   * logically immutable.
-   */
-  public Reiterator<T> copy();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/package-info.java
deleted file mode 100644
index 7fb16c5..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/package-info.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/** Defines utilities shared by multiple PipelineRunner implementations. **/
-package com.google.cloud.dataflow.sdk.util.common;

[32/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java
deleted file mode 100644
index 8b066ab..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/PubsubIOTranslator.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners.dataflow;
-
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-
-/**
- * Pubsub transform support code for the Dataflow backend.
- */
-public class PubsubIOTranslator {
-
-  /**
-   * Implements PubsubIO Read translation for the Dataflow backend.
-   */
-  public static class ReadTranslator<T> implements TransformTranslator<PubsubIO.Read.Bound<T>> {
-    @Override
-    @SuppressWarnings({"rawtypes", "unchecked"})
-    public void translate(
-        PubsubIO.Read.Bound transform,
-        TranslationContext context) {
-      translateReadHelper(transform, context);
-    }
-
-    private <T> void translateReadHelper(
-        PubsubIO.Read.Bound<T> transform,
-        TranslationContext context) {
-      if (!context.getPipelineOptions().isStreaming()) {
-        throw new IllegalArgumentException(
-            "PubsubIO.Read can only be used with the Dataflow streaming runner.");
-      }
-
-      context.addStep(transform, "ParallelRead");
-      context.addInput(PropertyNames.FORMAT, "pubsub");
-      if (transform.getTopic() != null) {
-        context.addInput(PropertyNames.PUBSUB_TOPIC, transform.getTopic().asV1Beta1Path());
-      }
-      if (transform.getSubscription() != null) {
-        context.addInput(
-            PropertyNames.PUBSUB_SUBSCRIPTION, transform.getSubscription().asV1Beta1Path());
-      }
-      if (transform.getTimestampLabel() != null) {
-        context.addInput(PropertyNames.PUBSUB_TIMESTAMP_LABEL, transform.getTimestampLabel());
-      }
-      if (transform.getIdLabel() != null) {
-        context.addInput(PropertyNames.PUBSUB_ID_LABEL, transform.getIdLabel());
-      }
-      context.addValueOnlyOutput(PropertyNames.OUTPUT, context.getOutput(transform));
-    }
-  }
-
-  /**
-   * Implements PubsubIO Write translation for the Dataflow backend.
-   */
-  public static class WriteTranslator<T>
-      implements TransformTranslator<DataflowPipelineRunner.StreamingPubsubIOWrite<T>> {
-
-    @Override
-    @SuppressWarnings({"rawtypes", "unchecked"})
-    public void translate(
-        DataflowPipelineRunner.StreamingPubsubIOWrite transform,
-        TranslationContext context) {
-      translateWriteHelper(transform, context);
-    }
-
-    private <T> void translateWriteHelper(
-        DataflowPipelineRunner.StreamingPubsubIOWrite<T> customTransform,
-        TranslationContext context) {
-      if (!context.getPipelineOptions().isStreaming()) {
-        throw new IllegalArgumentException(
-            "PubsubIO.Write is non-primitive for the Dataflow batch runner.");
-      }
-
-      PubsubIO.Write.Bound<T> transform = customTransform.getOverriddenTransform();
-
-      context.addStep(customTransform, "ParallelWrite");
-      context.addInput(PropertyNames.FORMAT, "pubsub");
-      context.addInput(PropertyNames.PUBSUB_TOPIC, transform.getTopic().asV1Beta1Path());
-      if (transform.getTimestampLabel() != null) {
-        context.addInput(PropertyNames.PUBSUB_TIMESTAMP_LABEL, transform.getTimestampLabel());
-      }
-      if (transform.getIdLabel() != null) {
-        context.addInput(PropertyNames.PUBSUB_ID_LABEL, transform.getIdLabel());
-      }
-      context.addEncodingInput(WindowedValue.getValueOnlyCoder(transform.getCoder()));
-      context.addInput(PropertyNames.PARALLEL_INPUT, context.getInput(customTransform));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/ReadTranslator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/ReadTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/ReadTranslator.java
deleted file mode 100644
index f110e84..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/ReadTranslator.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners.dataflow;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean;
-import static com.google.cloud.dataflow.sdk.util.Structs.addDictionary;
-import static com.google.cloud.dataflow.sdk.util.Structs.addLong;
-
-import com.google.api.services.dataflow.model.SourceMetadata;
-import com.google.cloud.dataflow.sdk.io.FileBasedSource;
-import com.google.cloud.dataflow.sdk.io.Read;
-import com.google.cloud.dataflow.sdk.io.Source;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.values.PValue;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Translator for the {@code Read} {@code PTransform} for the Dataflow back-end.
- */
-public class ReadTranslator implements TransformTranslator<Read.Bounded<?>> {
-  @Override
-  public void translate(Read.Bounded<?> transform, TranslationContext context) {
-    translateReadHelper(transform.getSource(), transform, context);
-  }
-
-  public static <T> void translateReadHelper(Source<T> source,
-      PTransform<?, ? extends PValue> transform,
-      DataflowPipelineTranslator.TranslationContext context) {
-    try {
-      // TODO: Move this validation out of translation once IOChannelUtils is portable
-      // and can be reconstructed on the worker.
-      if (source instanceof FileBasedSource) {
-        String filePatternOrSpec = ((FileBasedSource<?>) source).getFileOrPatternSpec();
-        context.getPipelineOptions()
-               .getPathValidator()
-               .validateInputFilePatternSupported(filePatternOrSpec);
-      }
-
-      context.addStep(transform, "ParallelRead");
-      context.addInput(PropertyNames.FORMAT, PropertyNames.CUSTOM_SOURCE_FORMAT);
-      context.addInput(
-          PropertyNames.SOURCE_STEP_INPUT,
-          cloudSourceToDictionary(
-              CustomSources.serializeToCloudSource(source, context.getPipelineOptions())));
-      context.addValueOnlyOutput(PropertyNames.OUTPUT, context.getOutput(transform));
-    } catch (Exception e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  // Represents a cloud Source as a dictionary for encoding inside the {@code SOURCE_STEP_INPUT}
-  // property of CloudWorkflowStep.input.
-  private static Map<String, Object> cloudSourceToDictionary(
-      com.google.api.services.dataflow.model.Source source) {
-    // Do not translate encoding - the source's encoding is translated elsewhere
-    // to the step's output info.
-    Map<String, Object> res = new HashMap<>();
-    addDictionary(res, PropertyNames.SOURCE_SPEC, source.getSpec());
-    if (source.getMetadata() != null) {
-      addDictionary(res, PropertyNames.SOURCE_METADATA,
-          cloudSourceMetadataToDictionary(source.getMetadata()));
-    }
-    if (source.getDoesNotNeedSplitting() != null) {
-      addBoolean(
-          res, PropertyNames.SOURCE_DOES_NOT_NEED_SPLITTING, source.getDoesNotNeedSplitting());
-    }
-    return res;
-  }
-
-  private static Map<String, Object> cloudSourceMetadataToDictionary(SourceMetadata metadata) {
-    Map<String, Object> res = new HashMap<>();
-    if (metadata.getProducesSortedKeys() != null) {
-      addBoolean(res, PropertyNames.SOURCE_PRODUCES_SORTED_KEYS, metadata.getProducesSortedKeys());
-    }
-    if (metadata.getEstimatedSizeBytes() != null) {
-      addLong(res, PropertyNames.SOURCE_ESTIMATED_SIZE_BYTES, metadata.getEstimatedSizeBytes());
-    }
-    if (metadata.getInfinite() != null) {
-      addBoolean(res, PropertyNames.SOURCE_IS_INFINITE, metadata.getInfinite());
-    }
-    return res;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java
deleted file mode 100644
index b6b2ce6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/dataflow/package-info.java
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Implementation of the {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner}.
- */
-package com.google.cloud.dataflow.sdk.runners.dataflow;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/BoundedReadEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/BoundedReadEvaluatorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/BoundedReadEvaluatorFactory.java
deleted file mode 100644
index eaea3ed..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/BoundedReadEvaluatorFactory.java
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.io.BoundedSource;
-import com.google.cloud.dataflow.sdk.io.BoundedSource.BoundedReader;
-import com.google.cloud.dataflow.sdk.io.Read.Bounded;
-import com.google.cloud.dataflow.sdk.io.Source.Reader;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import java.io.IOException;
-import java.util.Queue;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ConcurrentMap;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link TransformEvaluatorFactory} that produces {@link TransformEvaluator TransformEvaluators}
- * for the {@link Bounded Read.Bounded} primitive {@link PTransform}.
- */
-final class BoundedReadEvaluatorFactory implements TransformEvaluatorFactory {
-  /*
-   * An evaluator for a Source is stateful, to ensure data is not read multiple times.
-   * Evaluators are cached here to ensure that the reader is not restarted if the evaluator is
-   * retriggered.
-   */
-  private final ConcurrentMap<EvaluatorKey, Queue<? extends BoundedReadEvaluator<?>>>
-      sourceEvaluators = new ConcurrentHashMap<>();
-
-  @SuppressWarnings({"unchecked", "rawtypes"})
-  @Override
-  public <InputT> TransformEvaluator<InputT> forApplication(
-      AppliedPTransform<?, ?, ?> application,
-      @Nullable CommittedBundle<?> inputBundle,
-      InProcessEvaluationContext evaluationContext)
-      throws IOException {
-    return getTransformEvaluator((AppliedPTransform) application, evaluationContext);
-  }
-
-  private <OutputT> TransformEvaluator<?> getTransformEvaluator(
-      final AppliedPTransform<?, PCollection<OutputT>, Bounded<OutputT>> transform,
-      final InProcessEvaluationContext evaluationContext)
-      throws IOException {
-    BoundedReadEvaluator<?> evaluator =
-        getTransformEvaluatorQueue(transform, evaluationContext).poll();
-    if (evaluator == null) {
-      return EmptyTransformEvaluator.create(transform);
-    }
-    return evaluator;
-  }
-
-  /**
-   * Get the queue of {@link TransformEvaluator TransformEvaluators} that produce elements for the
-   * provided application of {@link Bounded Read.Bounded}, initializing it if required.
-   *
-   * <p>This method is thread-safe, and will only produce new evaluators if no other invocation has
-   * already done so.
-   */
-  @SuppressWarnings("unchecked")
-  private <OutputT> Queue<BoundedReadEvaluator<OutputT>> getTransformEvaluatorQueue(
-      final AppliedPTransform<?, PCollection<OutputT>, Bounded<OutputT>> transform,
-      final InProcessEvaluationContext evaluationContext) {
-    // Key by the application and the context the evaluation is occurring in (which call to
-    // Pipeline#run).
-    EvaluatorKey key = new EvaluatorKey(transform, evaluationContext);
-    Queue<BoundedReadEvaluator<OutputT>> evaluatorQueue =
-        (Queue<BoundedReadEvaluator<OutputT>>) sourceEvaluators.get(key);
-    if (evaluatorQueue == null) {
-      evaluatorQueue = new ConcurrentLinkedQueue<>();
-      if (sourceEvaluators.putIfAbsent(key, evaluatorQueue) == null) {
-        // If no queue existed in the evaluators, add an evaluator to initialize the evaluator
-        // factory for this transform
-        BoundedReadEvaluator<OutputT> evaluator =
-            new BoundedReadEvaluator<OutputT>(transform, evaluationContext);
-        evaluatorQueue.offer(evaluator);
-      } else {
-        // otherwise return the existing Queue that arrived before us
-        evaluatorQueue = (Queue<BoundedReadEvaluator<OutputT>>) sourceEvaluators.get(key);
-      }
-    }
-    return evaluatorQueue;
-  }
-
-  /**
-   * A {@link BoundedReadEvaluator} produces elements from an underlying {@link BoundedSource},
-   * discarding all input elements. Within the call to {@link #finishBundle()}, the evaluator
-   * creates the {@link BoundedReader} and consumes all available input.
-   *
-   * <p>A {@link BoundedReadEvaluator} should only be created once per {@link BoundedSource}, and
-   * each evaluator should only be called once per evaluation of the pipeline. Otherwise, the source
-   * may produce duplicate elements.
-   */
-  private static class BoundedReadEvaluator<OutputT> implements TransformEvaluator<Object> {
-    private final AppliedPTransform<?, PCollection<OutputT>, Bounded<OutputT>> transform;
-    private final InProcessEvaluationContext evaluationContext;
-    private boolean contentsRemaining;
-
-    public BoundedReadEvaluator(
-        AppliedPTransform<?, PCollection<OutputT>, Bounded<OutputT>> transform,
-        InProcessEvaluationContext evaluationContext) {
-      this.transform = transform;
-      this.evaluationContext = evaluationContext;
-    }
-
-    @Override
-    public void processElement(WindowedValue<Object> element) {}
-
-    @Override
-    public InProcessTransformResult finishBundle() throws IOException {
-      try (final Reader<OutputT> reader =
-              transform
-                  .getTransform()
-                  .getSource()
-                  .createReader(evaluationContext.getPipelineOptions());) {
-        contentsRemaining = reader.start();
-        UncommittedBundle<OutputT> output =
-            evaluationContext.createRootBundle(transform.getOutput());
-        while (contentsRemaining) {
-          output.add(
-              WindowedValue.timestampedValueInGlobalWindow(
-                  reader.getCurrent(), reader.getCurrentTimestamp()));
-          contentsRemaining = reader.advance();
-        }
-        reader.close();
-        return StepTransformResult.withHold(transform, BoundedWindow.TIMESTAMP_MAX_VALUE)
-            .addOutput(output)
-            .build();
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CachedThreadPoolExecutorServiceFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CachedThreadPoolExecutorServiceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CachedThreadPoolExecutorServiceFactory.java
deleted file mode 100644
index 3350d2b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CachedThreadPoolExecutorServiceFactory.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-/**
- * A {@link ExecutorServiceFactory} that produces cached thread pools via
- * {@link Executors#newCachedThreadPool()}.
- */
-class CachedThreadPoolExecutorServiceFactory
-    implements DefaultValueFactory<ExecutorServiceFactory>, ExecutorServiceFactory {
-  private static final CachedThreadPoolExecutorServiceFactory INSTANCE =
-      new CachedThreadPoolExecutorServiceFactory();
-
-  @Override
-  public ExecutorServiceFactory create(PipelineOptions options) {
-    return INSTANCE;
-  }
-
-  @Override
-  public ExecutorService create() {
-    return Executors.newCachedThreadPool();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/Clock.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/Clock.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/Clock.java
deleted file mode 100644
index 11e6ec1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/Clock.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import org.joda.time.Instant;
-
-/**
- * Access to the current time.
- */
-public interface Clock {
-  /**
-   * Returns the current time as an {@link Instant}.
-   */
-  Instant now();
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CompletionCallback.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CompletionCallback.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CompletionCallback.java
deleted file mode 100644
index 2792631..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/CompletionCallback.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-
-/**
- * A callback for completing a bundle of input.
- */
-interface CompletionCallback {
-  /**
-   * Handle a successful result.
-   */
-  void handleResult(CommittedBundle<?> inputBundle, InProcessTransformResult result);
-
-  /**
-   * Handle a result that terminated abnormally due to the provided {@link Throwable}.
-   */
-  void handleThrowable(CommittedBundle<?> inputBundle, Throwable t);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ConsumerTrackingPipelineVisitor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ConsumerTrackingPipelineVisitor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ConsumerTrackingPipelineVisitor.java
deleted file mode 100644
index c602b23..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ConsumerTrackingPipelineVisitor.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.TransformTreeNode;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.PValue;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-/**
- * Tracks the {@link AppliedPTransform AppliedPTransforms} that consume each {@link PValue} in the
- * {@link Pipeline}. This is used to schedule consuming {@link PTransform PTransforms} to consume
- * input after the upstream transform has produced and committed output.
- */
-public class ConsumerTrackingPipelineVisitor implements PipelineVisitor {
-  private Map<PValue, Collection<AppliedPTransform<?, ?, ?>>> valueToConsumers = new HashMap<>();
-  private Collection<AppliedPTransform<?, ?, ?>> rootTransforms = new ArrayList<>();
-  private Collection<PCollectionView<?>> views = new ArrayList<>();
-  private Map<AppliedPTransform<?, ?, ?>, String> stepNames = new HashMap<>();
-  private Set<PValue> toFinalize = new HashSet<>();
-  private int numTransforms = 0;
-  private boolean finalized = false;
-
-  @Override
-  public void enterCompositeTransform(TransformTreeNode node) {
-    checkState(
-        !finalized,
-        "Attempting to traverse a pipeline (node %s) with a %s "
-            + "which has already visited a Pipeline and is finalized",
-        node.getFullName(),
-        ConsumerTrackingPipelineVisitor.class.getSimpleName());
-  }
-
-  @Override
-  public void leaveCompositeTransform(TransformTreeNode node) {
-    checkState(
-        !finalized,
-        "Attempting to traverse a pipeline (node %s) with a %s which is already finalized",
-        node.getFullName(),
-        ConsumerTrackingPipelineVisitor.class.getSimpleName());
-    if (node.isRootNode()) {
-      finalized = true;
-    }
-  }
-
-  @Override
-  public void visitTransform(TransformTreeNode node) {
-    toFinalize.removeAll(node.getInput().expand());
-    AppliedPTransform<?, ?, ?> appliedTransform = getAppliedTransform(node);
-    if (node.getInput().expand().isEmpty()) {
-      rootTransforms.add(appliedTransform);
-    } else {
-      for (PValue value : node.getInput().expand()) {
-        valueToConsumers.get(value).add(appliedTransform);
-        stepNames.put(appliedTransform, genStepName());
-      }
-    }
-  }
-
-  private AppliedPTransform<?, ?, ?> getAppliedTransform(TransformTreeNode node) {
-    @SuppressWarnings({"rawtypes", "unchecked"})
-    AppliedPTransform<?, ?, ?> application = AppliedPTransform.of(
-        node.getFullName(), node.getInput(), node.getOutput(), (PTransform) node.getTransform());
-    return application;
-  }
-
-  @Override
-  public void visitValue(PValue value, TransformTreeNode producer) {
-    toFinalize.add(value);
-    for (PValue expandedValue : value.expand()) {
-      valueToConsumers.put(expandedValue, new ArrayList<AppliedPTransform<?, ?, ?>>());
-      if (expandedValue instanceof PCollectionView) {
-        views.add((PCollectionView<?>) expandedValue);
-      }
-      expandedValue.recordAsOutput(getAppliedTransform(producer));
-    }
-    value.recordAsOutput(getAppliedTransform(producer));
-  }
-
-  private String genStepName() {
-    return String.format("s%s", numTransforms++);
-  }
-
-
-  /**
-   * Returns a mapping of each fully-expanded {@link PValue} to each
-   * {@link AppliedPTransform} that consumes it. For each AppliedPTransform in the collection
-   * returned from {@code getValueToCustomers().get(PValue)},
-   * {@code AppliedPTransform#getInput().expand()} will contain the argument {@link PValue}.
-   */
-  public Map<PValue, Collection<AppliedPTransform<?, ?, ?>>> getValueToConsumers() {
-    checkState(
-        finalized,
-        "Can't call getValueToConsumers before the Pipeline has been completely traversed");
-
-    return valueToConsumers;
-  }
-
-  /**
-   * Returns the mapping for each {@link AppliedPTransform} in the {@link Pipeline} to a unique step
-   * name.
-   */
-  public Map<AppliedPTransform<?, ?, ?>, String> getStepNames() {
-    checkState(
-        finalized, "Can't call getStepNames before the Pipeline has been completely traversed");
-
-    return stepNames;
-  }
-
-  /**
-   * Returns the root transforms of the {@link Pipeline}. A root {@link AppliedPTransform} consumes
-   * a {@link PInput} where the {@link PInput#expand()} returns an empty collection.
-   */
-  public Collection<AppliedPTransform<?, ?, ?>> getRootTransforms() {
-    checkState(
-        finalized,
-        "Can't call getRootTransforms before the Pipeline has been completely traversed");
-
-    return rootTransforms;
-  }
-
-  /**
-   * Returns all of the {@link PCollectionView PCollectionViews} contained in the visited
-   * {@link Pipeline}.
-   */
-  public Collection<PCollectionView<?>> getViews() {
-    checkState(finalized, "Can't call getViews before the Pipeline has been completely traversed");
-
-    return views;
-  }
-
-  /**
-   * Returns all of the {@link PValue PValues} that have been produced but not consumed. These
-   * {@link PValue PValues} should be finalized by the {@link PipelineRunner} before the
-   * {@link Pipeline} is executed.
-   */
-  public Set<PValue> getUnfinalizedPValues() {
-    checkState(
-        finalized,
-        "Can't call getUnfinalizedPValues before the Pipeline has been completely traversed");
-
-    return toFinalize;
-  }
-}
-
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/EmptyTransformEvaluator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/EmptyTransformEvaluator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/EmptyTransformEvaluator.java
deleted file mode 100644
index fc09237..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/EmptyTransformEvaluator.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-
-/**
- * A {@link TransformEvaluator} that ignores all input and produces no output. The result of
- * invoking {@link #finishBundle()} on this evaluator is to return an
- * {@link InProcessTransformResult} with no elements and a timestamp hold equal to
- * {@link BoundedWindow#TIMESTAMP_MIN_VALUE}. Because the result contains no elements, this hold
- * will not affect the watermark.
- */
-final class EmptyTransformEvaluator<T> implements TransformEvaluator<T> {
-  public static <T> TransformEvaluator<T> create(AppliedPTransform<?, ?, ?> transform) {
-    return new EmptyTransformEvaluator<T>(transform);
-  }
-
-  private final AppliedPTransform<?, ?, ?> transform;
-
-  private EmptyTransformEvaluator(AppliedPTransform<?, ?, ?> transform) {
-    this.transform = transform;
-  }
-
-  @Override
-  public void processElement(WindowedValue<T> element) throws Exception {}
-
-  @Override
-  public InProcessTransformResult finishBundle() throws Exception {
-    return StepTransformResult.withHold(transform, BoundedWindow.TIMESTAMP_MIN_VALUE)
-        .build();
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/EvaluatorKey.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/EvaluatorKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/EvaluatorKey.java
deleted file mode 100644
index 307bc5c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/EvaluatorKey.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-
-import java.util.Objects;
-
-/**
- * A (Transform, Pipeline Execution) key for stateful evaluators.
- *
- * Source evaluators are stateful to ensure data is not read multiple times. Evaluators are cached
- * to ensure that the reader is not restarted if the evaluator is retriggered. An
- * {@link EvaluatorKey} is used to ensure that multiple Pipelines can be executed without sharing
- * the same evaluators.
- */
-final class EvaluatorKey {
-  private final AppliedPTransform<?, ?, ?> transform;
-  private final InProcessEvaluationContext context;
-
-  public EvaluatorKey(AppliedPTransform<?, ?, ?> transform, InProcessEvaluationContext context) {
-    this.transform = transform;
-    this.context = context;
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(transform, context);
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == null || !(other instanceof EvaluatorKey)) {
-      return false;
-    }
-    EvaluatorKey that = (EvaluatorKey) other;
-    return Objects.equals(this.transform, that.transform)
-        && Objects.equals(this.context, that.context);
-  }
-}
-
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ExecutorServiceFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ExecutorServiceFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ExecutorServiceFactory.java
deleted file mode 100644
index 480bcde..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ExecutorServiceFactory.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import java.util.concurrent.ExecutorService;
-
-/**
- * A factory that creates {@link ExecutorService ExecutorServices}.
- * {@link ExecutorService ExecutorServices} created by this factory should be independent of one
- * another (e.g., if any executor is shut down the remaining executors should continue to process
- * work).
- */
-public interface ExecutorServiceFactory {
-  /**
-   * Create a new {@link ExecutorService}.
-   */
-  ExecutorService create();
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ExecutorServiceParallelExecutor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ExecutorServiceParallelExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ExecutorServiceParallelExecutor.java
deleted file mode 100644
index 68a1b8c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ExecutorServiceParallelExecutor.java
+++ /dev/null
@@ -1,432 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.FiredTimers;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.util.KeyedWorkItem;
-import com.google.cloud.dataflow.sdk.util.KeyedWorkItems;
-import com.google.cloud.dataflow.sdk.util.TimeDomain;
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Optional;
-import com.google.common.collect.ImmutableList;
-
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Map;
-import java.util.Queue;
-import java.util.Set;
-import java.util.concurrent.ArrayBlockingQueue;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ConcurrentMap;
-import java.util.concurrent.ExecutorService;
-
-import javax.annotation.Nullable;
-
-/**
- * An {@link InProcessExecutor} that uses an underlying {@link ExecutorService} and
- * {@link InProcessEvaluationContext} to execute a {@link Pipeline}.
- */
-final class ExecutorServiceParallelExecutor implements InProcessExecutor {
-  private static final Logger LOG = LoggerFactory.getLogger(ExecutorServiceParallelExecutor.class);
-
-  private final ExecutorService executorService;
-
-  private final Map<PValue, Collection<AppliedPTransform<?, ?, ?>>> valueToConsumers;
-  private final Set<PValue> keyedPValues;
-  private final TransformEvaluatorRegistry registry;
-  private final InProcessEvaluationContext evaluationContext;
-
-  private final ConcurrentMap<StepAndKey, TransformExecutorService> currentEvaluations;
-  private final ConcurrentMap<TransformExecutor<?>, Boolean> scheduledExecutors;
-
-  private final Queue<ExecutorUpdate> allUpdates;
-  private final BlockingQueue<VisibleExecutorUpdate> visibleUpdates;
-
-  private final TransformExecutorService parallelExecutorService;
-  private final CompletionCallback defaultCompletionCallback;
-
-  private Collection<AppliedPTransform<?, ?, ?>> rootNodes;
-
-  public static ExecutorServiceParallelExecutor create(
-      ExecutorService executorService,
-      Map<PValue, Collection<AppliedPTransform<?, ?, ?>>> valueToConsumers,
-      Set<PValue> keyedPValues,
-      TransformEvaluatorRegistry registry,
-      InProcessEvaluationContext context) {
-    return new ExecutorServiceParallelExecutor(
-        executorService, valueToConsumers, keyedPValues, registry, context);
-  }
-
-  private ExecutorServiceParallelExecutor(
-      ExecutorService executorService,
-      Map<PValue, Collection<AppliedPTransform<?, ?, ?>>> valueToConsumers,
-      Set<PValue> keyedPValues,
-      TransformEvaluatorRegistry registry,
-      InProcessEvaluationContext context) {
-    this.executorService = executorService;
-    this.valueToConsumers = valueToConsumers;
-    this.keyedPValues = keyedPValues;
-    this.registry = registry;
-    this.evaluationContext = context;
-
-    currentEvaluations = new ConcurrentHashMap<>();
-    scheduledExecutors = new ConcurrentHashMap<>();
-
-    this.allUpdates = new ConcurrentLinkedQueue<>();
-    this.visibleUpdates = new ArrayBlockingQueue<>(20);
-
-    parallelExecutorService =
-        TransformExecutorServices.parallel(executorService, scheduledExecutors);
-    defaultCompletionCallback = new DefaultCompletionCallback();
-  }
-
-  @Override
-  public void start(Collection<AppliedPTransform<?, ?, ?>> roots) {
-    rootNodes = ImmutableList.copyOf(roots);
-    Runnable monitorRunnable = new MonitorRunnable();
-    executorService.submit(monitorRunnable);
-  }
-
-  @SuppressWarnings("unchecked")
-  public void scheduleConsumption(
-      AppliedPTransform<?, ?, ?> consumer,
-      @Nullable CommittedBundle<?> bundle,
-      CompletionCallback onComplete) {
-    evaluateBundle(consumer, bundle, onComplete);
-  }
-
-  private <T> void evaluateBundle(
-      final AppliedPTransform<?, ?, ?> transform,
-      @Nullable final CommittedBundle<T> bundle,
-      final CompletionCallback onComplete) {
-    TransformExecutorService transformExecutor;
-    if (bundle != null && isKeyed(bundle.getPCollection())) {
-      final StepAndKey stepAndKey =
-          StepAndKey.of(transform, bundle == null ? null : bundle.getKey());
-      transformExecutor = getSerialExecutorService(stepAndKey);
-    } else {
-      transformExecutor = parallelExecutorService;
-    }
-    TransformExecutor<T> callable =
-        TransformExecutor.create(
-            registry, evaluationContext, bundle, transform, onComplete, transformExecutor);
-    transformExecutor.schedule(callable);
-  }
-
-  private boolean isKeyed(PValue pvalue) {
-    return keyedPValues.contains(pvalue);
-  }
-
-  private void scheduleConsumers(CommittedBundle<?> bundle) {
-    for (AppliedPTransform<?, ?, ?> consumer : valueToConsumers.get(bundle.getPCollection())) {
-      scheduleConsumption(consumer, bundle, defaultCompletionCallback);
-    }
-  }
-
-  private TransformExecutorService getSerialExecutorService(StepAndKey stepAndKey) {
-    if (!currentEvaluations.containsKey(stepAndKey)) {
-      currentEvaluations.putIfAbsent(
-          stepAndKey, TransformExecutorServices.serial(executorService, scheduledExecutors));
-    }
-    return currentEvaluations.get(stepAndKey);
-  }
-
-  @Override
-  public void awaitCompletion() throws Throwable {
-    VisibleExecutorUpdate update;
-    do {
-      update = visibleUpdates.take();
-      if (update.throwable.isPresent()) {
-        throw update.throwable.get();
-      }
-    } while (!update.isDone());
-    executorService.shutdown();
-  }
-
-  /**
-   * The default {@link CompletionCallback}. The default completion callback is used to complete
-   * transform evaluations that are triggered due to the arrival of elements from an upstream
-   * transform, or for a source transform.
-   */
-  private class DefaultCompletionCallback implements CompletionCallback {
-    @Override
-    public void handleResult(CommittedBundle<?> inputBundle, InProcessTransformResult result) {
-      Iterable<? extends CommittedBundle<?>> resultBundles =
-          evaluationContext.handleResult(inputBundle, Collections.<TimerData>emptyList(), result);
-      for (CommittedBundle<?> outputBundle : resultBundles) {
-        allUpdates.offer(ExecutorUpdate.fromBundle(outputBundle));
-      }
-    }
-
-    @Override
-    public void handleThrowable(CommittedBundle<?> inputBundle, Throwable t) {
-      allUpdates.offer(ExecutorUpdate.fromThrowable(t));
-    }
-  }
-
-  /**
-   * A {@link CompletionCallback} where the completed bundle was produced to deliver some collection
-   * of {@link TimerData timers}. When the evaluator completes successfully, reports all of the
-   * timers used to create the input to the {@link InProcessEvaluationContext evaluation context}
-   * as part of the result.
-   */
-  private class TimerCompletionCallback implements CompletionCallback {
-    private final Iterable<TimerData> timers;
-
-    private TimerCompletionCallback(Iterable<TimerData> timers) {
-      this.timers = timers;
-    }
-
-    @Override
-    public void handleResult(CommittedBundle<?> inputBundle, InProcessTransformResult result) {
-      Iterable<? extends CommittedBundle<?>> resultBundles =
-          evaluationContext.handleResult(inputBundle, timers, result);
-      for (CommittedBundle<?> outputBundle : resultBundles) {
-        allUpdates.offer(ExecutorUpdate.fromBundle(outputBundle));
-      }
-    }
-
-    @Override
-    public void handleThrowable(CommittedBundle<?> inputBundle, Throwable t) {
-      allUpdates.offer(ExecutorUpdate.fromThrowable(t));
-    }
-  }
-
-  /**
-   * An internal status update on the state of the executor.
-   *
-   * Used to signal when the executor should be shut down (due to an exception).
-   */
-  private static class ExecutorUpdate {
-    private final Optional<? extends CommittedBundle<?>> bundle;
-    private final Optional<? extends Throwable> throwable;
-
-    public static ExecutorUpdate fromBundle(CommittedBundle<?> bundle) {
-      return new ExecutorUpdate(bundle, null);
-    }
-
-    public static ExecutorUpdate fromThrowable(Throwable t) {
-      return new ExecutorUpdate(null, t);
-    }
-
-    private ExecutorUpdate(CommittedBundle<?> producedBundle, Throwable throwable) {
-      this.bundle = Optional.fromNullable(producedBundle);
-      this.throwable = Optional.fromNullable(throwable);
-    }
-
-    public Optional<? extends CommittedBundle<?>> getBundle() {
-      return bundle;
-    }
-
-    public Optional<? extends Throwable> getException() {
-      return throwable;
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(ExecutorUpdate.class)
-          .add("bundle", bundle)
-          .add("exception", throwable)
-          .toString();
-    }
-  }
-
-  /**
-   * An update of interest to the user. Used in {@link #awaitCompletion} to decide whether to
-   * return normally or throw an exception.
-   */
-  private static class VisibleExecutorUpdate {
-    private final Optional<? extends Throwable> throwable;
-    private final boolean done;
-
-    public static VisibleExecutorUpdate fromThrowable(Throwable e) {
-      return new VisibleExecutorUpdate(false, e);
-    }
-
-    public static VisibleExecutorUpdate finished() {
-      return new VisibleExecutorUpdate(true, null);
-    }
-
-    private VisibleExecutorUpdate(boolean done, @Nullable Throwable exception) {
-      this.throwable = Optional.fromNullable(exception);
-      this.done = done;
-    }
-
-    public boolean isDone() {
-      return done;
-    }
-  }
-
-  private class MonitorRunnable implements Runnable {
-    private final String runnableName =
-        String.format(
-            "%s$%s-monitor",
-            evaluationContext.getPipelineOptions().getAppName(),
-            ExecutorServiceParallelExecutor.class.getSimpleName());
-
-    @Override
-    public void run() {
-      String oldName = Thread.currentThread().getName();
-      Thread.currentThread().setName(runnableName);
-      try {
-        ExecutorUpdate update = allUpdates.poll();
-        if (update != null) {
-          LOG.debug("Executor Update: {}", update);
-          if (update.getBundle().isPresent()) {
-            scheduleConsumers(update.getBundle().get());
-          } else if (update.getException().isPresent()) {
-            visibleUpdates.offer(VisibleExecutorUpdate.fromThrowable(update.getException().get()));
-          }
-        }
-        boolean timersFired = fireTimers();
-        addWorkIfNecessary(timersFired);
-      } catch (InterruptedException e) {
-        Thread.currentThread().interrupt();
-        LOG.error("Monitor died due to being interrupted");
-        while (!visibleUpdates.offer(VisibleExecutorUpdate.fromThrowable(e))) {
-          visibleUpdates.poll();
-        }
-      } catch (Throwable t) {
-        LOG.error("Monitor thread died due to throwable", t);
-        while (!visibleUpdates.offer(VisibleExecutorUpdate.fromThrowable(t))) {
-          visibleUpdates.poll();
-        }
-      } finally {
-        if (!shouldShutdown()) {
-          // The monitor thread should always be scheduled; but we only need to be scheduled once
-          executorService.submit(this);
-        }
-        Thread.currentThread().setName(oldName);
-      }
-    }
-
-    /**
-     * Fires any available timers. Returns true if at least one timer was fired.
-     */
-    private boolean fireTimers() throws Exception {
-      try {
-        boolean firedTimers = false;
-        for (Map.Entry<AppliedPTransform<?, ?, ?>, Map<Object, FiredTimers>> transformTimers :
-            evaluationContext.extractFiredTimers().entrySet()) {
-          AppliedPTransform<?, ?, ?> transform = transformTimers.getKey();
-          for (Map.Entry<Object, FiredTimers> keyTimers : transformTimers.getValue().entrySet()) {
-            for (TimeDomain domain : TimeDomain.values()) {
-              Collection<TimerData> delivery = keyTimers.getValue().getTimers(domain);
-              if (delivery.isEmpty()) {
-                continue;
-              }
-              KeyedWorkItem<Object, Object> work =
-                  KeyedWorkItems.timersWorkItem(keyTimers.getKey(), delivery);
-              @SuppressWarnings({"unchecked", "rawtypes"})
-              CommittedBundle<?> bundle =
-                  InProcessBundle.<KeyedWorkItem<Object, Object>>keyed(
-                          (PCollection) transform.getInput(), keyTimers.getKey())
-                      .add(WindowedValue.valueInEmptyWindows(work))
-                      .commit(Instant.now());
-              scheduleConsumption(transform, bundle, new TimerCompletionCallback(delivery));
-              firedTimers = true;
-            }
-          }
-        }
-        return firedTimers;
-      } catch (Exception e) {
-        LOG.error("Internal Error while delivering timers", e);
-        throw e;
-      }
-    }
-
-    private boolean shouldShutdown() {
-      if (evaluationContext.isDone()) {
-        LOG.debug("Pipeline is finished. Shutting down. {}");
-        while (!visibleUpdates.offer(VisibleExecutorUpdate.finished())) {
-          visibleUpdates.poll();
-        }
-        executorService.shutdown();
-        return true;
-      }
-      return false;
-    }
-
-    /**
-     * If all active {@link TransformExecutor TransformExecutors} are in a blocked state,
-     * add more work from root nodes that may have additional work. This ensures that if a pipeline
-     * has elements available from the root nodes it will add those elements when necessary.
-     */
-    private void addWorkIfNecessary(boolean firedTimers) {
-      // If any timers have fired, they will add more work; We don't need to add more
-      if (firedTimers) {
-        return;
-      }
-      for (TransformExecutor<?> executor : scheduledExecutors.keySet()) {
-        if (!isExecutorBlocked(executor)) {
-          // We have at least one executor that can proceed without adding additional work
-          return;
-        }
-      }
-      // All current TransformExecutors are blocked; add more work from the roots.
-      for (AppliedPTransform<?, ?, ?> root : rootNodes) {
-        if (!evaluationContext.isDone(root)) {
-          scheduleConsumption(root, null, defaultCompletionCallback);
-        }
-      }
-    }
-
-    /**
-     * Return true if the provided executor might make more progress if no action is taken.
-     *
-     * <p>May return false even if all executor threads are currently blocked or cleaning up, as
-     * these can cause more work to be scheduled. If this does not occur, after these calls
-     * terminate, future calls will return true if all executors are waiting.
-     */
-    private boolean isExecutorBlocked(TransformExecutor<?> executor) {
-      Thread thread = executor.getThread();
-      if (thread == null) {
-        return false;
-      }
-      switch (thread.getState()) {
-        case TERMINATED:
-          throw new IllegalStateException(String.format(
-              "Unexpectedly encountered a Terminated TransformExecutor %s", executor));
-        case WAITING:
-        case TIMED_WAITING:
-          // The thread is waiting for some external input. Adding more work may cause the thread
-          // to stop waiting (e.g. the thread is waiting on an unbounded side input)
-          return true;
-        case BLOCKED:
-          // The executor is blocked on acquisition of a java monitor. This usually means it is
-          // making a call to the EvaluationContext, but not a model-blocking call - and will
-          // eventually complete, at which point we may reevaluate.
-        default:
-          // NEW and RUNNABLE threads can make progress
-          return false;
-      }
-    }
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/FlattenEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/FlattenEvaluatorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/FlattenEvaluatorFactory.java
deleted file mode 100644
index ce315be..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/FlattenEvaluatorFactory.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.Flatten;
-import com.google.cloud.dataflow.sdk.transforms.Flatten.FlattenPCollectionList;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-
-/**
- * The {@link InProcessPipelineRunner} {@link TransformEvaluatorFactory} for the {@link Flatten}
- * {@link PTransform}.
- */
-class FlattenEvaluatorFactory implements TransformEvaluatorFactory {
-  @Override
-  public <InputT> TransformEvaluator<InputT> forApplication(
-      AppliedPTransform<?, ?, ?> application,
-      CommittedBundle<?> inputBundle,
-      InProcessEvaluationContext evaluationContext) {
-    @SuppressWarnings({"cast", "unchecked", "rawtypes"})
-    TransformEvaluator<InputT> evaluator = (TransformEvaluator<InputT>) createInMemoryEvaluator(
-            (AppliedPTransform) application, inputBundle, evaluationContext);
-    return evaluator;
-  }
-
-  private <InputT> TransformEvaluator<InputT> createInMemoryEvaluator(
-      final AppliedPTransform<
-              PCollectionList<InputT>, PCollection<InputT>, FlattenPCollectionList<InputT>>
-          application,
-      final CommittedBundle<InputT> inputBundle,
-      final InProcessEvaluationContext evaluationContext) {
-    if (inputBundle == null) {
-      // it is impossible to call processElement on a flatten with no input bundle. A Flatten with
-      // no input bundle occurs as an output of Flatten.pcollections(PCollectionList.empty())
-      return new FlattenEvaluator<>(
-          null, StepTransformResult.withoutHold(application).build());
-    }
-    final UncommittedBundle<InputT> outputBundle =
-        evaluationContext.createBundle(inputBundle, application.getOutput());
-    final InProcessTransformResult result =
-        StepTransformResult.withoutHold(application).addOutput(outputBundle).build();
-    return new FlattenEvaluator<>(outputBundle, result);
-  }
-
-  private static class FlattenEvaluator<InputT> implements TransformEvaluator<InputT> {
-    private final UncommittedBundle<InputT> outputBundle;
-    private final InProcessTransformResult result;
-
-    public FlattenEvaluator(
-        UncommittedBundle<InputT> outputBundle, InProcessTransformResult result) {
-      this.outputBundle = outputBundle;
-      this.result = result;
-    }
-
-    @Override
-    public void processElement(WindowedValue<InputT> element) {
-      outputBundle.add(element);
-    }
-
-    @Override
-    public InProcessTransformResult finishBundle() {
-      return result;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ForwardingPTransform.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ForwardingPTransform.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ForwardingPTransform.java
deleted file mode 100644
index b736e35..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ForwardingPTransform.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.TypedPValue;
-
-/**
- * A base class for implementing {@link PTransform} overrides, which behave identically to the
- * delegate transform but with overridden methods. Implementors are required to implement
- * {@link #delegate()}, which returns the object to forward calls to, and {@link #apply(PInput)}.
- */
-public abstract class ForwardingPTransform<InputT extends PInput, OutputT extends POutput>
-    extends PTransform<InputT, OutputT> {
-  protected abstract PTransform<InputT, OutputT> delegate();
-
-  @Override
-  public OutputT apply(InputT input) {
-    return delegate().apply(input);
-  }
-
-  @Override
-  public void validate(InputT input) {
-    delegate().validate(input);
-  }
-
-  @Override
-  public String getName() {
-    return delegate().getName();
-  }
-
-  @Override
-  public <T> Coder<T> getDefaultOutputCoder(InputT input, @SuppressWarnings("unused")
-      TypedPValue<T> output) throws CannotProvideCoderException {
-    return delegate().getDefaultOutputCoder(input, output);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/GroupByKeyEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/GroupByKeyEvaluatorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/GroupByKeyEvaluatorFactory.java
deleted file mode 100644
index 3ec4af1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/GroupByKeyEvaluatorFactory.java
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.StepTransformResult.Builder;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey.ReifyTimestampsAndWindows;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowViaWindowSetDoFn;
-import com.google.cloud.dataflow.sdk.util.KeyedWorkItem;
-import com.google.cloud.dataflow.sdk.util.KeyedWorkItemCoder;
-import com.google.cloud.dataflow.sdk.util.KeyedWorkItems;
-import com.google.cloud.dataflow.sdk.util.SystemReduceFn;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.annotations.VisibleForTesting;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * The {@link InProcessPipelineRunner} {@link TransformEvaluatorFactory} for the {@link GroupByKey}
- * {@link PTransform}.
- */
-class GroupByKeyEvaluatorFactory implements TransformEvaluatorFactory {
-  @Override
-  public <InputT> TransformEvaluator<InputT> forApplication(
-      AppliedPTransform<?, ?, ?> application,
-      CommittedBundle<?> inputBundle,
-      InProcessEvaluationContext evaluationContext) {
-    @SuppressWarnings({"cast", "unchecked", "rawtypes"})
-    TransformEvaluator<InputT> evaluator = createEvaluator(
-            (AppliedPTransform) application, (CommittedBundle) inputBundle, evaluationContext);
-    return evaluator;
-  }
-
-  private <K, V> TransformEvaluator<KV<K, WindowedValue<V>>> createEvaluator(
-      final AppliedPTransform<
-              PCollection<KV<K, WindowedValue<V>>>, PCollection<KeyedWorkItem<K, V>>,
-              InProcessGroupByKeyOnly<K, V>>
-          application,
-      final CommittedBundle<KV<K, V>> inputBundle,
-      final InProcessEvaluationContext evaluationContext) {
-    return new GroupByKeyEvaluator<K, V>(evaluationContext, inputBundle, application);
-  }
-
-  private static class GroupByKeyEvaluator<K, V>
-      implements TransformEvaluator<KV<K, WindowedValue<V>>> {
-    private final InProcessEvaluationContext evaluationContext;
-
-    private final CommittedBundle<KV<K, V>> inputBundle;
-    private final AppliedPTransform<
-            PCollection<KV<K, WindowedValue<V>>>, PCollection<KeyedWorkItem<K, V>>,
-            InProcessGroupByKeyOnly<K, V>>
-        application;
-    private final Coder<K> keyCoder;
-    private Map<GroupingKey<K>, List<WindowedValue<V>>> groupingMap;
-
-    public GroupByKeyEvaluator(
-        InProcessEvaluationContext evaluationContext,
-        CommittedBundle<KV<K, V>> inputBundle,
-        AppliedPTransform<
-                PCollection<KV<K, WindowedValue<V>>>, PCollection<KeyedWorkItem<K, V>>,
-                InProcessGroupByKeyOnly<K, V>>
-            application) {
-      this.evaluationContext = evaluationContext;
-      this.inputBundle = inputBundle;
-      this.application = application;
-
-      PCollection<KV<K, WindowedValue<V>>> input = application.getInput();
-      keyCoder = getKeyCoder(input.getCoder());
-      groupingMap = new HashMap<>();
-    }
-
-    private Coder<K> getKeyCoder(Coder<KV<K, WindowedValue<V>>> coder) {
-      if (!(coder instanceof KvCoder)) {
-        throw new IllegalStateException();
-      }
-      @SuppressWarnings("unchecked")
-      Coder<K> keyCoder = ((KvCoder<K, WindowedValue<V>>) coder).getKeyCoder();
-      return keyCoder;
-    }
-
-    @Override
-    public void processElement(WindowedValue<KV<K, WindowedValue<V>>> element) {
-      KV<K, WindowedValue<V>> kv = element.getValue();
-      K key = kv.getKey();
-      byte[] encodedKey;
-      try {
-        encodedKey = encodeToByteArray(keyCoder, key);
-      } catch (CoderException exn) {
-        // TODO: Put in better element printing:
-        // truncate if too long.
-        throw new IllegalArgumentException(
-            String.format("unable to encode key %s of input to %s using %s", key, this, keyCoder),
-            exn);
-      }
-      GroupingKey<K> groupingKey = new GroupingKey<>(key, encodedKey);
-      List<WindowedValue<V>> values = groupingMap.get(groupingKey);
-      if (values == null) {
-        values = new ArrayList<WindowedValue<V>>();
-        groupingMap.put(groupingKey, values);
-      }
-      values.add(kv.getValue());
-    }
-
-    @Override
-    public InProcessTransformResult finishBundle() {
-      Builder resultBuilder = StepTransformResult.withoutHold(application);
-      for (Map.Entry<GroupingKey<K>, List<WindowedValue<V>>> groupedEntry :
-          groupingMap.entrySet()) {
-        K key = groupedEntry.getKey().key;
-        KeyedWorkItem<K, V> groupedKv =
-            KeyedWorkItems.elementsWorkItem(key, groupedEntry.getValue());
-        UncommittedBundle<KeyedWorkItem<K, V>> bundle =
-            evaluationContext.createKeyedBundle(inputBundle, key, application.getOutput());
-        bundle.add(WindowedValue.valueInEmptyWindows(groupedKv));
-        resultBuilder.addOutput(bundle);
-      }
-      return resultBuilder.build();
-    }
-
-    private static class GroupingKey<K> {
-      private K key;
-      private byte[] encodedKey;
-
-      public GroupingKey(K key, byte[] encodedKey) {
-        this.key = key;
-        this.encodedKey = encodedKey;
-      }
-
-      @Override
-      public boolean equals(Object o) {
-        if (o instanceof GroupingKey) {
-          GroupingKey<?> that = (GroupingKey<?>) o;
-          return Arrays.equals(this.encodedKey, that.encodedKey);
-        } else {
-          return false;
-        }
-      }
-
-      @Override
-      public int hashCode() {
-        return Arrays.hashCode(encodedKey);
-      }
-    }
-  }
-
-  /**
-   * An in-memory implementation of the {@link GroupByKey} primitive as a composite
-   * {@link PTransform}.
-   */
-  public static final class InProcessGroupByKey<K, V>
-      extends ForwardingPTransform<PCollection<KV<K, V>>, PCollection<KV<K, Iterable<V>>>> {
-    private final GroupByKey<K, V> original;
-
-    private InProcessGroupByKey(GroupByKey<K, V> from) {
-      this.original = from;
-    }
-
-    @Override
-    public PTransform<PCollection<KV<K, V>>, PCollection<KV<K, Iterable<V>>>> delegate() {
-      return original;
-    }
-
-    @Override
-    public PCollection<KV<K, Iterable<V>>> apply(PCollection<KV<K, V>> input) {
-      KvCoder<K, V> inputCoder = (KvCoder<K, V>) input.getCoder();
-
-      // This operation groups by the combination of key and window,
-      // merging windows as needed, using the windows assigned to the
-      // key/value input elements and the window merge operation of the
-      // window function associated with the input PCollection.
-      WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
-
-      // Use the default GroupAlsoByWindow implementation
-      DoFn<KeyedWorkItem<K, V>, KV<K, Iterable<V>>> groupAlsoByWindow =
-          groupAlsoByWindow(windowingStrategy, inputCoder.getValueCoder());
-
-      // By default, implement GroupByKey via a series of lower-level operations.
-      return input
-          // Make each input element's timestamp and assigned windows
-          // explicit, in the value part.
-          .apply(new ReifyTimestampsAndWindows<K, V>())
-
-          .apply(new InProcessGroupByKeyOnly<K, V>())
-          .setCoder(KeyedWorkItemCoder.of(inputCoder.getKeyCoder(),
-              inputCoder.getValueCoder(), input.getWindowingStrategy().getWindowFn().windowCoder()))
-
-          // Group each key's values by window, merging windows as needed.
-          .apply("GroupAlsoByWindow", ParDo.of(groupAlsoByWindow))
-
-          // And update the windowing strategy as appropriate.
-          .setWindowingStrategyInternal(original.updateWindowingStrategy(windowingStrategy))
-          .setCoder(
-              KvCoder.of(inputCoder.getKeyCoder(), IterableCoder.of(inputCoder.getValueCoder())));
-    }
-
-    private <W extends BoundedWindow>
-        DoFn<KeyedWorkItem<K, V>, KV<K, Iterable<V>>> groupAlsoByWindow(
-            final WindowingStrategy<?, W> windowingStrategy, final Coder<V> inputCoder) {
-      return GroupAlsoByWindowViaWindowSetDoFn.create(
-          windowingStrategy, SystemReduceFn.<K, V, W>buffering(inputCoder));
-    }
-  }
-
-  /**
-   * An implementation primitive to use in the evaluation of a {@link GroupByKey}
-   * {@link PTransform}.
-   */
-  public static final class InProcessGroupByKeyOnly<K, V>
-      extends PTransform<PCollection<KV<K, WindowedValue<V>>>, PCollection<KeyedWorkItem<K, V>>> {
-    @Override
-    public PCollection<KeyedWorkItem<K, V>> apply(PCollection<KV<K, WindowedValue<V>>> input) {
-      return PCollection.<KeyedWorkItem<K, V>>createPrimitiveOutputInternal(
-          input.getPipeline(), input.getWindowingStrategy(), input.isBounded());
-    }
-
-    @VisibleForTesting
-    InProcessGroupByKeyOnly() {}
-  }
-}

[51/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

Directory reorganization

Move Java SDK from "sdk/" into "sdks/java/core".


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/7bef2b7e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/7bef2b7e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/7bef2b7e

Branch: refs/heads/master
Commit: 7bef2b7ec4c2d60c28f0f5a8ccc6297edb934b11
Parents: 9f8dd18
Author: Davor Bonaci <da...@google.com>
Authored: Wed Mar 23 16:52:19 2016 -0700
Committer: Davor Bonaci <da...@google.com>
Committed: Wed Mar 23 18:11:34 2016 -0700

----------------------------------------------------------------------
 pom.xml                                         |    2 +-
 sdk/pom.xml                                     |  771 -----
 .../com/google/cloud/dataflow/sdk/Pipeline.java |  502 ---
 .../cloud/dataflow/sdk/PipelineResult.java      |   95 -
 .../dataflow/sdk/annotations/Experimental.java  |   80 -
 .../dataflow/sdk/annotations/package-info.java  |   20 -
 .../cloud/dataflow/sdk/coders/AtomicCoder.java  |   51 -
 .../cloud/dataflow/sdk/coders/AvroCoder.java    |  714 -----
 .../sdk/coders/BigEndianIntegerCoder.java       |   99 -
 .../dataflow/sdk/coders/BigEndianLongCoder.java |   99 -
 .../dataflow/sdk/coders/ByteArrayCoder.java     |  138 -
 .../cloud/dataflow/sdk/coders/ByteCoder.java    |  111 -
 .../dataflow/sdk/coders/ByteStringCoder.java    |  106 -
 .../sdk/coders/CannotProvideCoderException.java |   95 -
 .../google/cloud/dataflow/sdk/coders/Coder.java |  298 --
 .../dataflow/sdk/coders/CoderException.java     |   36 -
 .../dataflow/sdk/coders/CoderFactories.java     |  274 --
 .../cloud/dataflow/sdk/coders/CoderFactory.java |   43 -
 .../dataflow/sdk/coders/CoderProvider.java      |   33 -
 .../dataflow/sdk/coders/CoderProviders.java     |  164 -
 .../dataflow/sdk/coders/CoderRegistry.java      |  843 -----
 .../dataflow/sdk/coders/CollectionCoder.java    |   73 -
 .../cloud/dataflow/sdk/coders/CustomCoder.java  |  137 -
 .../cloud/dataflow/sdk/coders/DefaultCoder.java |   66 -
 .../dataflow/sdk/coders/DelegateCoder.java      |  164 -
 .../sdk/coders/DeterministicStandardCoder.java  |   38 -
 .../cloud/dataflow/sdk/coders/DoubleCoder.java  |  113 -
 .../dataflow/sdk/coders/DurationCoder.java      |   97 -
 .../cloud/dataflow/sdk/coders/EntityCoder.java  |   86 -
 .../cloud/dataflow/sdk/coders/InstantCoder.java |  113 -
 .../dataflow/sdk/coders/IterableCoder.java      |   78 -
 .../dataflow/sdk/coders/IterableLikeCoder.java  |  278 --
 .../cloud/dataflow/sdk/coders/JAXBCoder.java    |  135 -
 .../cloud/dataflow/sdk/coders/KvCoder.java      |  162 -
 .../cloud/dataflow/sdk/coders/KvCoderBase.java  |   61 -
 .../cloud/dataflow/sdk/coders/ListCoder.java    |   77 -
 .../cloud/dataflow/sdk/coders/MapCoder.java     |  160 -
 .../cloud/dataflow/sdk/coders/MapCoderBase.java |   54 -
 .../dataflow/sdk/coders/NullableCoder.java      |  175 -
 .../cloud/dataflow/sdk/coders/Proto2Coder.java  |  361 ---
 .../dataflow/sdk/coders/SerializableCoder.java  |  183 --
 .../cloud/dataflow/sdk/coders/SetCoder.java     |   94 -
 .../dataflow/sdk/coders/StandardCoder.java      |  229 --
 .../sdk/coders/StringDelegateCoder.java         |   86 -
 .../dataflow/sdk/coders/StringUtf8Coder.java    |  139 -
 .../sdk/coders/StructuralByteArray.java         |   56 -
 .../dataflow/sdk/coders/TableRowJsonCoder.java  |   82 -
 .../sdk/coders/TextualIntegerCoder.java         |   69 -
 .../cloud/dataflow/sdk/coders/VarIntCoder.java  |   97 -
 .../cloud/dataflow/sdk/coders/VarLongCoder.java |   96 -
 .../cloud/dataflow/sdk/coders/VoidCoder.java    |   76 -
 .../cloud/dataflow/sdk/coders/package-info.java |   44 -
 .../sdk/coders/protobuf/ProtoCoder.java         |  404 ---
 .../sdk/coders/protobuf/ProtobufUtil.java       |  171 -
 .../sdk/coders/protobuf/package-info.java       |   23 -
 .../google/cloud/dataflow/sdk/io/AvroIO.java    |  810 -----
 .../cloud/dataflow/sdk/io/AvroSource.java       |  647 ----
 .../cloud/dataflow/sdk/io/BigQueryIO.java       | 1499 ---------
 .../cloud/dataflow/sdk/io/BlockBasedSource.java |  237 --
 .../sdk/io/BoundedReadFromUnboundedSource.java  |  271 --
 .../cloud/dataflow/sdk/io/BoundedSource.java    |  277 --
 .../cloud/dataflow/sdk/io/CompressedSource.java |  413 ---
 .../cloud/dataflow/sdk/io/CountingInput.java    |  191 --
 .../cloud/dataflow/sdk/io/CountingSource.java   |  397 ---
 .../cloud/dataflow/sdk/io/DatastoreIO.java      |  957 ------
 .../cloud/dataflow/sdk/io/FileBasedSink.java    |  864 -----
 .../cloud/dataflow/sdk/io/FileBasedSource.java  |  648 ----
 .../dataflow/sdk/io/OffsetBasedSource.java      |  326 --
 .../google/cloud/dataflow/sdk/io/PubsubIO.java  | 1044 ------
 .../com/google/cloud/dataflow/sdk/io/Read.java  |  253 --
 .../dataflow/sdk/io/ShardNameTemplate.java      |   75 -
 .../com/google/cloud/dataflow/sdk/io/Sink.java  |  252 --
 .../google/cloud/dataflow/sdk/io/Source.java    |  193 --
 .../google/cloud/dataflow/sdk/io/TextIO.java    |  992 ------
 .../cloud/dataflow/sdk/io/UnboundedSource.java  |  253 --
 .../com/google/cloud/dataflow/sdk/io/Write.java |  213 --
 .../google/cloud/dataflow/sdk/io/XmlSink.java   |  310 --
 .../google/cloud/dataflow/sdk/io/XmlSource.java |  541 ----
 .../dataflow/sdk/io/bigtable/BigtableIO.java    |  987 ------
 .../sdk/io/bigtable/BigtableService.java        |  108 -
 .../sdk/io/bigtable/BigtableServiceImpl.java    |  241 --
 .../dataflow/sdk/io/bigtable/package-info.java  |   22 -
 .../cloud/dataflow/sdk/io/package-info.java     |   37 -
 .../cloud/dataflow/sdk/io/range/ByteKey.java    |  173 -
 .../dataflow/sdk/io/range/ByteKeyRange.java     |  376 ---
 .../sdk/io/range/ByteKeyRangeTracker.java       |  117 -
 .../sdk/io/range/OffsetRangeTracker.java        |  182 --
 .../dataflow/sdk/io/range/RangeTracker.java     |  220 --
 .../dataflow/sdk/io/range/package-info.java     |   23 -
 .../sdk/options/ApplicationNameOptions.java     |   33 -
 .../dataflow/sdk/options/BigQueryOptions.java   |   31 -
 .../BlockingDataflowPipelineOptions.java        |   49 -
 .../sdk/options/CloudDebuggerOptions.java       |   43 -
 .../options/DataflowPipelineDebugOptions.java   |  259 --
 .../sdk/options/DataflowPipelineOptions.java    |  134 -
 .../DataflowPipelineWorkerPoolOptions.java      |  254 --
 .../sdk/options/DataflowProfilingOptions.java   |   46 -
 .../options/DataflowWorkerHarnessOptions.java   |   50 -
 .../options/DataflowWorkerLoggingOptions.java   |  153 -
 .../cloud/dataflow/sdk/options/Default.java     |  153 -
 .../sdk/options/DefaultValueFactory.java        |   38 -
 .../cloud/dataflow/sdk/options/Description.java |   35 -
 .../sdk/options/DirectPipelineOptions.java      |   75 -
 .../cloud/dataflow/sdk/options/GcpOptions.java  |  291 --
 .../cloud/dataflow/sdk/options/GcsOptions.java  |  113 -
 .../sdk/options/GoogleApiDebugOptions.java      |   87 -
 .../cloud/dataflow/sdk/options/Hidden.java      |   33 -
 .../dataflow/sdk/options/PipelineOptions.java   |  249 --
 .../sdk/options/PipelineOptionsFactory.java     | 1537 ---------
 .../sdk/options/PipelineOptionsRegistrar.java   |   36 -
 .../sdk/options/PipelineOptionsValidator.java   |  102 -
 .../sdk/options/ProxyInvocationHandler.java     |  441 ---
 .../dataflow/sdk/options/StreamingOptions.java  |   30 -
 .../cloud/dataflow/sdk/options/Validation.java  |   46 -
 .../dataflow/sdk/options/package-info.java      |   25 -
 .../google/cloud/dataflow/sdk/package-info.java |   33 -
 .../runners/AggregatorPipelineExtractor.java    |   97 -
 .../runners/AggregatorRetrievalException.java   |   32 -
 .../dataflow/sdk/runners/AggregatorValues.java  |   52 -
 .../runners/BlockingDataflowPipelineRunner.java |  181 --
 .../DataflowJobAlreadyExistsException.java      |   34 -
 .../DataflowJobAlreadyUpdatedException.java     |   33 -
 .../runners/DataflowJobCancelledException.java  |   38 -
 .../sdk/runners/DataflowJobException.java       |   40 -
 .../runners/DataflowJobExecutionException.java  |   34 -
 .../runners/DataflowJobUpdatedException.java    |   51 -
 .../dataflow/sdk/runners/DataflowPipeline.java  |   59 -
 .../sdk/runners/DataflowPipelineJob.java        |  389 ---
 .../sdk/runners/DataflowPipelineRegistrar.java  |   58 -
 .../sdk/runners/DataflowPipelineRunner.java     | 3003 ------------------
 .../runners/DataflowPipelineRunnerHooks.java    |   37 -
 .../sdk/runners/DataflowPipelineTranslator.java | 1104 -------
 .../sdk/runners/DataflowServiceException.java   |   32 -
 .../dataflow/sdk/runners/DirectPipeline.java    |   55 -
 .../sdk/runners/DirectPipelineRegistrar.java    |   53 -
 .../sdk/runners/DirectPipelineRunner.java       | 1156 -------
 .../dataflow/sdk/runners/PipelineRunner.java    |   76 -
 .../sdk/runners/PipelineRunnerRegistrar.java    |   40 -
 .../sdk/runners/RecordingPipelineVisitor.java   |   54 -
 .../sdk/runners/TransformHierarchy.java         |  104 -
 .../dataflow/sdk/runners/TransformTreeNode.java |  252 --
 .../sdk/runners/dataflow/AssignWindows.java     |   88 -
 .../runners/dataflow/BigQueryIOTranslator.java  |  125 -
 .../sdk/runners/dataflow/CustomSources.java     |  118 -
 .../dataflow/DataflowAggregatorTransforms.java  |   79 -
 .../dataflow/DataflowMetricUpdateExtractor.java |  110 -
 .../runners/dataflow/PubsubIOTranslator.java    |  107 -
 .../sdk/runners/dataflow/ReadTranslator.java    |  103 -
 .../sdk/runners/dataflow/package-info.java      |   20 -
 .../inprocess/BoundedReadEvaluatorFactory.java  |  152 -
 .../CachedThreadPoolExecutorServiceFactory.java |   42 -
 .../dataflow/sdk/runners/inprocess/Clock.java   |   29 -
 .../runners/inprocess/CompletionCallback.java   |   33 -
 .../ConsumerTrackingPipelineVisitor.java        |  173 -
 .../inprocess/EmptyTransformEvaluator.java      |   49 -
 .../sdk/runners/inprocess/EvaluatorKey.java     |   55 -
 .../inprocess/ExecutorServiceFactory.java       |   32 -
 .../ExecutorServiceParallelExecutor.java        |  432 ---
 .../inprocess/FlattenEvaluatorFactory.java      |   83 -
 .../runners/inprocess/ForwardingPTransform.java |   54 -
 .../inprocess/GroupByKeyEvaluatorFactory.java   |  252 --
 .../inprocess/InMemoryWatermarkManager.java     | 1310 --------
 .../sdk/runners/inprocess/InProcessBundle.java  |  121 -
 .../inprocess/InProcessBundleOutputManager.java |   50 -
 .../sdk/runners/inprocess/InProcessCreate.java  |  209 --
 .../inprocess/InProcessEvaluationContext.java   |  405 ---
 .../inprocess/InProcessExecutionContext.java    |  106 -
 .../runners/inprocess/InProcessExecutor.java    |   46 -
 .../inprocess/InProcessPipelineOptions.java     |   90 -
 .../inprocess/InProcessPipelineRunner.java      |  343 --
 .../inprocess/InProcessSideInputContainer.java  |  230 --
 .../inprocess/InProcessTimerInternals.java      |   84 -
 .../inprocess/InProcessTransformResult.java     |   75 -
 .../inprocess/KeyedPValueTrackingVisitor.java   |   95 -
 .../sdk/runners/inprocess/NanosOffsetClock.java |   58 -
 .../inprocess/ParDoInProcessEvaluator.java      |  109 -
 .../inprocess/ParDoMultiEvaluatorFactory.java   |   90 -
 .../inprocess/ParDoSingleEvaluatorFactory.java  |   87 -
 .../sdk/runners/inprocess/StepAndKey.java       |   68 -
 .../runners/inprocess/StepTransformResult.java  |  157 -
 .../runners/inprocess/TransformEvaluator.java   |   45 -
 .../inprocess/TransformEvaluatorFactory.java    |   42 -
 .../inprocess/TransformEvaluatorRegistry.java   |   72 -
 .../runners/inprocess/TransformExecutor.java    |  114 -
 .../inprocess/TransformExecutorService.java     |   34 -
 .../inprocess/TransformExecutorServices.java    |  153 -
 .../UnboundedReadEvaluatorFactory.java          |  168 -
 .../runners/inprocess/ViewEvaluatorFactory.java |  121 -
 .../inprocess/WatermarkCallbackExecutor.java    |  143 -
 .../dataflow/sdk/runners/package-info.java      |   33 -
 .../dataflow/sdk/runners/worker/IsmFormat.java  |  946 ------
 .../sdk/runners/worker/package-info.java        |   24 -
 .../dataflow/sdk/testing/CoderProperties.java   |  349 --
 .../dataflow/sdk/testing/DataflowAssert.java    |  825 -----
 .../dataflow/sdk/testing/RunnableOnService.java |   30 -
 .../sdk/testing/SerializableMatcher.java        |   36 -
 .../sdk/testing/SerializableMatchers.java       | 1180 -------
 .../dataflow/sdk/testing/SourceTestUtils.java   |  642 ----
 .../testing/TestDataflowPipelineOptions.java    |   26 -
 .../sdk/testing/TestDataflowPipelineRunner.java |  220 --
 .../dataflow/sdk/testing/TestPipeline.java      |  193 --
 .../dataflow/sdk/testing/WindowFnTestUtils.java |  325 --
 .../dataflow/sdk/testing/package-info.java      |   21 -
 .../dataflow/sdk/transforms/Aggregator.java     |   78 -
 .../sdk/transforms/AggregatorRetriever.java     |   36 -
 .../sdk/transforms/AppliedPTransform.java       |  100 -
 .../sdk/transforms/ApproximateQuantiles.java    |  766 -----
 .../sdk/transforms/ApproximateUnique.java       |  419 ---
 .../cloud/dataflow/sdk/transforms/Combine.java  | 2240 -------------
 .../dataflow/sdk/transforms/CombineFnBase.java  |  283 --
 .../dataflow/sdk/transforms/CombineFns.java     | 1100 -------
 .../sdk/transforms/CombineWithContext.java      |  277 --
 .../cloud/dataflow/sdk/transforms/Count.java    |  135 -
 .../cloud/dataflow/sdk/transforms/Create.java   |  426 ---
 .../cloud/dataflow/sdk/transforms/DoFn.java     |  563 ----
 .../dataflow/sdk/transforms/DoFnReflector.java  |  668 ----
 .../dataflow/sdk/transforms/DoFnTester.java     |  495 ---
 .../sdk/transforms/DoFnWithContext.java         |  416 ---
 .../cloud/dataflow/sdk/transforms/Filter.java   |  234 --
 .../sdk/transforms/FlatMapElements.java         |  145 -
 .../cloud/dataflow/sdk/transforms/Flatten.java  |  219 --
 .../dataflow/sdk/transforms/GroupByKey.java     |  575 ----
 .../transforms/IntraBundleParallelization.java  |  346 --
 .../cloud/dataflow/sdk/transforms/Keys.java     |   68 -
 .../cloud/dataflow/sdk/transforms/KvSwap.java   |   73 -
 .../dataflow/sdk/transforms/MapElements.java    |  112 -
 .../cloud/dataflow/sdk/transforms/Max.java      |  255 --
 .../cloud/dataflow/sdk/transforms/Mean.java     |  202 --
 .../cloud/dataflow/sdk/transforms/Min.java      |  255 --
 .../dataflow/sdk/transforms/PTransform.java     |  324 --
 .../cloud/dataflow/sdk/transforms/ParDo.java    | 1321 --------
 .../dataflow/sdk/transforms/Partition.java      |  173 -
 .../sdk/transforms/RemoveDuplicates.java        |  158 -
 .../cloud/dataflow/sdk/transforms/Sample.java   |  246 --
 .../sdk/transforms/SerializableComparator.java  |   28 -
 .../sdk/transforms/SerializableFunction.java    |   31 -
 .../dataflow/sdk/transforms/SimpleFunction.java |   54 -
 .../cloud/dataflow/sdk/transforms/Sum.java      |  188 --
 .../cloud/dataflow/sdk/transforms/Top.java      |  559 ----
 .../cloud/dataflow/sdk/transforms/Values.java   |   68 -
 .../cloud/dataflow/sdk/transforms/View.java     |  470 ---
 .../cloud/dataflow/sdk/transforms/WithKeys.java |  140 -
 .../dataflow/sdk/transforms/WithTimestamps.java |  129 -
 .../cloud/dataflow/sdk/transforms/Write.java    |   27 -
 .../sdk/transforms/display/DisplayData.java     |  530 ----
 .../sdk/transforms/display/HasDisplayData.java  |   53 -
 .../sdk/transforms/join/CoGbkResult.java        |  463 ---
 .../sdk/transforms/join/CoGbkResultSchema.java  |  134 -
 .../sdk/transforms/join/CoGroupByKey.java       |  211 --
 .../transforms/join/KeyedPCollectionTuple.java  |  247 --
 .../sdk/transforms/join/RawUnionValue.java      |   51 -
 .../sdk/transforms/join/UnionCoder.java         |  147 -
 .../sdk/transforms/join/package-info.java       |   21 -
 .../dataflow/sdk/transforms/package-info.java   |   43 -
 .../sdk/transforms/windowing/AfterAll.java      |  117 -
 .../windowing/AfterDelayFromFirstElement.java   |  322 --
 .../sdk/transforms/windowing/AfterEach.java     |  135 -
 .../sdk/transforms/windowing/AfterFirst.java    |  119 -
 .../sdk/transforms/windowing/AfterPane.java     |  145 -
 .../windowing/AfterProcessingTime.java          |   97 -
 .../AfterSynchronizedProcessingTime.java        |   75 -
 .../transforms/windowing/AfterWatermark.java    |  397 ---
 .../sdk/transforms/windowing/BoundedWindow.java |   46 -
 .../transforms/windowing/CalendarWindows.java   |  348 --
 .../transforms/windowing/DefaultTrigger.java    |   95 -
 .../sdk/transforms/windowing/FixedWindows.java  |  116 -
 .../sdk/transforms/windowing/GlobalWindow.java  |   68 -
 .../sdk/transforms/windowing/GlobalWindows.java |   63 -
 .../transforms/windowing/IntervalWindow.java    |  201 --
 .../transforms/windowing/InvalidWindows.java    |   87 -
 .../MergeOverlappingIntervalWindows.java        |   86 -
 .../windowing/NonMergingWindowFn.java           |   35 -
 .../transforms/windowing/OrFinallyTrigger.java  |  100 -
 .../sdk/transforms/windowing/OutputTimeFn.java  |  319 --
 .../sdk/transforms/windowing/OutputTimeFns.java |  168 -
 .../sdk/transforms/windowing/PaneInfo.java      |  384 ---
 .../windowing/PartitioningWindowFn.java         |   61 -
 .../sdk/transforms/windowing/Repeatedly.java    |  100 -
 .../sdk/transforms/windowing/Sessions.java      |  112 -
 .../transforms/windowing/SlidingWindows.java    |  214 --
 .../sdk/transforms/windowing/Trigger.java       |  544 ----
 .../transforms/windowing/TriggerBuilder.java    |   29 -
 .../sdk/transforms/windowing/Window.java        |  662 ----
 .../sdk/transforms/windowing/WindowFn.java      |  221 --
 .../sdk/transforms/windowing/package-info.java  |   49 -
 .../dataflow/sdk/util/ActiveWindowSet.java      |  171 -
 .../cloud/dataflow/sdk/util/ApiSurface.java     |  642 ----
 .../dataflow/sdk/util/AppEngineEnvironment.java |   61 -
 .../dataflow/sdk/util/AppliedCombineFn.java     |  130 -
 .../dataflow/sdk/util/AssignWindowsDoFn.java    |   67 -
 ...AttemptAndTimeBoundedExponentialBackOff.java |  168 -
 .../util/AttemptBoundedExponentialBackOff.java  |   83 -
 .../cloud/dataflow/sdk/util/AvroUtils.java      |  345 --
 .../dataflow/sdk/util/BaseExecutionContext.java |  155 -
 .../dataflow/sdk/util/BatchTimerInternals.java  |  138 -
 .../sdk/util/BigQueryTableInserter.java         |  434 ---
 .../sdk/util/BigQueryTableRowIterator.java      |  469 ---
 .../cloud/dataflow/sdk/util/BitSetCoder.java    |   59 -
 .../BufferedElementCountingOutputStream.java    |  184 --
 .../cloud/dataflow/sdk/util/CloudKnownType.java |  138 -
 .../cloud/dataflow/sdk/util/CloudObject.java    |  184 --
 .../cloud/dataflow/sdk/util/CoderUtils.java     |  327 --
 .../sdk/util/CombineContextFactory.java         |  107 -
 .../cloud/dataflow/sdk/util/CombineFnUtil.java  |  154 -
 .../dataflow/sdk/util/CounterAggregator.java    |   96 -
 .../dataflow/sdk/util/CredentialFactory.java    |   29 -
 .../cloud/dataflow/sdk/util/Credentials.java    |  192 --
 .../sdk/util/DataflowPathValidator.java         |   97 -
 .../dataflow/sdk/util/DataflowReleaseInfo.java  |   87 -
 .../sdk/util/DirectModeExecutionContext.java    |  130 -
 .../sdk/util/DirectSideInputReader.java         |   73 -
 .../cloud/dataflow/sdk/util/DoFnInfo.java       |   67 -
 .../cloud/dataflow/sdk/util/DoFnRunner.java     |   60 -
 .../cloud/dataflow/sdk/util/DoFnRunnerBase.java |  558 ----
 .../cloud/dataflow/sdk/util/DoFnRunners.java    |  142 -
 .../dataflow/sdk/util/ExecutableTrigger.java    |  159 -
 .../dataflow/sdk/util/ExecutionContext.java     |  102 -
 .../sdk/util/ExposedByteArrayInputStream.java   |   51 -
 .../sdk/util/ExposedByteArrayOutputStream.java  |  115 -
 .../dataflow/sdk/util/FileIOChannelFactory.java |  135 -
 .../dataflow/sdk/util/FinishedTriggers.java     |   42 -
 .../sdk/util/FinishedTriggersBitSet.java        |   68 -
 .../dataflow/sdk/util/FinishedTriggersSet.java  |   74 -
 .../dataflow/sdk/util/GcpCredentialFactory.java |   45 -
 .../dataflow/sdk/util/GcsIOChannelFactory.java  |   86 -
 .../cloud/dataflow/sdk/util/GcsStager.java      |   53 -
 .../google/cloud/dataflow/sdk/util/GcsUtil.java |  406 ---
 .../util/GroupAlsoByWindowViaWindowSetDoFn.java |  104 -
 .../sdk/util/GroupAlsoByWindowsDoFn.java        |   58 -
 .../GroupAlsoByWindowsViaOutputBufferDoFn.java  |   98 -
 .../dataflow/sdk/util/IOChannelFactory.java     |  101 -
 .../cloud/dataflow/sdk/util/IOChannelUtils.java |  204 --
 .../sdk/util/IllegalMutationException.java      |   52 -
 .../dataflow/sdk/util/InstanceBuilder.java      |  269 --
 .../util/IntervalBoundedExponentialBackOff.java |   87 -
 .../cloud/dataflow/sdk/util/KeyedWorkItem.java  |   41 -
 .../dataflow/sdk/util/KeyedWorkItemCoder.java   |  120 -
 .../cloud/dataflow/sdk/util/KeyedWorkItems.java |  120 -
 .../sdk/util/LateDataDroppingDoFnRunner.java    |  145 -
 .../dataflow/sdk/util/MapAggregatorValues.java  |   48 -
 .../sdk/util/MergingActiveWindowSet.java        |  544 ----
 .../cloud/dataflow/sdk/util/MimeTypes.java      |   23 -
 .../cloud/dataflow/sdk/util/MonitoringUtil.java |  233 --
 .../dataflow/sdk/util/MutationDetector.java     |   31 -
 .../dataflow/sdk/util/MutationDetectors.java    |  182 --
 .../cloud/dataflow/sdk/util/NonEmptyPanes.java  |  148 -
 .../sdk/util/NonMergingActiveWindowSet.java     |   85 -
 .../sdk/util/NoopCredentialFactory.java         |   38 -
 .../dataflow/sdk/util/NoopPathValidator.java    |   48 -
 .../dataflow/sdk/util/NullSideInputReader.java  |   61 -
 .../dataflow/sdk/util/OutputReference.java      |   42 -
 .../sdk/util/PCollectionViewWindow.java         |   67 -
 .../dataflow/sdk/util/PCollectionViews.java     |  426 ---
 .../google/cloud/dataflow/sdk/util/PTuple.java  |  160 -
 .../cloud/dataflow/sdk/util/PackageUtil.java    |  327 --
 .../dataflow/sdk/util/PaneInfoTracker.java      |  151 -
 .../cloud/dataflow/sdk/util/PathValidator.java  |   47 -
 .../sdk/util/PerKeyCombineFnRunner.java         |  147 -
 .../sdk/util/PerKeyCombineFnRunners.java        |  257 --
 .../cloud/dataflow/sdk/util/PropertyNames.java  |  107 -
 .../dataflow/sdk/util/RandomAccessData.java     |  352 --
 .../cloud/dataflow/sdk/util/ReduceFn.java       |  128 -
 .../sdk/util/ReduceFnContextFactory.java        |  495 ---
 .../cloud/dataflow/sdk/util/ReduceFnRunner.java |  843 -----
 .../sdk/util/ReifyTimestampAndWindowsDoFn.java  |   46 -
 .../cloud/dataflow/sdk/util/Reshuffle.java      |  145 -
 .../dataflow/sdk/util/ReshuffleTrigger.java     |   61 -
 .../sdk/util/RetryHttpRequestInitializer.java   |  250 --
 .../dataflow/sdk/util/SerializableUtils.java    |  159 -
 .../cloud/dataflow/sdk/util/Serializer.java     |  145 -
 .../sdk/util/ShardingWritableByteChannel.java   |  118 -
 .../dataflow/sdk/util/SideInputReader.java      |   48 -
 .../dataflow/sdk/util/SimpleDoFnRunner.java     |   55 -
 .../google/cloud/dataflow/sdk/util/Stager.java  |   29 -
 .../cloud/dataflow/sdk/util/StreamUtils.java    |   68 -
 .../cloud/dataflow/sdk/util/StringUtils.java    |  242 --
 .../google/cloud/dataflow/sdk/util/Structs.java |  384 ---
 .../dataflow/sdk/util/SystemDoFnInternal.java   |   37 -
 .../cloud/dataflow/sdk/util/SystemReduceFn.java |  133 -
 .../cloud/dataflow/sdk/util/TestCredential.java |   51 -
 .../cloud/dataflow/sdk/util/TimeDomain.java     |   41 -
 .../cloud/dataflow/sdk/util/TimeUtil.java       |  164 -
 .../cloud/dataflow/sdk/util/TimerInternals.java |  269 --
 .../google/cloud/dataflow/sdk/util/Timers.java  |   60 -
 .../cloud/dataflow/sdk/util/Transport.java      |  205 --
 .../sdk/util/TriggerContextFactory.java         |  522 ---
 .../cloud/dataflow/sdk/util/TriggerRunner.java  |  223 --
 .../dataflow/sdk/util/UnownedInputStream.java   |   76 -
 .../dataflow/sdk/util/UnownedOutputStream.java  |   56 -
 .../sdk/util/UploadIdResponseInterceptor.java   |   61 -
 .../dataflow/sdk/util/UserCodeException.java    |   94 -
 .../dataflow/sdk/util/ValueWithRecordId.java    |  154 -
 .../google/cloud/dataflow/sdk/util/Values.java  |   88 -
 .../google/cloud/dataflow/sdk/util/VarInt.java  |  115 -
 .../cloud/dataflow/sdk/util/WatermarkHold.java  |  450 ---
 .../cloud/dataflow/sdk/util/Weighted.java       |   27 -
 .../cloud/dataflow/sdk/util/WeightedValue.java  |   45 -
 .../cloud/dataflow/sdk/util/WindowTracing.java  |   36 -
 .../cloud/dataflow/sdk/util/WindowedValue.java  |  720 -----
 .../dataflow/sdk/util/WindowingInternals.java   |   82 -
 .../dataflow/sdk/util/WindowingStrategy.java    |  268 --
 .../cloud/dataflow/sdk/util/ZipFiles.java       |  294 --
 .../cloud/dataflow/sdk/util/common/Counter.java | 1103 -------
 .../sdk/util/common/CounterProvider.java        |   26 -
 .../dataflow/sdk/util/common/CounterSet.java    |  177 --
 .../util/common/ElementByteSizeObservable.java  |   41 -
 .../ElementByteSizeObservableIterable.java      |   63 -
 .../ElementByteSizeObservableIterator.java      |   36 -
 .../util/common/ElementByteSizeObserver.java    |   92 -
 .../sdk/util/common/PeekingReiterator.java      |   98 -
 .../sdk/util/common/ReflectHelpers.java         |  209 --
 .../dataflow/sdk/util/common/Reiterable.java    |   27 -
 .../dataflow/sdk/util/common/Reiterator.java    |   39 -
 .../dataflow/sdk/util/common/package-info.java  |   18 -
 .../sdk/util/common/worker/StateSampler.java    |  365 ---
 .../sdk/util/common/worker/package-info.java    |   18 -
 .../cloud/dataflow/sdk/util/gcsfs/GcsPath.java  |  619 ----
 .../dataflow/sdk/util/gcsfs/package-info.java   |   18 -
 .../cloud/dataflow/sdk/util/package-info.java   |   18 -
 .../util/state/AccumulatorCombiningState.java   |   51 -
 .../cloud/dataflow/sdk/util/state/BagState.java |   26 -
 .../dataflow/sdk/util/state/CombiningState.java |   40 -
 .../CopyOnAccessInMemoryStateInternals.java     |  454 ---
 .../sdk/util/state/InMemoryStateInternals.java  |  414 ---
 .../sdk/util/state/MergingStateAccessor.java    |   40 -
 .../dataflow/sdk/util/state/ReadableState.java  |   53 -
 .../cloud/dataflow/sdk/util/state/State.java    |   30 -
 .../dataflow/sdk/util/state/StateAccessor.java  |   36 -
 .../dataflow/sdk/util/state/StateContext.java   |   41 -
 .../dataflow/sdk/util/state/StateContexts.java  |  107 -
 .../dataflow/sdk/util/state/StateInternals.java |   55 -
 .../dataflow/sdk/util/state/StateMerging.java   |  254 --
 .../dataflow/sdk/util/state/StateNamespace.java |   54 -
 .../sdk/util/state/StateNamespaceForTest.java   |   63 -
 .../sdk/util/state/StateNamespaces.java         |  277 --
 .../dataflow/sdk/util/state/StateTable.java     |   89 -
 .../cloud/dataflow/sdk/util/state/StateTag.java |   96 -
 .../dataflow/sdk/util/state/StateTags.java      |  579 ----
 .../dataflow/sdk/util/state/ValueState.java     |   35 -
 .../sdk/util/state/WatermarkHoldState.java      |   42 -
 .../google/cloud/dataflow/sdk/values/KV.java    |  130 -
 .../cloud/dataflow/sdk/values/PBegin.java       |   87 -
 .../cloud/dataflow/sdk/values/PCollection.java  |  250 --
 .../dataflow/sdk/values/PCollectionList.java    |  238 --
 .../dataflow/sdk/values/PCollectionTuple.java   |  264 --
 .../dataflow/sdk/values/PCollectionView.java    |   64 -
 .../google/cloud/dataflow/sdk/values/PDone.java |   47 -
 .../cloud/dataflow/sdk/values/PInput.java       |   56 -
 .../cloud/dataflow/sdk/values/POutput.java      |   76 -
 .../dataflow/sdk/values/POutputValueBase.java   |  102 -
 .../cloud/dataflow/sdk/values/PValue.java       |   38 -
 .../cloud/dataflow/sdk/values/PValueBase.java   |  155 -
 .../dataflow/sdk/values/TimestampedValue.java   |  155 -
 .../cloud/dataflow/sdk/values/TupleTag.java     |  196 --
 .../cloud/dataflow/sdk/values/TupleTagList.java |  148 -
 .../dataflow/sdk/values/TypeDescriptor.java     |  351 --
 .../cloud/dataflow/sdk/values/TypedPValue.java  |  197 --
 .../cloud/dataflow/sdk/values/package-info.java |   52 -
 sdk/src/main/proto/README.md                    |   27 -
 .../main/proto/proto2_coder_test_messages.proto |   51 -
 .../google/cloud/dataflow/sdk/sdk.properties    |    5 -
 .../cloud/dataflow/sdk/DataflowMatchers.java    |   65 -
 .../google/cloud/dataflow/sdk/PipelineTest.java |  296 --
 .../google/cloud/dataflow/sdk/TestUtils.java    |  213 --
 .../cloud/dataflow/sdk/WindowMatchers.java      |  137 -
 .../dataflow/sdk/coders/AvroCoderTest.java      |  754 -----
 .../sdk/coders/BigEndianIntegerCoderTest.java   |   90 -
 .../sdk/coders/BigEndianLongCoderTest.java      |   94 -
 .../dataflow/sdk/coders/ByteArrayCoderTest.java |  144 -
 .../dataflow/sdk/coders/ByteCoderTest.java      |   91 -
 .../sdk/coders/ByteStringCoderTest.java         |  121 -
 .../dataflow/sdk/coders/CoderFactoriesTest.java |  100 -
 .../dataflow/sdk/coders/CoderProvidersTest.java |   71 -
 .../dataflow/sdk/coders/CoderRegistryTest.java  |  521 ---
 .../cloud/dataflow/sdk/coders/CoderTest.java    |   78 -
 .../sdk/coders/CollectionCoderTest.java         |   93 -
 .../dataflow/sdk/coders/CustomCoderTest.java    |  135 -
 .../dataflow/sdk/coders/DefaultCoderTest.java   |  128 -
 .../dataflow/sdk/coders/DelegateCoderTest.java  |  141 -
 .../dataflow/sdk/coders/DoubleCoderTest.java    |   96 -
 .../dataflow/sdk/coders/DurationCoderTest.java  |   86 -
 .../dataflow/sdk/coders/EntityCoderTest.java    |  108 -
 .../dataflow/sdk/coders/InstantCoderTest.java   |  116 -
 .../dataflow/sdk/coders/IterableCoderTest.java  |  109 -
 .../dataflow/sdk/coders/JAXBCoderTest.java      |   99 -
 .../cloud/dataflow/sdk/coders/KvCoderTest.java  |  118 -
 .../dataflow/sdk/coders/ListCoderTest.java      |  134 -
 .../cloud/dataflow/sdk/coders/MapCoderTest.java |  106 -
 .../dataflow/sdk/coders/NullableCoderTest.java  |  132 -
 .../sdk/coders/PrintBase64Encodings.java        |   81 -
 .../dataflow/sdk/coders/Proto2CoderTest.java    |  145 -
 .../sdk/coders/SerializableCoderTest.java       |  222 --
 .../cloud/dataflow/sdk/coders/SetCoderTest.java |   86 -
 .../dataflow/sdk/coders/StandardCoderTest.java  |  176 -
 .../sdk/coders/StringDelegateCoderTest.java     |   72 -
 .../sdk/coders/StringUtf8CoderTest.java         |   80 -
 .../sdk/coders/StructuralByteArrayTest.java     |   39 -
 .../sdk/coders/TableRowJsonCoderTest.java       |   86 -
 .../sdk/coders/TextualIntegerCoderTest.java     |   90 -
 .../dataflow/sdk/coders/VarIntCoderTest.java    |   91 -
 .../dataflow/sdk/coders/VarLongCoderTest.java   |   94 -
 .../sdk/coders/protobuf/ProtoCoderTest.java     |  182 --
 .../sdk/coders/protobuf/ProtobufUtilTest.java   |  195 --
 .../sdk/io/AvroIOGeneratedClassTest.java        |  374 ---
 .../cloud/dataflow/sdk/io/AvroIOTest.java       |  226 --
 .../cloud/dataflow/sdk/io/AvroSourceTest.java   |  692 ----
 .../cloud/dataflow/sdk/io/BigQueryIOTest.java   |  445 ---
 .../io/BoundedReadFromUnboundedSourceTest.java  |  132 -
 .../dataflow/sdk/io/CompressedSourceTest.java   |  430 ---
 .../dataflow/sdk/io/CountingInputTest.java      |  125 -
 .../dataflow/sdk/io/CountingSourceTest.java     |  216 --
 .../cloud/dataflow/sdk/io/DatastoreIOTest.java  |  631 ----
 .../dataflow/sdk/io/FileBasedSinkTest.java      |  512 ---
 .../dataflow/sdk/io/FileBasedSourceTest.java    |  914 ------
 .../dataflow/sdk/io/OffsetBasedSourceTest.java  |  278 --
 .../cloud/dataflow/sdk/io/PubsubIOTest.java     |  233 --
 .../google/cloud/dataflow/sdk/io/ReadTest.java  |  144 -
 .../cloud/dataflow/sdk/io/TextIOTest.java       |  562 ----
 .../google/cloud/dataflow/sdk/io/WriteTest.java |  341 --
 .../cloud/dataflow/sdk/io/XmlSinkTest.java      |  235 --
 .../cloud/dataflow/sdk/io/XmlSourceTest.java    |  822 -----
 .../sdk/io/bigtable/BigtableIOTest.java         |  688 ----
 .../range/ByteKeyRangeEstimateFractionTest.java |   69 -
 .../range/ByteKeyRangeInterpolateKeyTest.java   |   73 -
 .../dataflow/sdk/io/range/ByteKeyRangeTest.java |  396 ---
 .../sdk/io/range/ByteKeyRangeTrackerTest.java   |  118 -
 .../dataflow/sdk/io/range/ByteKeyTest.java      |  178 --
 .../sdk/io/range/OffsetRangeTrackerTest.java    |  186 --
 .../com/google/cloud/dataflow/sdk/io/user.avsc  |   10 -
 .../DataflowPipelineDebugOptionsTest.java       |   40 -
 .../options/DataflowPipelineOptionsTest.java    |   91 -
 .../options/DataflowProfilingOptionsTest.java   |   47 -
 .../DataflowWorkerLoggingOptionsTest.java       |   73 -
 .../dataflow/sdk/options/GcpOptionsTest.java    |  123 -
 .../sdk/options/GoogleApiDebugOptionsTest.java  |  147 -
 .../sdk/options/PipelineOptionsFactoryTest.java | 1154 -------
 .../sdk/options/PipelineOptionsTest.java        |  126 -
 .../options/PipelineOptionsValidatorTest.java   |  310 --
 .../sdk/options/ProxyInvocationHandlerTest.java |  691 ----
 .../AggregatorPipelineExtractorTest.java        |  228 --
 .../BlockingDataflowPipelineRunnerTest.java     |  301 --
 .../sdk/runners/DataflowPipelineJobTest.java    |  603 ----
 .../runners/DataflowPipelineRegistrarTest.java  |   72 -
 .../sdk/runners/DataflowPipelineRunnerTest.java | 1370 --------
 .../sdk/runners/DataflowPipelineTest.java       |   44 -
 .../runners/DataflowPipelineTranslatorTest.java |  889 ------
 .../runners/DirectPipelineRegistrarTest.java    |   69 -
 .../sdk/runners/DirectPipelineRunnerTest.java   |  210 --
 .../sdk/runners/DirectPipelineTest.java         |   34 -
 .../sdk/runners/PipelineRunnerTest.java         |   82 -
 .../dataflow/sdk/runners/TransformTreeTest.java |  194 --
 .../sdk/runners/dataflow/CustomSourcesTest.java |  273 --
 .../runners/dataflow/TestCountingSource.java    |  235 --
 .../BoundedReadEvaluatorFactoryTest.java        |  287 --
 .../ConsumerTrackingPipelineVisitorTest.java    |  233 --
 .../inprocess/FlattenEvaluatorFactoryTest.java  |  136 -
 .../inprocess/ForwardingPTransformTest.java     |  100 -
 .../GroupByKeyEvaluatorFactoryTest.java         |  178 --
 .../inprocess/InMemoryWatermarkManagerTest.java | 1111 -------
 .../runners/inprocess/InProcessBundleTest.java  |  143 -
 .../runners/inprocess/InProcessCreateTest.java  |  199 --
 .../InProcessEvaluationContextTest.java         |  544 ----
 .../inprocess/InProcessPipelineRunnerTest.java  |   77 -
 .../InProcessSideInputContainerTest.java        |  370 ---
 .../inprocess/InProcessTimerInternalsTest.java  |  131 -
 .../KeyedPValueTrackingVisitorTest.java         |  189 --
 .../sdk/runners/inprocess/MockClock.java        |   60 -
 .../ParDoMultiEvaluatorFactoryTest.java         |  412 ---
 .../ParDoSingleEvaluatorFactoryTest.java        |  310 --
 .../TransformExecutorServicesTest.java          |  134 -
 .../inprocess/TransformExecutorTest.java        |  312 --
 .../UnboundedReadEvaluatorFactoryTest.java      |  327 --
 .../inprocess/ViewEvaluatorFactoryTest.java     |   96 -
 .../WatermarkCallbackExecutorTest.java          |  126 -
 .../sdk/testing/CoderPropertiesTest.java        |  214 --
 .../sdk/testing/DataflowAssertTest.java         |  326 --
 .../sdk/testing/DataflowJUnitTestRunner.java    |  129 -
 .../dataflow/sdk/testing/ExpectedLogs.java      |  306 --
 .../dataflow/sdk/testing/ExpectedLogsTest.java  |  153 -
 .../sdk/testing/FastNanoClockAndSleeper.java    |   47 -
 .../testing/FastNanoClockAndSleeperTest.java    |   47 -
 .../sdk/testing/PCollectionViewTesting.java     |  295 --
 .../sdk/testing/ResetDateTimeProvider.java      |   41 -
 .../sdk/testing/ResetDateTimeProviderTest.java  |   55 -
 .../sdk/testing/RestoreSystemProperties.java    |   51 -
 .../testing/RestoreSystemPropertiesTest.java    |   50 -
 .../sdk/testing/SerializableMatchersTest.java   |  165 -
 .../sdk/testing/SystemNanoTimeSleeper.java      |   68 -
 .../sdk/testing/SystemNanoTimeSleeperTest.java  |   53 -
 .../testing/TestDataflowPipelineRunnerTest.java |  317 --
 .../dataflow/sdk/testing/TestPipelineTest.java  |   93 -
 .../transforms/ApproximateQuantilesTest.java    |  299 --
 .../sdk/transforms/ApproximateUniqueTest.java   |  291 --
 .../dataflow/sdk/transforms/CombineFnsTest.java |  413 ---
 .../dataflow/sdk/transforms/CombineTest.java    | 1137 -------
 .../dataflow/sdk/transforms/CountTest.java      |  121 -
 .../dataflow/sdk/transforms/CreateTest.java     |  240 --
 .../sdk/transforms/DoFnContextTest.java         |   68 -
 .../DoFnDelegatingAggregatorTest.java           |  143 -
 .../sdk/transforms/DoFnReflectorTest.java       |  493 ---
 .../cloud/dataflow/sdk/transforms/DoFnTest.java |  206 --
 .../dataflow/sdk/transforms/DoFnTesterTest.java |  253 --
 .../sdk/transforms/DoFnWithContextTest.java     |  225 --
 .../dataflow/sdk/transforms/FilterTest.java     |  160 -
 .../sdk/transforms/FlatMapElementsTest.java     |  124 -
 .../dataflow/sdk/transforms/FlattenTest.java    |  369 ---
 .../dataflow/sdk/transforms/GroupByKeyTest.java |  438 ---
 .../IntraBundleParallelizationTest.java         |  250 --
 .../cloud/dataflow/sdk/transforms/KeysTest.java |   83 -
 .../dataflow/sdk/transforms/KvSwapTest.java     |   91 -
 .../sdk/transforms/MapElementsTest.java         |  134 -
 .../cloud/dataflow/sdk/transforms/MaxTest.java  |   66 -
 .../cloud/dataflow/sdk/transforms/MeanTest.java |   72 -
 .../cloud/dataflow/sdk/transforms/MinTest.java  |   66 -
 .../cloud/dataflow/sdk/transforms/NoOpDoFn.java |  143 -
 .../dataflow/sdk/transforms/PTransformTest.java |   41 -
 .../dataflow/sdk/transforms/ParDoTest.java      | 1541 ---------
 .../dataflow/sdk/transforms/PartitionTest.java  |  140 -
 .../sdk/transforms/RemoveDuplicatesTest.java    |  131 -
 .../dataflow/sdk/transforms/SampleTest.java     |  260 --
 .../sdk/transforms/SimpleStatsFnsTest.java      |  129 -
 .../cloud/dataflow/sdk/transforms/SumTest.java  |   66 -
 .../cloud/dataflow/sdk/transforms/TopTest.java  |  259 --
 .../dataflow/sdk/transforms/ValuesTest.java     |   93 -
 .../cloud/dataflow/sdk/transforms/ViewTest.java | 1548 ---------
 .../dataflow/sdk/transforms/WithKeysTest.java   |  127 -
 .../sdk/transforms/WithTimestampsTest.java      |  210 --
 .../transforms/display/DisplayDataMatchers.java |   98 -
 .../display/DisplayDataMatchersTest.java        |   81 -
 .../sdk/transforms/display/DisplayDataTest.java |  633 ----
 .../transforms/join/CoGbkResultCoderTest.java   |   85 -
 .../sdk/transforms/join/CoGbkResultTest.java    |  124 -
 .../sdk/transforms/join/CoGroupByKeyTest.java   |  507 ---
 .../sdk/transforms/join/UnionCoderTest.java     |   48 -
 .../sdk/transforms/windowing/AfterAllTest.java  |  151 -
 .../sdk/transforms/windowing/AfterEachTest.java |  122 -
 .../transforms/windowing/AfterFirstTest.java    |  175 -
 .../sdk/transforms/windowing/AfterPaneTest.java |  126 -
 .../windowing/AfterProcessingTimeTest.java      |  157 -
 .../AfterSynchronizedProcessingTimeTest.java    |  121 -
 .../windowing/AfterWatermarkTest.java           |  338 --
 .../windowing/CalendarWindowsTest.java          |  260 --
 .../windowing/DefaultTriggerTest.java           |  176 -
 .../transforms/windowing/FixedWindowsTest.java  |  124 -
 .../windowing/IntervalWindowTest.java           |   94 -
 .../windowing/OrFinallyTriggerTest.java         |  209 --
 .../sdk/transforms/windowing/PaneInfoTest.java  |   75 -
 .../transforms/windowing/RepeatedlyTest.java    |  128 -
 .../sdk/transforms/windowing/SessionsTest.java  |  156 -
 .../windowing/SlidingWindowsTest.java           |  193 --
 .../sdk/transforms/windowing/TriggerTest.java   |  117 -
 .../sdk/transforms/windowing/WindowTest.java    |  226 --
 .../sdk/transforms/windowing/WindowingTest.java |  244 --
 .../cloud/dataflow/sdk/util/ApiSurfaceTest.java |  187 --
 ...mptAndTimeBoundedExponentialBackOffTest.java |  212 --
 .../AttemptBoundedExponentialBackOffTest.java   |   85 -
 .../cloud/dataflow/sdk/util/AvroUtilsTest.java  |  225 --
 .../sdk/util/BatchTimerInternalsTest.java       |  116 -
 .../sdk/util/BigQueryTableInserterTest.java     |  239 --
 .../sdk/util/BigQueryTableRowIteratorTest.java  |  255 --
 .../dataflow/sdk/util/BigQueryUtilTest.java     |  479 ---
 ...BufferedElementCountingOutputStreamTest.java |  205 --
 .../cloud/dataflow/sdk/util/CoderUtilsTest.java |  229 --
 .../dataflow/sdk/util/CombineFnUtilTest.java    |   62 -
 .../sdk/util/CounterAggregatorTest.java         |  253 --
 .../sdk/util/DataflowPathValidatorTest.java     |   92 -
 .../sdk/util/ExecutableTriggerTest.java         |  130 -
 .../util/ExposedByteArrayInputStreamTest.java   |   78 -
 .../util/ExposedByteArrayOutputStreamTest.java  |  245 --
 .../sdk/util/FileIOChannelFactoryTest.java      |  226 --
 .../sdk/util/FinishedTriggersBitSetTest.java    |   54 -
 .../sdk/util/FinishedTriggersProperties.java    |  109 -
 .../sdk/util/FinishedTriggersSetTest.java       |   60 -
 .../sdk/util/GcsIOChannelFactoryTest.java       |   43 -
 .../cloud/dataflow/sdk/util/GcsUtilTest.java    |  490 ---
 .../sdk/util/GroupAlsoByWindowsProperties.java  |  718 -----
 ...oupAlsoByWindowsViaOutputBufferDoFnTest.java |  111 -
 .../dataflow/sdk/util/IOChannelUtilsTest.java   |   94 -
 .../dataflow/sdk/util/InstanceBuilderTest.java  |  115 -
 .../IntervalBoundedExponentialBackOffTest.java  |   99 -
 .../sdk/util/KeyedWorkItemCoderTest.java        |   61 -
 .../util/LateDataDroppingDoFnRunnerTest.java    |  115 -
 .../sdk/util/MergingActiveWindowSetTest.java    |  175 -
 .../dataflow/sdk/util/MonitoringUtilTest.java   |  146 -
 .../sdk/util/MutationDetectorsTest.java         |  148 -
 .../cloud/dataflow/sdk/util/PTupleTest.java     |   40 -
 .../dataflow/sdk/util/PackageUtilTest.java      |  482 ---
 .../dataflow/sdk/util/RandomAccessDataTest.java |  205 --
 .../dataflow/sdk/util/ReduceFnRunnerTest.java   | 1049 ------
 .../cloud/dataflow/sdk/util/ReduceFnTester.java |  776 -----
 .../cloud/dataflow/sdk/util/ReshuffleTest.java  |  208 --
 .../dataflow/sdk/util/ReshuffleTriggerTest.java |   58 -
 .../util/RetryHttpRequestInitializerTest.java   |  296 --
 .../sdk/util/SerializableUtilsTest.java         |  165 -
 .../cloud/dataflow/sdk/util/SerializerTest.java |  162 -
 .../dataflow/sdk/util/SimpleDoFnRunnerTest.java |   86 -
 .../dataflow/sdk/util/StreamUtilsTest.java      |   71 -
 .../dataflow/sdk/util/StringUtilsTest.java      |  145 -
 .../cloud/dataflow/sdk/util/StructsTest.java    |  206 --
 .../cloud/dataflow/sdk/util/TimeUtilTest.java   |   73 -
 .../dataflow/sdk/util/TimerInternalsTest.java   |   52 -
 .../cloud/dataflow/sdk/util/TriggerTester.java  |  585 ----
 .../sdk/util/UnownedInputStreamTest.java        |   76 -
 .../sdk/util/UnownedOutputStreamTest.java       |   57 -
 .../util/UploadIdResponseInterceptorTest.java   |   99 -
 .../sdk/util/UserCodeExceptionTest.java         |  176 -
 .../cloud/dataflow/sdk/util/VarIntTest.java     |  277 --
 .../dataflow/sdk/util/WindowedValueTest.java    |   57 -
 .../cloud/dataflow/sdk/util/ZipFilesTest.java   |  311 --
 .../sdk/util/common/CounterSetTest.java         |  225 --
 .../dataflow/sdk/util/common/CounterTest.java   |  589 ----
 .../sdk/util/common/CounterTestUtils.java       |   56 -
 .../sdk/util/common/ReflectHelpersTest.java     |  126 -
 .../dataflow/sdk/util/gcsfs/GcsPathTest.java    |  333 --
 .../CopyOnAccessInMemoryStateInternalsTest.java |  553 ----
 .../util/state/InMemoryStateInternalsTest.java  |  348 --
 .../sdk/util/state/StateNamespacesTest.java     |  129 -
 .../dataflow/sdk/util/state/StateTagTest.java   |  173 -
 .../cloud/dataflow/sdk/values/KVTest.java       |  112 -
 .../sdk/values/PCollectionListTest.java         |   47 -
 .../sdk/values/PCollectionTupleTest.java        |   93 -
 .../cloud/dataflow/sdk/values/PDoneTest.java    |  102 -
 .../cloud/dataflow/sdk/values/TupleTagTest.java |   87 -
 .../dataflow/sdk/values/TypeDescriptorTest.java |  193 --
 .../dataflow/sdk/values/TypedPValueTest.java    |  164 -
 .../PipelineOptionsFactoryJava8Test.java        |   90 -
 sdks/java/core/pom.xml                          |  771 +++++
 .../com/google/cloud/dataflow/sdk/Pipeline.java |  502 +++
 .../cloud/dataflow/sdk/PipelineResult.java      |   95 +
 .../dataflow/sdk/annotations/Experimental.java  |   80 +
 .../dataflow/sdk/annotations/package-info.java  |   20 +
 .../cloud/dataflow/sdk/coders/AtomicCoder.java  |   51 +
 .../cloud/dataflow/sdk/coders/AvroCoder.java    |  714 +++++
 .../sdk/coders/BigEndianIntegerCoder.java       |   99 +
 .../dataflow/sdk/coders/BigEndianLongCoder.java |   99 +
 .../dataflow/sdk/coders/ByteArrayCoder.java     |  138 +
 .../cloud/dataflow/sdk/coders/ByteCoder.java    |  111 +
 .../dataflow/sdk/coders/ByteStringCoder.java    |  106 +
 .../sdk/coders/CannotProvideCoderException.java |   95 +
 .../google/cloud/dataflow/sdk/coders/Coder.java |  298 ++
 .../dataflow/sdk/coders/CoderException.java     |   36 +
 .../dataflow/sdk/coders/CoderFactories.java     |  274 ++
 .../cloud/dataflow/sdk/coders/CoderFactory.java |   43 +
 .../dataflow/sdk/coders/CoderProvider.java      |   33 +
 .../dataflow/sdk/coders/CoderProviders.java     |  164 +
 .../dataflow/sdk/coders/CoderRegistry.java      |  843 +++++
 .../dataflow/sdk/coders/CollectionCoder.java    |   73 +
 .../cloud/dataflow/sdk/coders/CustomCoder.java  |  137 +
 .../cloud/dataflow/sdk/coders/DefaultCoder.java |   66 +
 .../dataflow/sdk/coders/DelegateCoder.java      |  164 +
 .../sdk/coders/DeterministicStandardCoder.java  |   38 +
 .../cloud/dataflow/sdk/coders/DoubleCoder.java  |  113 +
 .../dataflow/sdk/coders/DurationCoder.java      |   97 +
 .../cloud/dataflow/sdk/coders/EntityCoder.java  |   86 +
 .../cloud/dataflow/sdk/coders/InstantCoder.java |  113 +
 .../dataflow/sdk/coders/IterableCoder.java      |   78 +
 .../dataflow/sdk/coders/IterableLikeCoder.java  |  278 ++
 .../cloud/dataflow/sdk/coders/JAXBCoder.java    |  135 +
 .../cloud/dataflow/sdk/coders/KvCoder.java      |  162 +
 .../cloud/dataflow/sdk/coders/KvCoderBase.java  |   61 +
 .../cloud/dataflow/sdk/coders/ListCoder.java    |   77 +
 .../cloud/dataflow/sdk/coders/MapCoder.java     |  160 +
 .../cloud/dataflow/sdk/coders/MapCoderBase.java |   54 +
 .../dataflow/sdk/coders/NullableCoder.java      |  175 +
 .../cloud/dataflow/sdk/coders/Proto2Coder.java  |  361 +++
 .../dataflow/sdk/coders/SerializableCoder.java  |  183 ++
 .../cloud/dataflow/sdk/coders/SetCoder.java     |   94 +
 .../dataflow/sdk/coders/StandardCoder.java      |  229 ++
 .../sdk/coders/StringDelegateCoder.java         |   86 +
 .../dataflow/sdk/coders/StringUtf8Coder.java    |  139 +
 .../sdk/coders/StructuralByteArray.java         |   56 +
 .../dataflow/sdk/coders/TableRowJsonCoder.java  |   82 +
 .../sdk/coders/TextualIntegerCoder.java         |   69 +
 .../cloud/dataflow/sdk/coders/VarIntCoder.java  |   97 +
 .../cloud/dataflow/sdk/coders/VarLongCoder.java |   96 +
 .../cloud/dataflow/sdk/coders/VoidCoder.java    |   76 +
 .../cloud/dataflow/sdk/coders/package-info.java |   44 +
 .../sdk/coders/protobuf/ProtoCoder.java         |  404 +++
 .../sdk/coders/protobuf/ProtobufUtil.java       |  171 +
 .../sdk/coders/protobuf/package-info.java       |   23 +
 .../google/cloud/dataflow/sdk/io/AvroIO.java    |  810 +++++
 .../cloud/dataflow/sdk/io/AvroSource.java       |  647 ++++
 .../cloud/dataflow/sdk/io/BigQueryIO.java       | 1499 +++++++++
 .../cloud/dataflow/sdk/io/BlockBasedSource.java |  237 ++
 .../sdk/io/BoundedReadFromUnboundedSource.java  |  271 ++
 .../cloud/dataflow/sdk/io/BoundedSource.java    |  277 ++
 .../cloud/dataflow/sdk/io/CompressedSource.java |  413 +++
 .../cloud/dataflow/sdk/io/CountingInput.java    |  191 ++
 .../cloud/dataflow/sdk/io/CountingSource.java   |  397 +++
 .../cloud/dataflow/sdk/io/DatastoreIO.java      |  957 ++++++
 .../cloud/dataflow/sdk/io/FileBasedSink.java    |  864 +++++
 .../cloud/dataflow/sdk/io/FileBasedSource.java  |  648 ++++
 .../dataflow/sdk/io/OffsetBasedSource.java      |  326 ++
 .../google/cloud/dataflow/sdk/io/PubsubIO.java  | 1044 ++++++
 .../com/google/cloud/dataflow/sdk/io/Read.java  |  253 ++
 .../dataflow/sdk/io/ShardNameTemplate.java      |   75 +
 .../com/google/cloud/dataflow/sdk/io/Sink.java  |  252 ++
 .../google/cloud/dataflow/sdk/io/Source.java    |  193 ++
 .../google/cloud/dataflow/sdk/io/TextIO.java    |  992 ++++++
 .../cloud/dataflow/sdk/io/UnboundedSource.java  |  253 ++
 .../com/google/cloud/dataflow/sdk/io/Write.java |  213 ++
 .../google/cloud/dataflow/sdk/io/XmlSink.java   |  310 ++
 .../google/cloud/dataflow/sdk/io/XmlSource.java |  541 ++++
 .../dataflow/sdk/io/bigtable/BigtableIO.java    |  987 ++++++
 .../sdk/io/bigtable/BigtableService.java        |  108 +
 .../sdk/io/bigtable/BigtableServiceImpl.java    |  241 ++
 .../dataflow/sdk/io/bigtable/package-info.java  |   22 +
 .../cloud/dataflow/sdk/io/package-info.java     |   37 +
 .../cloud/dataflow/sdk/io/range/ByteKey.java    |  173 +
 .../dataflow/sdk/io/range/ByteKeyRange.java     |  376 +++
 .../sdk/io/range/ByteKeyRangeTracker.java       |  117 +
 .../sdk/io/range/OffsetRangeTracker.java        |  182 ++
 .../dataflow/sdk/io/range/RangeTracker.java     |  220 ++
 .../dataflow/sdk/io/range/package-info.java     |   23 +
 .../sdk/options/ApplicationNameOptions.java     |   33 +
 .../dataflow/sdk/options/BigQueryOptions.java   |   31 +
 .../BlockingDataflowPipelineOptions.java        |   49 +
 .../sdk/options/CloudDebuggerOptions.java       |   43 +
 .../options/DataflowPipelineDebugOptions.java   |  259 ++
 .../sdk/options/DataflowPipelineOptions.java    |  134 +
 .../DataflowPipelineWorkerPoolOptions.java      |  254 ++
 .../sdk/options/DataflowProfilingOptions.java   |   46 +
 .../options/DataflowWorkerHarnessOptions.java   |   50 +
 .../options/DataflowWorkerLoggingOptions.java   |  153 +
 .../cloud/dataflow/sdk/options/Default.java     |  153 +
 .../sdk/options/DefaultValueFactory.java        |   38 +
 .../cloud/dataflow/sdk/options/Description.java |   35 +
 .../sdk/options/DirectPipelineOptions.java      |   75 +
 .../cloud/dataflow/sdk/options/GcpOptions.java  |  291 ++
 .../cloud/dataflow/sdk/options/GcsOptions.java  |  113 +
 .../sdk/options/GoogleApiDebugOptions.java      |   87 +
 .../cloud/dataflow/sdk/options/Hidden.java      |   33 +
 .../dataflow/sdk/options/PipelineOptions.java   |  249 ++
 .../sdk/options/PipelineOptionsFactory.java     | 1537 +++++++++
 .../sdk/options/PipelineOptionsRegistrar.java   |   36 +
 .../sdk/options/PipelineOptionsValidator.java   |  102 +
 .../sdk/options/ProxyInvocationHandler.java     |  441 +++
 .../dataflow/sdk/options/StreamingOptions.java  |   30 +
 .../cloud/dataflow/sdk/options/Validation.java  |   46 +
 .../dataflow/sdk/options/package-info.java      |   25 +
 .../google/cloud/dataflow/sdk/package-info.java |   33 +
 .../runners/AggregatorPipelineExtractor.java    |   97 +
 .../runners/AggregatorRetrievalException.java   |   32 +
 .../dataflow/sdk/runners/AggregatorValues.java  |   52 +
 .../runners/BlockingDataflowPipelineRunner.java |  181 ++
 .../DataflowJobAlreadyExistsException.java      |   34 +
 .../DataflowJobAlreadyUpdatedException.java     |   33 +
 .../runners/DataflowJobCancelledException.java  |   38 +
 .../sdk/runners/DataflowJobException.java       |   40 +
 .../runners/DataflowJobExecutionException.java  |   34 +
 .../runners/DataflowJobUpdatedException.java    |   51 +
 .../dataflow/sdk/runners/DataflowPipeline.java  |   59 +
 .../sdk/runners/DataflowPipelineJob.java        |  389 +++
 .../sdk/runners/DataflowPipelineRegistrar.java  |   58 +
 .../sdk/runners/DataflowPipelineRunner.java     | 3003 ++++++++++++++++++
 .../runners/DataflowPipelineRunnerHooks.java    |   37 +
 .../sdk/runners/DataflowPipelineTranslator.java | 1104 +++++++
 .../sdk/runners/DataflowServiceException.java   |   32 +
 .../dataflow/sdk/runners/DirectPipeline.java    |   55 +
 .../sdk/runners/DirectPipelineRegistrar.java    |   53 +
 .../sdk/runners/DirectPipelineRunner.java       | 1156 +++++++
 .../dataflow/sdk/runners/PipelineRunner.java    |   76 +
 .../sdk/runners/PipelineRunnerRegistrar.java    |   40 +
 .../sdk/runners/RecordingPipelineVisitor.java   |   54 +
 .../sdk/runners/TransformHierarchy.java         |  104 +
 .../dataflow/sdk/runners/TransformTreeNode.java |  252 ++
 .../sdk/runners/dataflow/AssignWindows.java     |   88 +
 .../runners/dataflow/BigQueryIOTranslator.java  |  125 +
 .../sdk/runners/dataflow/CustomSources.java     |  118 +
 .../dataflow/DataflowAggregatorTransforms.java  |   79 +
 .../dataflow/DataflowMetricUpdateExtractor.java |  110 +
 .../runners/dataflow/PubsubIOTranslator.java    |  107 +
 .../sdk/runners/dataflow/ReadTranslator.java    |  103 +
 .../sdk/runners/dataflow/package-info.java      |   20 +
 .../inprocess/BoundedReadEvaluatorFactory.java  |  152 +
 .../CachedThreadPoolExecutorServiceFactory.java |   42 +
 .../dataflow/sdk/runners/inprocess/Clock.java   |   29 +
 .../runners/inprocess/CompletionCallback.java   |   33 +
 .../ConsumerTrackingPipelineVisitor.java        |  173 +
 .../inprocess/EmptyTransformEvaluator.java      |   49 +
 .../sdk/runners/inprocess/EvaluatorKey.java     |   55 +
 .../inprocess/ExecutorServiceFactory.java       |   32 +
 .../ExecutorServiceParallelExecutor.java        |  432 +++
 .../inprocess/FlattenEvaluatorFactory.java      |   83 +
 .../runners/inprocess/ForwardingPTransform.java |   54 +
 .../inprocess/GroupByKeyEvaluatorFactory.java   |  252 ++
 .../inprocess/InMemoryWatermarkManager.java     | 1310 ++++++++
 .../sdk/runners/inprocess/InProcessBundle.java  |  121 +
 .../inprocess/InProcessBundleOutputManager.java |   50 +
 .../sdk/runners/inprocess/InProcessCreate.java  |  209 ++
 .../inprocess/InProcessEvaluationContext.java   |  405 +++
 .../inprocess/InProcessExecutionContext.java    |  106 +
 .../runners/inprocess/InProcessExecutor.java    |   46 +
 .../inprocess/InProcessPipelineOptions.java     |   90 +
 .../inprocess/InProcessPipelineRunner.java      |  343 ++
 .../inprocess/InProcessSideInputContainer.java  |  230 ++
 .../inprocess/InProcessTimerInternals.java      |   84 +
 .../inprocess/InProcessTransformResult.java     |   75 +
 .../inprocess/KeyedPValueTrackingVisitor.java   |   95 +
 .../sdk/runners/inprocess/NanosOffsetClock.java |   58 +
 .../inprocess/ParDoInProcessEvaluator.java      |  109 +
 .../inprocess/ParDoMultiEvaluatorFactory.java   |   90 +
 .../inprocess/ParDoSingleEvaluatorFactory.java  |   87 +
 .../sdk/runners/inprocess/StepAndKey.java       |   68 +
 .../runners/inprocess/StepTransformResult.java  |  157 +
 .../runners/inprocess/TransformEvaluator.java   |   45 +
 .../inprocess/TransformEvaluatorFactory.java    |   42 +
 .../inprocess/TransformEvaluatorRegistry.java   |   72 +
 .../runners/inprocess/TransformExecutor.java    |  114 +
 .../inprocess/TransformExecutorService.java     |   34 +
 .../inprocess/TransformExecutorServices.java    |  153 +
 .../UnboundedReadEvaluatorFactory.java          |  168 +
 .../runners/inprocess/ViewEvaluatorFactory.java |  121 +
 .../inprocess/WatermarkCallbackExecutor.java    |  143 +
 .../dataflow/sdk/runners/package-info.java      |   33 +
 .../dataflow/sdk/runners/worker/IsmFormat.java  |  946 ++++++
 .../sdk/runners/worker/package-info.java        |   24 +
 .../dataflow/sdk/testing/CoderProperties.java   |  349 ++
 .../dataflow/sdk/testing/DataflowAssert.java    |  825 +++++
 .../dataflow/sdk/testing/RunnableOnService.java |   30 +
 .../sdk/testing/SerializableMatcher.java        |   36 +
 .../sdk/testing/SerializableMatchers.java       | 1180 +++++++
 .../dataflow/sdk/testing/SourceTestUtils.java   |  642 ++++
 .../testing/TestDataflowPipelineOptions.java    |   26 +
 .../sdk/testing/TestDataflowPipelineRunner.java |  220 ++
 .../dataflow/sdk/testing/TestPipeline.java      |  193 ++
 .../dataflow/sdk/testing/WindowFnTestUtils.java |  325 ++
 .../dataflow/sdk/testing/package-info.java      |   21 +
 .../dataflow/sdk/transforms/Aggregator.java     |   78 +
 .../sdk/transforms/AggregatorRetriever.java     |   36 +
 .../sdk/transforms/AppliedPTransform.java       |  100 +
 .../sdk/transforms/ApproximateQuantiles.java    |  766 +++++
 .../sdk/transforms/ApproximateUnique.java       |  419 +++
 .../cloud/dataflow/sdk/transforms/Combine.java  | 2240 +++++++++++++
 .../dataflow/sdk/transforms/CombineFnBase.java  |  283 ++
 .../dataflow/sdk/transforms/CombineFns.java     | 1100 +++++++
 .../sdk/transforms/CombineWithContext.java      |  277 ++
 .../cloud/dataflow/sdk/transforms/Count.java    |  135 +
 .../cloud/dataflow/sdk/transforms/Create.java   |  426 +++
 .../cloud/dataflow/sdk/transforms/DoFn.java     |  563 ++++
 .../dataflow/sdk/transforms/DoFnReflector.java  |  668 ++++
 .../dataflow/sdk/transforms/DoFnTester.java     |  495 +++
 .../sdk/transforms/DoFnWithContext.java         |  416 +++
 .../cloud/dataflow/sdk/transforms/Filter.java   |  234 ++
 .../sdk/transforms/FlatMapElements.java         |  145 +
 .../cloud/dataflow/sdk/transforms/Flatten.java  |  219 ++
 .../dataflow/sdk/transforms/GroupByKey.java     |  575 ++++
 .../transforms/IntraBundleParallelization.java  |  346 ++
 .../cloud/dataflow/sdk/transforms/Keys.java     |   68 +
 .../cloud/dataflow/sdk/transforms/KvSwap.java   |   73 +
 .../dataflow/sdk/transforms/MapElements.java    |  112 +
 .../cloud/dataflow/sdk/transforms/Max.java      |  255 ++
 .../cloud/dataflow/sdk/transforms/Mean.java     |  202 ++
 .../cloud/dataflow/sdk/transforms/Min.java      |  255 ++
 .../dataflow/sdk/transforms/PTransform.java     |  324 ++
 .../cloud/dataflow/sdk/transforms/ParDo.java    | 1321 ++++++++
 .../dataflow/sdk/transforms/Partition.java      |  173 +
 .../sdk/transforms/RemoveDuplicates.java        |  158 +
 .../cloud/dataflow/sdk/transforms/Sample.java   |  246 ++
 .../sdk/transforms/SerializableComparator.java  |   28 +
 .../sdk/transforms/SerializableFunction.java    |   31 +
 .../dataflow/sdk/transforms/SimpleFunction.java |   54 +
 .../cloud/dataflow/sdk/transforms/Sum.java      |  188 ++
 .../cloud/dataflow/sdk/transforms/Top.java      |  559 ++++
 .../cloud/dataflow/sdk/transforms/Values.java   |   68 +
 .../cloud/dataflow/sdk/transforms/View.java     |  470 +++
 .../cloud/dataflow/sdk/transforms/WithKeys.java |  140 +
 .../dataflow/sdk/transforms/WithTimestamps.java |  129 +
 .../cloud/dataflow/sdk/transforms/Write.java    |   27 +
 .../sdk/transforms/display/DisplayData.java     |  530 ++++
 .../sdk/transforms/display/HasDisplayData.java  |   53 +
 .../sdk/transforms/join/CoGbkResult.java        |  463 +++
 .../sdk/transforms/join/CoGbkResultSchema.java  |  134 +
 .../sdk/transforms/join/CoGroupByKey.java       |  211 ++
 .../transforms/join/KeyedPCollectionTuple.java  |  247 ++
 .../sdk/transforms/join/RawUnionValue.java      |   51 +
 .../sdk/transforms/join/UnionCoder.java         |  147 +
 .../sdk/transforms/join/package-info.java       |   21 +
 .../dataflow/sdk/transforms/package-info.java   |   43 +
 .../sdk/transforms/windowing/AfterAll.java      |  117 +
 .../windowing/AfterDelayFromFirstElement.java   |  322 ++
 .../sdk/transforms/windowing/AfterEach.java     |  135 +
 .../sdk/transforms/windowing/AfterFirst.java    |  119 +
 .../sdk/transforms/windowing/AfterPane.java     |  145 +
 .../windowing/AfterProcessingTime.java          |   97 +
 .../AfterSynchronizedProcessingTime.java        |   75 +
 .../transforms/windowing/AfterWatermark.java    |  397 +++
 .../sdk/transforms/windowing/BoundedWindow.java |   46 +
 .../transforms/windowing/CalendarWindows.java   |  348 ++
 .../transforms/windowing/DefaultTrigger.java    |   95 +
 .../sdk/transforms/windowing/FixedWindows.java  |  116 +
 .../sdk/transforms/windowing/GlobalWindow.java  |   68 +
 .../sdk/transforms/windowing/GlobalWindows.java |   63 +
 .../transforms/windowing/IntervalWindow.java    |  201 ++
 .../transforms/windowing/InvalidWindows.java    |   87 +
 .../MergeOverlappingIntervalWindows.java        |   86 +
 .../windowing/NonMergingWindowFn.java           |   35 +
 .../transforms/windowing/OrFinallyTrigger.java  |  100 +
 .../sdk/transforms/windowing/OutputTimeFn.java  |  319 ++
 .../sdk/transforms/windowing/OutputTimeFns.java |  168 +
 .../sdk/transforms/windowing/PaneInfo.java      |  384 +++
 .../windowing/PartitioningWindowFn.java         |   61 +
 .../sdk/transforms/windowing/Repeatedly.java    |  100 +
 .../sdk/transforms/windowing/Sessions.java      |  112 +
 .../transforms/windowing/SlidingWindows.java    |  214 ++
 .../sdk/transforms/windowing/Trigger.java       |  544 ++++
 .../transforms/windowing/TriggerBuilder.java    |   29 +
 .../sdk/transforms/windowing/Window.java        |  662 ++++
 .../sdk/transforms/windowing/WindowFn.java      |  221 ++
 .../sdk/transforms/windowing/package-info.java  |   49 +
 .../dataflow/sdk/util/ActiveWindowSet.java      |  171 +
 .../cloud/dataflow/sdk/util/ApiSurface.java     |  642 ++++
 .../dataflow/sdk/util/AppEngineEnvironment.java |   61 +
 .../dataflow/sdk/util/AppliedCombineFn.java     |  130 +
 .../dataflow/sdk/util/AssignWindowsDoFn.java    |   67 +
 ...AttemptAndTimeBoundedExponentialBackOff.java |  168 +
 .../util/AttemptBoundedExponentialBackOff.java  |   83 +
 .../cloud/dataflow/sdk/util/AvroUtils.java      |  345 ++
 .../dataflow/sdk/util/BaseExecutionContext.java |  155 +
 .../dataflow/sdk/util/BatchTimerInternals.java  |  138 +
 .../sdk/util/BigQueryTableInserter.java         |  434 +++
 .../sdk/util/BigQueryTableRowIterator.java      |  469 +++
 .../cloud/dataflow/sdk/util/BitSetCoder.java    |   59 +
 .../BufferedElementCountingOutputStream.java    |  184 ++
 .../cloud/dataflow/sdk/util/CloudKnownType.java |  138 +
 .../cloud/dataflow/sdk/util/CloudObject.java    |  184 ++
 .../cloud/dataflow/sdk/util/CoderUtils.java     |  327 ++
 .../sdk/util/CombineContextFactory.java         |  107 +
 .../cloud/dataflow/sdk/util/CombineFnUtil.java  |  154 +
 .../dataflow/sdk/util/CounterAggregator.java    |   96 +
 .../dataflow/sdk/util/CredentialFactory.java    |   29 +
 .../cloud/dataflow/sdk/util/Credentials.java    |  192 ++
 .../sdk/util/DataflowPathValidator.java         |   97 +
 .../dataflow/sdk/util/DataflowReleaseInfo.java  |   87 +
 .../sdk/util/DirectModeExecutionContext.java    |  130 +
 .../sdk/util/DirectSideInputReader.java         |   73 +
 .../cloud/dataflow/sdk/util/DoFnInfo.java       |   67 +
 .../cloud/dataflow/sdk/util/DoFnRunner.java     |   60 +
 .../cloud/dataflow/sdk/util/DoFnRunnerBase.java |  558 ++++
 .../cloud/dataflow/sdk/util/DoFnRunners.java    |  142 +
 .../dataflow/sdk/util/ExecutableTrigger.java    |  159 +
 .../dataflow/sdk/util/ExecutionContext.java     |  102 +
 .../sdk/util/ExposedByteArrayInputStream.java   |   51 +
 .../sdk/util/ExposedByteArrayOutputStream.java  |  115 +
 .../dataflow/sdk/util/FileIOChannelFactory.java |  135 +
 .../dataflow/sdk/util/FinishedTriggers.java     |   42 +
 .../sdk/util/FinishedTriggersBitSet.java        |   68 +
 .../dataflow/sdk/util/FinishedTriggersSet.java  |   74 +
 .../dataflow/sdk/util/GcpCredentialFactory.java |   45 +
 .../dataflow/sdk/util/GcsIOChannelFactory.java  |   86 +
 .../cloud/dataflow/sdk/util/GcsStager.java      |   53 +
 .../google/cloud/dataflow/sdk/util/GcsUtil.java |  406 +++
 .../util/GroupAlsoByWindowViaWindowSetDoFn.java |  104 +
 .../sdk/util/GroupAlsoByWindowsDoFn.java        |   58 +
 .../GroupAlsoByWindowsViaOutputBufferDoFn.java  |   98 +
 .../dataflow/sdk/util/IOChannelFactory.java     |  101 +
 .../cloud/dataflow/sdk/util/IOChannelUtils.java |  204 ++
 .../sdk/util/IllegalMutationException.java      |   52 +
 .../dataflow/sdk/util/InstanceBuilder.java      |  269 ++
 .../util/IntervalBoundedExponentialBackOff.java |   87 +
 .../cloud/dataflow/sdk/util/KeyedWorkItem.java  |   41 +
 .../dataflow/sdk/util/KeyedWorkItemCoder.java   |  120 +
 .../cloud/dataflow/sdk/util/KeyedWorkItems.java |  120 +
 .../sdk/util/LateDataDroppingDoFnRunner.java    |  145 +
 .../dataflow/sdk/util/MapAggregatorValues.java  |   48 +
 .../sdk/util/MergingActiveWindowSet.java        |  544 ++++
 .../cloud/dataflow/sdk/util/MimeTypes.java      |   23 +
 .../cloud/dataflow/sdk/util/MonitoringUtil.java |  233 ++
 .../dataflow/sdk/util/MutationDetector.java     |   31 +
 .../dataflow/sdk/util/MutationDetectors.java    |  182 ++
 .../cloud/dataflow/sdk/util/NonEmptyPanes.java  |  148 +
 .../sdk/util/NonMergingActiveWindowSet.java     |   85 +
 .../sdk/util/NoopCredentialFactory.java         |   38 +
 .../dataflow/sdk/util/NoopPathValidator.java    |   48 +
 .../dataflow/sdk/util/NullSideInputReader.java  |   61 +
 .../dataflow/sdk/util/OutputReference.java      |   42 +
 .../sdk/util/PCollectionViewWindow.java         |   67 +
 .../dataflow/sdk/util/PCollectionViews.java     |  426 +++
 .../google/cloud/dataflow/sdk/util/PTuple.java  |  160 +
 .../cloud/dataflow/sdk/util/PackageUtil.java    |  327 ++
 .../dataflow/sdk/util/PaneInfoTracker.java      |  151 +
 .../cloud/dataflow/sdk/util/PathValidator.java  |   47 +
 .../sdk/util/PerKeyCombineFnRunner.java         |  147 +
 .../sdk/util/PerKeyCombineFnRunners.java        |  257 ++
 .../cloud/dataflow/sdk/util/PropertyNames.java  |  107 +
 .../dataflow/sdk/util/RandomAccessData.java     |  352 ++
 .../cloud/dataflow/sdk/util/ReduceFn.java       |  128 +
 .../sdk/util/ReduceFnContextFactory.java        |  495 +++
 .../cloud/dataflow/sdk/util/ReduceFnRunner.java |  843 +++++
 .../sdk/util/ReifyTimestampAndWindowsDoFn.java  |   46 +
 .../cloud/dataflow/sdk/util/Reshuffle.java      |  145 +
 .../dataflow/sdk/util/ReshuffleTrigger.java     |   61 +
 .../sdk/util/RetryHttpRequestInitializer.java   |  250 ++
 .../dataflow/sdk/util/SerializableUtils.java    |  159 +
 .../cloud/dataflow/sdk/util/Serializer.java     |  145 +
 .../sdk/util/ShardingWritableByteChannel.java   |  118 +
 .../dataflow/sdk/util/SideInputReader.java      |   48 +
 .../dataflow/sdk/util/SimpleDoFnRunner.java     |   55 +
 .../google/cloud/dataflow/sdk/util/Stager.java  |   29 +
 .../cloud/dataflow/sdk/util/StreamUtils.java    |   68 +
 .../cloud/dataflow/sdk/util/StringUtils.java    |  242 ++
 .../google/cloud/dataflow/sdk/util/Structs.java |  384 +++
 .../dataflow/sdk/util/SystemDoFnInternal.java   |   37 +
 .../cloud/dataflow/sdk/util/SystemReduceFn.java |  133 +
 .../cloud/dataflow/sdk/util/TestCredential.java |   51 +
 .../cloud/dataflow/sdk/util/TimeDomain.java     |   41 +
 .../cloud/dataflow/sdk/util/TimeUtil.java       |  164 +
 .../cloud/dataflow/sdk/util/TimerInternals.java |  269 ++
 .../google/cloud/dataflow/sdk/util/Timers.java  |   60 +
 .../cloud/dataflow/sdk/util/Transport.java      |  205 ++
 .../sdk/util/TriggerContextFactory.java         |  522 +++
 .../cloud/dataflow/sdk/util/TriggerRunner.java  |  223 ++
 .../dataflow/sdk/util/UnownedInputStream.java   |   76 +
 .../dataflow/sdk/util/UnownedOutputStream.java  |   56 +
 .../sdk/util/UploadIdResponseInterceptor.java   |   61 +
 .../dataflow/sdk/util/UserCodeException.java    |   94 +
 .../dataflow/sdk/util/ValueWithRecordId.java    |  154 +
 .../google/cloud/dataflow/sdk/util/Values.java  |   88 +
 .../google/cloud/dataflow/sdk/util/VarInt.java  |  115 +
 .../cloud/dataflow/sdk/util/WatermarkHold.java  |  450 +++
 .../cloud/dataflow/sdk/util/Weighted.java       |   27 +
 .../cloud/dataflow/sdk/util/WeightedValue.java  |   45 +
 .../cloud/dataflow/sdk/util/WindowTracing.java  |   36 +
 .../cloud/dataflow/sdk/util/WindowedValue.java  |  720 +++++
 .../dataflow/sdk/util/WindowingInternals.java   |   82 +
 .../dataflow/sdk/util/WindowingStrategy.java    |  268 ++
 .../cloud/dataflow/sdk/util/ZipFiles.java       |  294 ++
 .../cloud/dataflow/sdk/util/common/Counter.java | 1103 +++++++
 .../sdk/util/common/CounterProvider.java        |   26 +
 .../dataflow/sdk/util/common/CounterSet.java    |  177 ++
 .../util/common/ElementByteSizeObservable.java  |   41 +
 .../ElementByteSizeObservableIterable.java      |   63 +
 .../ElementByteSizeObservableIterator.java      |   36 +
 .../util/common/ElementByteSizeObserver.java    |   92 +
 .../sdk/util/common/PeekingReiterator.java      |   98 +
 .../sdk/util/common/ReflectHelpers.java         |  209 ++
 .../dataflow/sdk/util/common/Reiterable.java    |   27 +
 .../dataflow/sdk/util/common/Reiterator.java    |   39 +
 .../dataflow/sdk/util/common/package-info.java  |   18 +
 .../sdk/util/common/worker/StateSampler.java    |  365 +++
 .../sdk/util/common/worker/package-info.java    |   18 +
 .../cloud/dataflow/sdk/util/gcsfs/GcsPath.java  |  619 ++++
 .../dataflow/sdk/util/gcsfs/package-info.java   |   18 +
 .../cloud/dataflow/sdk/util/package-info.java   |   18 +
 .../util/state/AccumulatorCombiningState.java   |   51 +
 .../cloud/dataflow/sdk/util/state/BagState.java |   26 +
 .../dataflow/sdk/util/state/CombiningState.java |   40 +
 .../CopyOnAccessInMemoryStateInternals.java     |  454 +++
 .../sdk/util/state/InMemoryStateInternals.java  |  414 +++
 .../sdk/util/state/MergingStateAccessor.java    |   40 +
 .../dataflow/sdk/util/state/ReadableState.java  |   53 +
 .../cloud/dataflow/sdk/util/state/State.java    |   30 +
 .../dataflow/sdk/util/state/StateAccessor.java  |   36 +
 .../dataflow/sdk/util/state/StateContext.java   |   41 +
 .../dataflow/sdk/util/state/StateContexts.java  |  107 +
 .../dataflow/sdk/util/state/StateInternals.java |   55 +
 .../dataflow/sdk/util/state/StateMerging.java   |  254 ++
 .../dataflow/sdk/util/state/StateNamespace.java |   54 +
 .../sdk/util/state/StateNamespaceForTest.java   |   63 +
 .../sdk/util/state/StateNamespaces.java         |  277 ++
 .../dataflow/sdk/util/state/StateTable.java     |   89 +
 .../cloud/dataflow/sdk/util/state/StateTag.java |   96 +
 .../dataflow/sdk/util/state/StateTags.java      |  579 ++++
 .../dataflow/sdk/util/state/ValueState.java     |   35 +
 .../sdk/util/state/WatermarkHoldState.java      |   42 +
 .../google/cloud/dataflow/sdk/values/KV.java    |  130 +
 .../cloud/dataflow/sdk/values/PBegin.java       |   87 +
 .../cloud/dataflow/sdk/values/PCollection.java  |  250 ++
 .../dataflow/sdk/values/PCollectionList.java    |  238 ++
 .../dataflow/sdk/values/PCollectionTuple.java   |  264 ++
 .../dataflow/sdk/values/PCollectionView.java    |   64 +
 .../google/cloud/dataflow/sdk/values/PDone.java |   47 +
 .../cloud/dataflow/sdk/values/PInput.java       |   56 +
 .../cloud/dataflow/sdk/values/POutput.java      |   76 +
 .../dataflow/sdk/values/POutputValueBase.java   |  102 +
 .../cloud/dataflow/sdk/values/PValue.java       |   38 +
 .../cloud/dataflow/sdk/values/PValueBase.java   |  155 +
 .../dataflow/sdk/values/TimestampedValue.java   |  155 +
 .../cloud/dataflow/sdk/values/TupleTag.java     |  196 ++
 .../cloud/dataflow/sdk/values/TupleTagList.java |  148 +
 .../dataflow/sdk/values/TypeDescriptor.java     |  351 ++
 .../cloud/dataflow/sdk/values/TypedPValue.java  |  197 ++
 .../cloud/dataflow/sdk/values/package-info.java |   52 +
 sdks/java/core/src/main/proto/README.md         |   27 +
 .../main/proto/proto2_coder_test_messages.proto |   51 +
 .../google/cloud/dataflow/sdk/sdk.properties    |    5 +
 .../cloud/dataflow/sdk/DataflowMatchers.java    |   65 +
 .../google/cloud/dataflow/sdk/PipelineTest.java |  296 ++
 .../google/cloud/dataflow/sdk/TestUtils.java    |  213 ++
 .../cloud/dataflow/sdk/WindowMatchers.java      |  137 +
 .../dataflow/sdk/coders/AvroCoderTest.java      |  754 +++++
 .../sdk/coders/BigEndianIntegerCoderTest.java   |   90 +
 .../sdk/coders/BigEndianLongCoderTest.java      |   94 +
 .../dataflow/sdk/coders/ByteArrayCoderTest.java |  144 +
 .../dataflow/sdk/coders/ByteCoderTest.java      |   91 +
 .../sdk/coders/ByteStringCoderTest.java         |  121 +
 .../dataflow/sdk/coders/CoderFactoriesTest.java |  100 +
 .../dataflow/sdk/coders/CoderProvidersTest.java |   71 +
 .../dataflow/sdk/coders/CoderRegistryTest.java  |  521 +++
 .../cloud/dataflow/sdk/coders/CoderTest.java    |   78 +
 .../sdk/coders/CollectionCoderTest.java         |   93 +
 .../dataflow/sdk/coders/CustomCoderTest.java    |  135 +
 .../dataflow/sdk/coders/DefaultCoderTest.java   |  128 +
 .../dataflow/sdk/coders/DelegateCoderTest.java  |  141 +
 .../dataflow/sdk/coders/DoubleCoderTest.java    |   96 +
 .../dataflow/sdk/coders/DurationCoderTest.java  |   86 +
 .../dataflow/sdk/coders/EntityCoderTest.java    |  108 +
 .../dataflow/sdk/coders/InstantCoderTest.java   |  116 +
 .../dataflow/sdk/coders/IterableCoderTest.java  |  109 +
 .../dataflow/sdk/coders/JAXBCoderTest.java      |   99 +
 .../cloud/dataflow/sdk/coders/KvCoderTest.java  |  118 +
 .../dataflow/sdk/coders/ListCoderTest.java      |  134 +
 .../cloud/dataflow/sdk/coders/MapCoderTest.java |  106 +
 .../dataflow/sdk/coders/NullableCoderTest.java  |  132 +
 .../sdk/coders/PrintBase64Encodings.java        |   81 +
 .../dataflow/sdk/coders/Proto2CoderTest.java    |  145 +
 .../sdk/coders/SerializableCoderTest.java       |  222 ++
 .../cloud/dataflow/sdk/coders/SetCoderTest.java |   86 +
 .../dataflow/sdk/coders/StandardCoderTest.java  |  176 +
 .../sdk/coders/StringDelegateCoderTest.java     |   72 +
 .../sdk/coders/StringUtf8CoderTest.java         |   80 +
 .../sdk/coders/StructuralByteArrayTest.java     |   39 +
 .../sdk/coders/TableRowJsonCoderTest.java       |   86 +
 .../sdk/coders/TextualIntegerCoderTest.java     |   90 +
 .../dataflow/sdk/coders/VarIntCoderTest.java    |   91 +
 .../dataflow/sdk/coders/VarLongCoderTest.java   |   94 +
 .../sdk/coders/protobuf/ProtoCoderTest.java     |  182 ++
 .../sdk/coders/protobuf/ProtobufUtilTest.java   |  195 ++
 .../sdk/io/AvroIOGeneratedClassTest.java        |  374 +++
 .../cloud/dataflow/sdk/io/AvroIOTest.java       |  226 ++
 .../cloud/dataflow/sdk/io/AvroSourceTest.java   |  692 ++++
 .../cloud/dataflow/sdk/io/BigQueryIOTest.java   |  445 +++
 .../io/BoundedReadFromUnboundedSourceTest.java  |  132 +
 .../dataflow/sdk/io/CompressedSourceTest.java   |  430 +++
 .../dataflow/sdk/io/CountingInputTest.java      |  125 +
 .../dataflow/sdk/io/CountingSourceTest.java     |  216 ++
 .../cloud/dataflow/sdk/io/DatastoreIOTest.java  |  631 ++++
 .../dataflow/sdk/io/FileBasedSinkTest.java      |  512 +++
 .../dataflow/sdk/io/FileBasedSourceTest.java    |  914 ++++++
 .../dataflow/sdk/io/OffsetBasedSourceTest.java  |  278 ++
 .../cloud/dataflow/sdk/io/PubsubIOTest.java     |  233 ++
 .../google/cloud/dataflow/sdk/io/ReadTest.java  |  144 +
 .../cloud/dataflow/sdk/io/TextIOTest.java       |  562 ++++
 .../google/cloud/dataflow/sdk/io/WriteTest.java |  341 ++
 .../cloud/dataflow/sdk/io/XmlSinkTest.java      |  235 ++
 .../cloud/dataflow/sdk/io/XmlSourceTest.java    |  822 +++++
 .../sdk/io/bigtable/BigtableIOTest.java         |  688 ++++
 .../range/ByteKeyRangeEstimateFractionTest.java |   69 +
 .../range/ByteKeyRangeInterpolateKeyTest.java   |   73 +
 .../dataflow/sdk/io/range/ByteKeyRangeTest.java |  396 +++
 .../sdk/io/range/ByteKeyRangeTrackerTest.java   |  118 +
 .../dataflow/sdk/io/range/ByteKeyTest.java      |  178 ++
 .../sdk/io/range/OffsetRangeTrackerTest.java    |  186 ++
 .../com/google/cloud/dataflow/sdk/io/user.avsc  |   10 +
 .../DataflowPipelineDebugOptionsTest.java       |   40 +
 .../options/DataflowPipelineOptionsTest.java    |   91 +
 .../options/DataflowProfilingOptionsTest.java   |   47 +
 .../DataflowWorkerLoggingOptionsTest.java       |   73 +
 .../dataflow/sdk/options/GcpOptionsTest.java    |  123 +
 .../sdk/options/GoogleApiDebugOptionsTest.java  |  147 +
 .../sdk/options/PipelineOptionsFactoryTest.java | 1154 +++++++
 .../sdk/options/PipelineOptionsTest.java        |  126 +
 .../options/PipelineOptionsValidatorTest.java   |  310 ++
 .../sdk/options/ProxyInvocationHandlerTest.java |  691 ++++
 .../AggregatorPipelineExtractorTest.java        |  228 ++
 .../BlockingDataflowPipelineRunnerTest.java     |  301 ++
 .../sdk/runners/DataflowPipelineJobTest.java    |  603 ++++
 .../runners/DataflowPipelineRegistrarTest.java  |   72 +
 .../sdk/runners/DataflowPipelineRunnerTest.java | 1370 ++++++++
 .../sdk/runners/DataflowPipelineTest.java       |   44 +
 .../runners/DataflowPipelineTranslatorTest.java |  889 ++++++
 .../runners/DirectPipelineRegistrarTest.java    |   69 +
 .../sdk/runners/DirectPipelineRunnerTest.java   |  210 ++
 .../sdk/runners/DirectPipelineTest.java         |   34 +
 .../sdk/runners/PipelineRunnerTest.java         |   82 +
 .../dataflow/sdk/runners/TransformTreeTest.java |  194 ++
 .../sdk/runners/dataflow/CustomSourcesTest.java |  273 ++
 .../runners/dataflow/TestCountingSource.java    |  235 ++
 .../BoundedReadEvaluatorFactoryTest.java        |  287 ++
 .../ConsumerTrackingPipelineVisitorTest.java    |  233 ++
 .../inprocess/FlattenEvaluatorFactoryTest.java  |  136 +
 .../inprocess/ForwardingPTransformTest.java     |  100 +
 .../GroupByKeyEvaluatorFactoryTest.java         |  178 ++
 .../inprocess/InMemoryWatermarkManagerTest.java | 1111 +++++++
 .../runners/inprocess/InProcessBundleTest.java  |  143 +
 .../runners/inprocess/InProcessCreateTest.java  |  199 ++
 .../InProcessEvaluationContextTest.java         |  544 ++++
 .../inprocess/InProcessPipelineRunnerTest.java  |   77 +
 .../InProcessSideInputContainerTest.java        |  370 +++
 .../inprocess/InProcessTimerInternalsTest.java  |  131 +
 .../KeyedPValueTrackingVisitorTest.java         |  189 ++
 .../sdk/runners/inprocess/MockClock.java        |   60 +
 .../ParDoMultiEvaluatorFactoryTest.java         |  412 +++
 .../ParDoSingleEvaluatorFactoryTest.java        |  310 ++
 .../TransformExecutorServicesTest.java          |  134 +
 .../inprocess/TransformExecutorTest.java        |  312 ++
 .../UnboundedReadEvaluatorFactoryTest.java      |  327 ++
 .../inprocess/ViewEvaluatorFactoryTest.java     |   96 +
 .../WatermarkCallbackExecutorTest.java          |  126 +
 .../sdk/testing/CoderPropertiesTest.java        |  214 ++
 .../sdk/testing/DataflowAssertTest.java         |  326 ++
 .../sdk/testing/DataflowJUnitTestRunner.java    |  129 +
 .../dataflow/sdk/testing/ExpectedLogs.java      |  306 ++
 .../dataflow/sdk/testing/ExpectedLogsTest.java  |  153 +
 .../sdk/testing/FastNanoClockAndSleeper.java    |   47 +
 .../testing/FastNanoClockAndSleeperTest.java    |   47 +
 .../sdk/testing/PCollectionViewTesting.java     |  295 ++
 .../sdk/testing/ResetDateTimeProvider.java      |   41 +
 .../sdk/testing/ResetDateTimeProviderTest.java  |   55 +
 .../sdk/testing/RestoreSystemProperties.java    |   51 +
 .../testing/RestoreSystemPropertiesTest.java    |   50 +
 .../sdk/testing/SerializableMatchersTest.java   |  165 +
 .../sdk/testing/SystemNanoTimeSleeper.java      |   68 +
 .../sdk/testing/SystemNanoTimeSleeperTest.java  |   53 +
 .../testing/TestDataflowPipelineRunnerTest.java |  317 ++
 .../dataflow/sdk/testing/TestPipelineTest.java  |   93 +
 .../transforms/ApproximateQuantilesTest.java    |  299 ++
 .../sdk/transforms/ApproximateUniqueTest.java   |  291 ++
 .../dataflow/sdk/transforms/CombineFnsTest.java |  413 +++
 .../dataflow/sdk/transforms/CombineTest.java    | 1137 +++++++
 .../dataflow/sdk/transforms/CountTest.java      |  121 +
 .../dataflow/sdk/transforms/CreateTest.java     |  240 ++
 .../sdk/transforms/DoFnContextTest.java         |   68 +
 .../DoFnDelegatingAggregatorTest.java           |  143 +
 .../sdk/transforms/DoFnReflectorTest.java       |  493 +++
 .../cloud/dataflow/sdk/transforms/DoFnTest.java |  206 ++
 .../dataflow/sdk/transforms/DoFnTesterTest.java |  253 ++
 .../sdk/transforms/DoFnWithContextTest.java     |  225 ++
 .../dataflow/sdk/transforms/FilterTest.java     |  160 +
 .../sdk/transforms/FlatMapElementsTest.java     |  124 +
 .../dataflow/sdk/transforms/FlattenTest.java    |  369 +++
 .../dataflow/sdk/transforms/GroupByKeyTest.java |  438 +++
 .../IntraBundleParallelizationTest.java         |  250 ++
 .../cloud/dataflow/sdk/transforms/KeysTest.java |   83 +
 .../dataflow/sdk/transforms/KvSwapTest.java     |   91 +
 .../sdk/transforms/MapElementsTest.java         |  134 +
 .../cloud/dataflow/sdk/transforms/MaxTest.java  |   66 +
 .../cloud/dataflow/sdk/transforms/MeanTest.java |   72 +
 .../cloud/dataflow/sdk/transforms/MinTest.java  |   66 +
 .../cloud/dataflow/sdk/transforms/NoOpDoFn.java |  143 +
 .../dataflow/sdk/transforms/PTransformTest.java |   41 +
 .../dataflow/sdk/transforms/ParDoTest.java      | 1541 +++++++++
 .../dataflow/sdk/transforms/PartitionTest.java  |  140 +
 .../sdk/transforms/RemoveDuplicatesTest.java    |  131 +
 .../dataflow/sdk/transforms/SampleTest.java     |  260 ++
 .../sdk/transforms/SimpleStatsFnsTest.java      |  129 +
 .../cloud/dataflow/sdk/transforms/SumTest.java  |   66 +
 .../cloud/dataflow/sdk/transforms/TopTest.java  |  259 ++
 .../dataflow/sdk/transforms/ValuesTest.java     |   93 +
 .../cloud/dataflow/sdk/transforms/ViewTest.java | 1548 +++++++++
 .../dataflow/sdk/transforms/WithKeysTest.java   |  127 +
 .../sdk/transforms/WithTimestampsTest.java      |  210 ++
 .../transforms/display/DisplayDataMatchers.java |   98 +
 .../display/DisplayDataMatchersTest.java        |   81 +
 .../sdk/transforms/display/DisplayDataTest.java |  633 ++++
 .../transforms/join/CoGbkResultCoderTest.java   |   85 +
 .../sdk/transforms/join/CoGbkResultTest.java    |  124 +
 .../sdk/transforms/join/CoGroupByKeyTest.java   |  507 +++
 .../sdk/transforms/join/UnionCoderTest.java     |   48 +
 .../sdk/transforms/windowing/AfterAllTest.java  |  151 +
 .../sdk/transforms/windowing/AfterEachTest.java |  122 +
 .../transforms/windowing/AfterFirstTest.java    |  175 +
 .../sdk/transforms/windowing/AfterPaneTest.java |  126 +
 .../windowing/AfterProcessingTimeTest.java      |  157 +
 .../AfterSynchronizedProcessingTimeTest.java    |  121 +
 .../windowing/AfterWatermarkTest.java           |  338 ++
 .../windowing/CalendarWindowsTest.java          |  260 ++
 .../windowing/DefaultTriggerTest.java           |  176 +
 .../transforms/windowing/FixedWindowsTest.java  |  124 +
 .../windowing/IntervalWindowTest.java           |   94 +
 .../windowing/OrFinallyTriggerTest.java         |  209 ++
 .../sdk/transforms/windowing/PaneInfoTest.java  |   75 +
 .../transforms/windowing/RepeatedlyTest.java    |  128 +
 .../sdk/transforms/windowing/SessionsTest.java  |  156 +
 .../windowing/SlidingWindowsTest.java           |  193 ++
 .../sdk/transforms/windowing/TriggerTest.java   |  117 +
 .../sdk/transforms/windowing/WindowTest.java    |  226 ++
 .../sdk/transforms/windowing/WindowingTest.java |  244 ++
 .../cloud/dataflow/sdk/util/ApiSurfaceTest.java |  187 ++
 ...mptAndTimeBoundedExponentialBackOffTest.java |  212 ++
 .../AttemptBoundedExponentialBackOffTest.java   |   85 +
 .../cloud/dataflow/sdk/util/AvroUtilsTest.java  |  225 ++
 .../sdk/util/BatchTimerInternalsTest.java       |  116 +
 .../sdk/util/BigQueryTableInserterTest.java     |  239 ++
 .../sdk/util/BigQueryTableRowIteratorTest.java  |  255 ++
 .../dataflow/sdk/util/BigQueryUtilTest.java     |  479 +++
 ...BufferedElementCountingOutputStreamTest.java |  205 ++
 .../cloud/dataflow/sdk/util/CoderUtilsTest.java |  229 ++
 .../dataflow/sdk/util/CombineFnUtilTest.java    |   62 +
 .../sdk/util/CounterAggregatorTest.java         |  253 ++
 .../sdk/util/DataflowPathValidatorTest.java     |   92 +
 .../sdk/util/ExecutableTriggerTest.java         |  130 +
 .../util/ExposedByteArrayInputStreamTest.java   |   78 +
 .../util/ExposedByteArrayOutputStreamTest.java  |  245 ++
 .../sdk/util/FileIOChannelFactoryTest.java      |  226 ++
 .../sdk/util/FinishedTriggersBitSetTest.java    |   54 +
 .../sdk/util/FinishedTriggersProperties.java    |  109 +
 .../sdk/util/FinishedTriggersSetTest.java       |   60 +
 .../sdk/util/GcsIOChannelFactoryTest.java       |   43 +
 .../cloud/dataflow/sdk/util/GcsUtilTest.java    |  490 +++
 .../sdk/util/GroupAlsoByWindowsProperties.java  |  718 +++++
 ...oupAlsoByWindowsViaOutputBufferDoFnTest.java |  111 +
 .../dataflow/sdk/util/IOChannelUtilsTest.java   |   94 +
 .../dataflow/sdk/util/InstanceBuilderTest.java  |  115 +
 .../IntervalBoundedExponentialBackOffTest.java  |   99 +
 .../sdk/util/KeyedWorkItemCoderTest.java        |   61 +
 .../util/LateDataDroppingDoFnRunnerTest.java    |  115 +
 .../sdk/util/MergingActiveWindowSetTest.java    |  175 +
 .../dataflow/sdk/util/MonitoringUtilTest.java   |  146 +
 .../sdk/util/MutationDetectorsTest.java         |  148 +
 .../cloud/dataflow/sdk/util/PTupleTest.java     |   40 +
 .../dataflow/sdk/util/PackageUtilTest.java      |  482 +++
 .../dataflow/sdk/util/RandomAccessDataTest.java |  205 ++
 .../dataflow/sdk/util/ReduceFnRunnerTest.java   | 1049 ++++++
 .../cloud/dataflow/sdk/util/ReduceFnTester.java |  776 +++++
 .../cloud/dataflow/sdk/util/ReshuffleTest.java  |  208 ++
 .../dataflow/sdk/util/ReshuffleTriggerTest.java |   58 +
 .../util/RetryHttpRequestInitializerTest.java   |  296 ++
 .../sdk/util/SerializableUtilsTest.java         |  165 +
 .../cloud/dataflow/sdk/util/SerializerTest.java |  162 +
 .../dataflow/sdk/util/SimpleDoFnRunnerTest.java |   86 +
 .../dataflow/sdk/util/StreamUtilsTest.java      |   71 +
 .../dataflow/sdk/util/StringUtilsTest.java      |  145 +
 .../cloud/dataflow/sdk/util/StructsTest.java    |  206 ++
 .../cloud/dataflow/sdk/util/TimeUtilTest.java   |   73 +
 .../dataflow/sdk/util/TimerInternalsTest.java   |   52 +
 .../cloud/dataflow/sdk/util/TriggerTester.java  |  585 ++++
 .../sdk/util/UnownedInputStreamTest.java        |   76 +
 .../sdk/util/UnownedOutputStreamTest.java       |   57 +
 .../util/UploadIdResponseInterceptorTest.java   |   99 +
 .../sdk/util/UserCodeExceptionTest.java         |  176 +
 .../cloud/dataflow/sdk/util/VarIntTest.java     |  277 ++
 .../dataflow/sdk/util/WindowedValueTest.java    |   57 +
 .../cloud/dataflow/sdk/util/ZipFilesTest.java   |  311 ++
 .../sdk/util/common/CounterSetTest.java         |  225 ++
 .../dataflow/sdk/util/common/CounterTest.java   |  589 ++++
 .../sdk/util/common/CounterTestUtils.java       |   56 +
 .../sdk/util/common/ReflectHelpersTest.java     |  126 +
 .../dataflow/sdk/util/gcsfs/GcsPathTest.java    |  333 ++
 .../CopyOnAccessInMemoryStateInternalsTest.java |  553 ++++
 .../util/state/InMemoryStateInternalsTest.java  |  348 ++
 .../sdk/util/state/StateNamespacesTest.java     |  129 +
 .../dataflow/sdk/util/state/StateTagTest.java   |  173 +
 .../cloud/dataflow/sdk/values/KVTest.java       |  112 +
 .../sdk/values/PCollectionListTest.java         |   47 +
 .../sdk/values/PCollectionTupleTest.java        |   93 +
 .../cloud/dataflow/sdk/values/PDoneTest.java    |  102 +
 .../cloud/dataflow/sdk/values/TupleTagTest.java |   87 +
 .../dataflow/sdk/values/TypeDescriptorTest.java |  193 ++
 .../dataflow/sdk/values/TypedPValueTest.java    |  164 +
 .../PipelineOptionsFactoryJava8Test.java        |   90 +
 1451 files changed, 156174 insertions(+), 156174 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index b8944d6..3145c40 100644
--- a/pom.xml
+++ b/pom.xml
@@ -125,7 +125,7 @@
   <packaging>pom</packaging>
 
   <modules>
-    <module>sdk</module>
+    <module>sdks/java/core</module>
     <module>runners</module>
     <module>examples</module>
     <module>maven-archetypes</module>

[20/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java
deleted file mode 100644
index c77ac44..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ParDo.java
+++ /dev/null
@@ -1,1321 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.display.DisplayData.Builder;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.util.DirectModeExecutionContext;
-import com.google.cloud.dataflow.sdk.util.DirectSideInputReader;
-import com.google.cloud.dataflow.sdk.util.DoFnRunner;
-import com.google.cloud.dataflow.sdk.util.DoFnRunnerBase;
-import com.google.cloud.dataflow.sdk.util.DoFnRunners;
-import com.google.cloud.dataflow.sdk.util.IllegalMutationException;
-import com.google.cloud.dataflow.sdk.util.MutationDetector;
-import com.google.cloud.dataflow.sdk.util.MutationDetectors;
-import com.google.cloud.dataflow.sdk.util.PTuple;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.util.SideInputReader;
-import com.google.cloud.dataflow.sdk.util.StringUtils;
-import com.google.cloud.dataflow.sdk.util.UserCodeException;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TupleTagList;
-import com.google.cloud.dataflow.sdk.values.TypedPValue;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Maps;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentMap;
-
-import javax.annotation.Nullable;
-
-/**
- * {@link ParDo} is the core element-wise transform in Google Cloud
- * Dataflow, invoking a user-specified function on each of the elements of the input
- * {@link PCollection} to produce zero or more output elements, all
- * of which are collected into the output {@link PCollection}.
- *
- * <p>Elements are processed independently, and possibly in parallel across
- * distributed cloud resources.
- *
- * <p>The {@link ParDo} processing style is similar to what happens inside
- * the "Mapper" or "Reducer" class of a MapReduce-style algorithm.
- *
- * <h2>{@link DoFn DoFns}</h2>
- *
- * <p>The function to use to process each element is specified by a
- * {@link DoFn DoFn&lt;InputT, OutputT&gt;}, primarily via its
- * {@link DoFn#processElement processElement} method. The {@link DoFn} may also
- * override the default implementations of {@link DoFn#startBundle startBundle}
- * and {@link DoFn#finishBundle finishBundle}.
- *
- * <p>Conceptually, when a {@link ParDo} transform is executed, the
- * elements of the input {@link PCollection} are first divided up
- * into some number of "bundles". These are farmed off to distributed
- * worker machines (or run locally, if using the {@link DirectPipelineRunner}).
- * For each bundle of input elements processing proceeds as follows:
- *
- * <ol>
- *   <li>A fresh instance of the argument {@link DoFn} is created on a worker. This may
- *     be through deserialization or other means. If the {@link DoFn} subclass
- *     does not override {@link DoFn#startBundle startBundle} or
- *     {@link DoFn#finishBundle finishBundle} then this may be optimized since
- *     it cannot observe the start and end of a bundle.</li>
- *   <li>The {@link DoFn DoFn's} {@link DoFn#startBundle} method is called to
- *     initialize it. If this method is not overridden, the call may be optimized
- *     away.</li>
- *   <li>The {@link DoFn DoFn's} {@link DoFn#processElement} method
- *     is called on each of the input elements in the bundle.</li>
- *   <li>The {@link DoFn DoFn's} {@link DoFn#finishBundle} method is called
- *     to complete its work. After {@link DoFn#finishBundle} is called, the
- *     framework will never again invoke any of these three processing methods.
- *     If this method is not overridden, this call may be optimized away.</li>
- * </ol>
- *
- * Each of the calls to any of the {@link DoFn DoFn's} processing
- * methods can produce zero or more output elements. All of the
- * output elements from all of the {@link DoFn} instances
- * are included in the output {@link PCollection}.
- *
- * <p>For example:
- *
- * <pre> {@code
- * PCollection<String> lines = ...;
- * PCollection<String> words =
- *     lines.apply(ParDo.of(new DoFn<String, String>() {
- *         public void processElement(ProcessContext c) {
- *           String line = c.element();
- *           for (String word : line.split("[^a-zA-Z']+")) {
- *             c.output(word);
- *           }
- *         }}));
- * PCollection<Integer> wordLengths =
- *     words.apply(ParDo.of(new DoFn<String, Integer>() {
- *         public void processElement(ProcessContext c) {
- *           String word = c.element();
- *           Integer length = word.length();
- *           c.output(length);
- *         }}));
- * } </pre>
- *
- * <p>Each output element has the same timestamp and is in the same windows
- * as its corresponding input element, and the output {@code PCollection}
- * has the same {@link WindowFn} associated with it as the input.
- *
- * <h2>Naming {@link ParDo ParDo} transforms</h2>
- *
- * <p>The name of a transform is used to provide a name for any node in the
- * {@link Pipeline} graph resulting from application of the transform.
- * It is best practice to provide a name at the time of application,
- * via {@link PCollection#apply(String, PTransform)}. Otherwise,
- * a unique name - which may not be stable across pipeline revisions -
- * will be generated, based on the transform name.
- *
- * <p>If a {@link ParDo} is applied exactly once inlined, then
- * it can be given a name via {@link #named}. For example:
- *
- * <pre> {@code
- * PCollection<String> words =
- *     lines.apply(ParDo.named("ExtractWords")
- *                      .of(new DoFn<String, String>() { ... }));
- * PCollection<Integer> wordLengths =
- *     words.apply(ParDo.named("ComputeWordLengths")
- *                      .of(new DoFn<String, Integer>() { ... }));
- * } </pre>
- *
- * <h2>Side Inputs</h2>
- *
- * <p>While a {@link ParDo} processes elements from a single "main input"
- * {@link PCollection}, it can take additional "side input"
- * {@link PCollectionView PCollectionViews}. These side input
- * {@link PCollectionView PCollectionViews} express styles of accessing
- * {@link PCollection PCollections} computed by earlier pipeline operations,
- * passed in to the {@link ParDo} transform using
- * {@link #withSideInputs}, and their contents accessible to each of
- * the {@link DoFn} operations via {@link DoFn.ProcessContext#sideInput sideInput}.
- * For example:
- *
- * <pre> {@code
- * PCollection<String> words = ...;
- * PCollection<Integer> maxWordLengthCutOff = ...; // Singleton PCollection
- * final PCollectionView<Integer> maxWordLengthCutOffView =
- *     maxWordLengthCutOff.apply(View.<Integer>asSingleton());
- * PCollection<String> wordsBelowCutOff =
- *     words.apply(ParDo.withSideInputs(maxWordLengthCutOffView)
- *                      .of(new DoFn<String, String>() {
- *         public void processElement(ProcessContext c) {
- *           String word = c.element();
- *           int lengthCutOff = c.sideInput(maxWordLengthCutOffView);
- *           if (word.length() <= lengthCutOff) {
- *             c.output(word);
- *           }
- *         }}));
- * } </pre>
- *
- * <h2>Side Outputs</h2>
- *
- * <p>Optionally, a {@link ParDo} transform can produce multiple
- * output {@link PCollection PCollections}, both a "main output"
- * {@code PCollection<OutputT>} plus any number of "side output"
- * {@link PCollection PCollections}, each keyed by a distinct {@link TupleTag},
- * and bundled in a {@link PCollectionTuple}. The {@link TupleTag TupleTags}
- * to be used for the output {@link PCollectionTuple} are specified by
- * invoking {@link #withOutputTags}. Unconsumed side outputs do not
- * necessarily need to be explicitly specified, even if the {@link DoFn}
- * generates them. Within the {@link DoFn}, an element is added to the
- * main output {@link PCollection} as normal, using
- * {@link DoFn.Context#output}, while an element is added to a side output
- * {@link PCollection} using {@link DoFn.Context#sideOutput}. For example:
- *
- * <pre> {@code
- * PCollection<String> words = ...;
- * // Select words whose length is below a cut off,
- * // plus the lengths of words that are above the cut off.
- * // Also select words starting with "MARKER".
- * final int wordLengthCutOff = 10;
- * // Create tags to use for the main and side outputs.
- * final TupleTag<String> wordsBelowCutOffTag =
- *     new TupleTag<String>(){};
- * final TupleTag<Integer> wordLengthsAboveCutOffTag =
- *     new TupleTag<Integer>(){};
- * final TupleTag<String> markedWordsTag =
- *     new TupleTag<String>(){};
- * PCollectionTuple results =
- *     words.apply(
- *         ParDo
- *         // Specify the main and consumed side output tags of the
- *         // PCollectionTuple result:
- *         .withOutputTags(wordsBelowCutOffTag,
- *                         TupleTagList.of(wordLengthsAboveCutOffTag)
- *                                     .and(markedWordsTag))
- *         .of(new DoFn<String, String>() {
- *             // Create a tag for the unconsumed side output.
- *             final TupleTag<String> specialWordsTag =
- *                 new TupleTag<String>(){};
- *             public void processElement(ProcessContext c) {
- *               String word = c.element();
- *               if (word.length() <= wordLengthCutOff) {
- *                 // Emit this short word to the main output.
- *                 c.output(word);
- *               } else {
- *                 // Emit this long word's length to a side output.
- *                 c.sideOutput(wordLengthsAboveCutOffTag, word.length());
- *               }
- *               if (word.startsWith("MARKER")) {
- *                 // Emit this word to a different side output.
- *                 c.sideOutput(markedWordsTag, word);
- *               }
- *               if (word.startsWith("SPECIAL")) {
- *                 // Emit this word to the unconsumed side output.
- *                 c.sideOutput(specialWordsTag, word);
- *               }
- *             }}));
- * // Extract the PCollection results, by tag.
- * PCollection<String> wordsBelowCutOff =
- *     results.get(wordsBelowCutOffTag);
- * PCollection<Integer> wordLengthsAboveCutOff =
- *     results.get(wordLengthsAboveCutOffTag);
- * PCollection<String> markedWords =
- *     results.get(markedWordsTag);
- * } </pre>
- *
- * <h2>Properties May Be Specified In Any Order</h2>
- *
- * <p>Several properties can be specified for a {@link ParDo}
- * {@link PTransform}, including name, side inputs, side output tags,
- * and {@link DoFn} to invoke. Only the {@link DoFn} is required; the
- * name is encouraged but not required, and side inputs and side
- * output tags are only specified when they're needed. These
- * properties can be specified in any order, as long as they're
- * specified before the {@link ParDo} {@link PTransform} is applied.
- *
- * <p>The approach used to allow these properties to be specified in
- * any order, with some properties omitted, is to have each of the
- * property "setter" methods defined as static factory methods on
- * {@link ParDo} itself, which return an instance of either
- * {@link ParDo.Unbound} or
- * {@link ParDo.Bound} nested classes, each of which offer
- * property setter instance methods to enable setting additional
- * properties. {@link ParDo.Bound} is used for {@link ParDo}
- * transforms whose {@link DoFn} is specified and whose input and
- * output static types have been bound. {@link ParDo.Unbound ParDo.Unbound} is used
- * for {@link ParDo} transforms that have not yet had their
- * {@link DoFn} specified. Only {@link ParDo.Bound} instances can be
- * applied.
- *
- * <p>Another benefit of this approach is that it reduces the number
- * of type parameters that need to be specified manually. In
- * particular, the input and output types of the {@link ParDo}
- * {@link PTransform} are inferred automatically from the type
- * parameters of the {@link DoFn} argument passed to {@link ParDo#of}.
- *
- * <h2>Output Coders</h2>
- *
- * <p>By default, the {@link Coder Coder&lt;OutputT&gt;} for the
- * elements of the main output {@link PCollection PCollection&lt;OutputT&gt;} is
- * inferred from the concrete type of the {@link DoFn DoFn&lt;InputT, OutputT&gt;}.
- *
- * <p>By default, the {@link Coder Coder&lt;SideOutputT&gt;} for the elements of
- * a side output {@link PCollection PCollection&lt;SideOutputT&gt;} is inferred
- * from the concrete type of the corresponding {@link TupleTag TupleTag&lt;SideOutputT&gt;}.
- * To be successful, the {@link TupleTag} should be created as an instance
- * of a trivial anonymous subclass, with {@code {}} suffixed to the
- * constructor call. Such uses block Java's generic type parameter
- * inference, so the {@code <X>} argument must be provided explicitly.
- * For example:
- * <pre> {@code
- * // A TupleTag to use for a side input can be written concisely:
- * final TupleTag<Integer> sideInputTag = new TupleTag<>();
- * // A TupleTag to use for a side output should be written with "{}",
- * // and explicit generic parameter type:
- * final TupleTag<String> sideOutputTag = new TupleTag<String>(){};
- * } </pre>
- * This style of {@code TupleTag} instantiation is used in the example of
- * multiple side outputs, above.
- *
- * <h2>Serializability of {@link DoFn DoFns}</h2>
- *
- * <p>A {@link DoFn} passed to a {@link ParDo} transform must be
- * {@link Serializable}. This allows the {@link DoFn} instance
- * created in this "main program" to be sent (in serialized form) to
- * remote worker machines and reconstituted for each bundle of elements
- * of the input {@link PCollection} being processed. A {@link DoFn}
- * can have instance variable state, and non-transient instance
- * variable state will be serialized in the main program and then
- * deserialized on remote worker machines for each bundle of elements
- * to process.
- *
- * <p>To aid in ensuring that {@link DoFn DoFns} are properly
- * {@link Serializable}, even local execution using the
- * {@link DirectPipelineRunner} will serialize and then deserialize
- * {@link DoFn DoFns} before executing them on a bundle.
- *
- * <p>{@link DoFn DoFns} expressed as anonymous inner classes can be
- * convenient, but due to a quirk in Java's rules for serializability,
- * non-static inner or nested classes (including anonymous inner
- * classes) automatically capture their enclosing class's instance in
- * their serialized state. This can lead to including much more than
- * intended in the serialized state of a {@link DoFn}, or even things
- * that aren't {@link Serializable}.
- *
- * <p>There are two ways to avoid unintended serialized state in a
- * {@link DoFn}:
- *
- * <ul>
- *
- * <li>Define the {@link DoFn} as a named, static class.
- *
- * <li>Define the {@link DoFn} as an anonymous inner class inside of
- * a static method.
- *
- * </ul>
- *
- * <p>Both of these approaches ensure that there is no implicit enclosing
- * instance serialized along with the {@link DoFn} instance.
- *
- * <p>Prior to Java 8, any local variables of the enclosing
- * method referenced from within an anonymous inner class need to be
- * marked as {@code final}. If defining the {@link DoFn} as a named
- * static class, such variables would be passed as explicit
- * constructor arguments and stored in explicit instance variables.
- *
- * <p>There are three main ways to initialize the state of a
- * {@link DoFn} instance processing a bundle:
- *
- * <ul>
- *
- * <li>Define instance variable state (including implicit instance
- * variables holding final variables captured by an anonymous inner
- * class), initialized by the {@link DoFn}'s constructor (which is
- * implicit for an anonymous inner class). This state will be
- * automatically serialized and then deserialized in the {@code DoFn}
- * instance created for each bundle. This method is good for state
- * known when the original {@code DoFn} is created in the main
- * program, if it's not overly large.
- *
- * <li>Compute the state as a singleton {@link PCollection} and pass it
- * in as a side input to the {@link DoFn}. This is good if the state
- * needs to be computed by the pipeline, or if the state is very large
- * and so is best read from file(s) rather than sent as part of the
- * {@code DoFn}'s serialized state.
- *
- * <li>Initialize the state in each {@link DoFn} instance, in
- * {@link DoFn#startBundle}. This is good if the initialization
- * doesn't depend on any information known only by the main program or
- * computed by earlier pipeline operations, but is the same for all
- * instances of this {@link DoFn} for all program executions, say
- * setting up empty caches or initializing constant data.
- *
- * </ul>
- *
- * <h2>No Global Shared State</h2>
- *
- * <p>{@link ParDo} operations are intended to be able to run in
- * parallel across multiple worker machines. This precludes easy
- * sharing and updating mutable state across those machines. There is
- * no support in the Google Cloud Dataflow system for communicating
- * and synchronizing updates to shared state across worker machines,
- * so programs should not access any mutable static variable state in
- * their {@link DoFn}, without understanding that the Java processes
- * for the main program and workers will each have its own independent
- * copy of such state, and there won't be any automatic copying of
- * that state across Java processes. All information should be
- * communicated to {@link DoFn} instances via main and side inputs and
- * serialized state, and all output should be communicated from a
- * {@link DoFn} instance via main and side outputs, in the absence of
- * external communication mechanisms written by user code.
- *
- * <h2>Fault Tolerance</h2>
- *
- * <p>In a distributed system, things can fail: machines can crash,
- * machines can be unable to communicate across the network, etc.
- * While individual failures are rare, the larger the job, the greater
- * the chance that something, somewhere, will fail. The Google Cloud
- * Dataflow service strives to mask such failures automatically,
- * principally by retrying failed {@link DoFn} bundles. This means
- * that a {@code DoFn} instance might process a bundle partially, then
- * crash for some reason, then be rerun (often on a different worker
- * machine) on that same bundle and on the same elements as before.
- * Sometimes two or more {@link DoFn} instances will be running on the
- * same bundle simultaneously, with the system taking the results of
- * the first instance to complete successfully. Consequently, the
- * code in a {@link DoFn} needs to be written such that these
- * duplicate (sequential or concurrent) executions do not cause
- * problems. If the outputs of a {@link DoFn} are a pure function of
- * its inputs, then this requirement is satisfied. However, if a
- * {@link DoFn DoFn's} execution has external side-effects, such as performing
- * updates to external HTTP services, then the {@link DoFn DoFn's} code
- * needs to take care to ensure that those updates are idempotent and
- * that concurrent updates are acceptable. This property can be
- * difficult to achieve, so it is advisable to strive to keep
- * {@link DoFn DoFns} as pure functions as much as possible.
- *
- * <h2>Optimization</h2>
- *
- * <p>The Google Cloud Dataflow service automatically optimizes a
- * pipeline before it is executed. A key optimization, <i>fusion</i>,
- * relates to {@link ParDo} operations. If one {@link ParDo} operation produces a
- * {@link PCollection} that is then consumed as the main input of another
- * {@link ParDo} operation, the two {@link ParDo} operations will be <i>fused</i>
- * together into a single ParDo operation and run in a single pass;
- * this is "producer-consumer fusion". Similarly, if
- * two or more ParDo operations have the same {@link PCollection} main input,
- * they will be fused into a single {@link ParDo} that makes just one pass
- * over the input {@link PCollection}; this is "sibling fusion".
- *
- * <p>If after fusion there are no more unfused references to a
- * {@link PCollection} (e.g., one between a producer ParDo and a consumer
- * {@link ParDo}), the {@link PCollection} itself is "fused away" and won't ever be
- * written to disk, saving all the I/O and space expense of
- * constructing it.
- *
- * <p>The Google Cloud Dataflow service applies fusion as much as
- * possible, greatly reducing the cost of executing pipelines. As a
- * result, it is essentially "free" to write {@link ParDo} operations in a
- * very modular, composable style, each {@link ParDo} operation doing one
- * clear task, and stringing together sequences of {@link ParDo} operations to
- * get the desired overall effect. Such programs can be easier to
- * understand, easier to unit-test, easier to extend and evolve, and
- * easier to reuse in new programs. The predefined library of
- * PTransforms that come with Google Cloud Dataflow makes heavy use of
- * this modular, composable style, trusting to the Google Cloud
- * Dataflow service's optimizer to "flatten out" all the compositions
- * into highly optimized stages.
- *
- * @see <a href="https://cloud.google.com/dataflow/model/par-do">the web
- * documentation for ParDo</a>
- */
-public class ParDo {
-
-  /**
-   * Creates a {@link ParDo} {@link PTransform} with the given name.
-   *
-   * <p>See the discussion of naming above for more explanation.
-   *
-   * <p>The resulting {@link PTransform} is incomplete, and its
-   * input/output types are not yet bound. Use
-   * {@link ParDo.Unbound#of} to specify the {@link DoFn} to
-   * invoke, which will also bind the input/output types of this
-   * {@link PTransform}.
-   */
-  public static Unbound named(String name) {
-    return new Unbound().named(name);
-  }
-
-  /**
-   * Creates a {@link ParDo} {@link PTransform} with the given
-   * side inputs.
-   *
-   * <p>Side inputs are {@link PCollectionView PCollectionViews}, whose contents are
-   * computed during pipeline execution and then made accessible to
-   * {@link DoFn} code via {@link DoFn.ProcessContext#sideInput sideInput}. Each
-   * invocation of the {@link DoFn} receives the same values for these
-   * side inputs.
-   *
-   * <p>See the discussion of Side Inputs above for more explanation.
-   *
-   * <p>The resulting {@link PTransform} is incomplete, and its
-   * input/output types are not yet bound. Use
-   * {@link ParDo.Unbound#of} to specify the {@link DoFn} to
-   * invoke, which will also bind the input/output types of this
-   * {@link PTransform}.
-   */
-  public static Unbound withSideInputs(PCollectionView<?>... sideInputs) {
-    return new Unbound().withSideInputs(sideInputs);
-  }
-
-  /**
-   * Creates a {@link ParDo} with the given side inputs.
-   *
-   * <p>Side inputs are {@link PCollectionView}s, whose contents are
-   * computed during pipeline execution and then made accessible to
-   * {@code DoFn} code via {@link DoFn.ProcessContext#sideInput sideInput}.
-   *
-   * <p>See the discussion of Side Inputs above for more explanation.
-   *
-   * <p>The resulting {@link PTransform} is incomplete, and its
-   * input/output types are not yet bound. Use
-   * {@link ParDo.Unbound#of} to specify the {@link DoFn} to
-   * invoke, which will also bind the input/output types of this
-   * {@link PTransform}.
-   */
-  public static Unbound withSideInputs(
-      Iterable<? extends PCollectionView<?>> sideInputs) {
-    // Delegate to the instance-level Iterable overload on a fresh, empty builder.
-    Unbound empty = new Unbound();
-    return empty.withSideInputs(sideInputs);
-  }
-
-  /**
-   * Creates a multi-output {@link ParDo} {@link PTransform} whose
-   * output {@link PCollection}s will be referenced using the given main
-   * output and side output tags.
-   *
-   * <p>{@link TupleTag TupleTags} are used to name (with its static element
-   * type {@code T}) each main and side output {@code PCollection<T>}.
-   * This {@link PTransform PTransform's} {@link DoFn} emits elements to the main
-   * output {@link PCollection} as normal, using
-   * {@link DoFn.Context#output}. The {@link DoFn} emits elements to
-   * a side output {@code PCollection} using
-   * {@link DoFn.Context#sideOutput}, passing that side output's tag
-   * as an argument. The result of invoking this {@link PTransform}
-   * will be a {@link PCollectionTuple}, and any of the main and
-   * side output {@code PCollection}s can be retrieved from it via
-   * {@link PCollectionTuple#get}, passing the output's tag as an
-   * argument.
-   *
-   * <p>See the discussion of Side Outputs above for more explanation.
-   *
-   * <p>The resulting {@link PTransform} is incomplete, and its input
-   * type is not yet bound. Use {@link ParDo.UnboundMulti#of}
-   * to specify the {@link DoFn} to invoke, which will also bind the
-   * input type of this {@link PTransform}.
-   */
-  public static <OutputT> UnboundMulti<OutputT> withOutputTags(
-      TupleTag<OutputT> mainOutputTag,
-      TupleTagList sideOutputTags) {
-    // The multi-output builder is derived from a fresh single-output builder.
-    Unbound base = new Unbound();
-    return base.withOutputTags(mainOutputTag, sideOutputTags);
-  }
-
-  /**
-   * Creates a {@link ParDo} {@link PTransform} that will invoke the
-   * given {@link DoFn} function.
-   *
-   * <p>The resulting {@link PTransform PTransform's} types have been bound, with the
-   * input being a {@code PCollection<InputT>} and the output a
-   * {@code PCollection<OutputT>}, inferred from the types of the argument
-   * {@code DoFn<InputT, OutputT>}. It is ready to be applied, or further
-   * properties can be set on it first.
-   */
-  public static <InputT, OutputT> Bound<InputT, OutputT> of(DoFn<InputT, OutputT> fn) {
-    return new Unbound().of(fn);
-  }
-
-  /**
-   * Converts a {@link DoFnWithContext} into a {@link DoFn} by obtaining a
-   * {@link DoFnReflector} for the function's runtime class and asking it for
-   * the corresponding {@link DoFn}.
-   */
-  private static <InputT, OutputT> DoFn<InputT, OutputT>
-      adapt(DoFnWithContext<InputT, OutputT> fn) {
-    DoFnReflector reflector = DoFnReflector.of(fn.getClass());
-    return reflector.toDoFn(fn);
-  }
-
-  /**
-   * Creates a {@link ParDo} {@link PTransform} that will invoke the
-   * given {@link DoFnWithContext} function.
-   *
-   * <p>The resulting {@link PTransform PTransform's} types have been bound, with the
-   * input being a {@code PCollection<InputT>} and the output a
-   * {@code PCollection<OutputT>}, inferred from the types of the argument
-   * {@code DoFn<InputT, OutputT>}. It is ready to be applied, or further
-   * properties can be set on it first.
-   *
-   * <p>{@link DoFnWithContext} is an experimental alternative to
-   * {@link DoFn} which simplifies accessing the window of the element.
-   */
-  @Experimental
-  public static <InputT, OutputT> Bound<InputT, OutputT> of(DoFnWithContext<InputT, OutputT> fn) {
-    return of(adapt(fn));
-  }
-
-  /**
-   * An incomplete {@link ParDo} transform, with unbound input/output types.
-   *
-   * <p>Before being applied, {@link ParDo.Unbound#of} must be
-   * invoked to specify the {@link DoFn} to invoke, which will also
-   * bind the input/output types of this {@link PTransform}.
-   */
-  public static class Unbound {
-    private final String name;
-    private final List<PCollectionView<?>> sideInputs;
-
-    Unbound() {
-      this(null, ImmutableList.<PCollectionView<?>>of());
-    }
-
-    Unbound(String name, List<PCollectionView<?>> sideInputs) {
-      this.name = name;
-      this.sideInputs = sideInputs;
-    }
-
-    /**
-     * Returns a new {@link ParDo} transform that's like this
-     * transform but with the specified name. Does not modify this
-     * transform. The resulting transform is still incomplete.
-     *
-     * <p>See the discussion of naming above for more explanation.
-     */
-    public Unbound named(String name) {
-      return new Unbound(name, sideInputs);
-    }
-
-    /**
-     * Returns a new {@link ParDo} transform that's like this
-     * transform but with the specified additional side inputs.
-     * Does not modify this transform. The resulting transform is
-     * still incomplete.
-     *
-     * <p>See the discussion of Side Inputs above and on
-     * {@link ParDo#withSideInputs} for more explanation.
-     */
-    public Unbound withSideInputs(PCollectionView<?>... sideInputs) {
-      return withSideInputs(Arrays.asList(sideInputs));
-    }
-
-    /**
-     * Returns a new {@link ParDo} transform that is like this
-     * transform but with the specified additional side inputs. Does not modify
-     * this transform. The resulting transform is still incomplete.
-     *
-     * <p>See the discussion of Side Inputs above and on
-     * {@link ParDo#withSideInputs} for more explanation.
-     */
-    public Unbound withSideInputs(
-        Iterable<? extends PCollectionView<?>> sideInputs) {
-      ImmutableList.Builder<PCollectionView<?>> builder = ImmutableList.builder();
-      builder.addAll(this.sideInputs);
-      builder.addAll(sideInputs);
-      return new Unbound(name, builder.build());
-    }
-
-    /**
-     * Returns a new multi-output {@link ParDo} transform that's like
-     * this transform but with the specified main and side output
-     * tags. Does not modify this transform. The resulting transform
-     * is still incomplete.
-     *
-     * <p>See the discussion of Side Outputs above and on
-     * {@link ParDo#withOutputTags} for more explanation.
-     */
-    public <OutputT> UnboundMulti<OutputT> withOutputTags(TupleTag<OutputT> mainOutputTag,
-                                              TupleTagList sideOutputTags) {
-      return new UnboundMulti<>(
-          name, sideInputs, mainOutputTag, sideOutputTags);
-    }
-
-    /**
-     * Returns a new {@link ParDo} {@link PTransform} that's like this
-     * transform but that will invoke the given {@link DoFn}
-     * function, and that has its input and output types bound. Does
-     * not modify this transform. The resulting {@link PTransform} is
-     * sufficiently specified to be applied, but more properties can
-     * still be specified.
-     */
-    public <InputT, OutputT> Bound<InputT, OutputT> of(DoFn<InputT, OutputT> fn) {
-      return new Bound<>(name, sideInputs, fn);
-    }
-
-    /**
-     * Returns a new {@link ParDo} {@link PTransform} that's like this
-     * transform but which will invoke the given {@link DoFnWithContext}
-     * function, and which has its input and output types bound. Does
-     * not modify this transform. The resulting {@link PTransform} is
-     * sufficiently specified to be applied, but more properties can
-     * still be specified.
-     */
-    public <InputT, OutputT> Bound<InputT, OutputT> of(DoFnWithContext<InputT, OutputT> fn) {
-      return of(adapt(fn));
-    }
-  }
-
-  /**
-   * A {@link PTransform} that, when applied to a {@code PCollection<InputT>},
-   * invokes a user-specified {@code DoFn<InputT, OutputT>} on all its elements,
-   * with all its outputs collected into an output
-   * {@code PCollection<OutputT>}.
-   *
-   * <p>A multi-output form of this transform can be created with
-   * {@link ParDo.Bound#withOutputTags}.
-   *
-   * @param <InputT> the type of the (main) input {@link PCollection} elements
-   * @param <OutputT> the type of the (main) output {@link PCollection} elements
-   */
-  public static class Bound<InputT, OutputT>
-      extends PTransform<PCollection<? extends InputT>, PCollection<OutputT>> {
-    // Inherits name.
-    private final List<PCollectionView<?>> sideInputs;
-    private final DoFn<InputT, OutputT> fn;
-
-    Bound(String name,
-          List<PCollectionView<?>> sideInputs,
-          DoFn<InputT, OutputT> fn) {
-      super(name);
-      this.sideInputs = sideInputs;
-      this.fn = SerializableUtils.clone(fn);
-    }
-
-    /**
-     * Returns a new {@link ParDo} {@link PTransform} that's like this
-     * {@link PTransform} but with the specified name. Does not
-     * modify this {@link PTransform}.
-     *
-     * <p>See the discussion of Naming above for more explanation.
-     */
-    public Bound<InputT, OutputT> named(String name) {
-      return new Bound<>(name, sideInputs, fn);
-    }
-
-    /**
-     * Returns a new {@link ParDo} {@link PTransform} that's like this
-     * {@link PTransform} but with the specified additional side inputs. Does not
-     * modify this {@link PTransform}.
-     *
-     * <p>See the discussion of Side Inputs above and on
-     * {@link ParDo#withSideInputs} for more explanation.
-     */
-    public Bound<InputT, OutputT> withSideInputs(PCollectionView<?>... sideInputs) {
-      return withSideInputs(Arrays.asList(sideInputs));
-    }
-
-    /**
-     * Returns a new {@link ParDo} {@link PTransform} that's like this
-     * {@link PTransform} but with the specified additional side inputs. Does not
-     * modify this {@link PTransform}.
-     *
-     * <p>See the discussion of Side Inputs above and on
-     * {@link ParDo#withSideInputs} for more explanation.
-     */
-    public Bound<InputT, OutputT> withSideInputs(
-        Iterable<? extends PCollectionView<?>> sideInputs) {
-      ImmutableList.Builder<PCollectionView<?>> builder = ImmutableList.builder();
-      builder.addAll(this.sideInputs);
-      builder.addAll(sideInputs);
-      return new Bound<>(name, builder.build(), fn);
-    }
-
-    /**
-     * Returns a new multi-output {@link ParDo} {@link PTransform}
-     * that's like this {@link PTransform} but with the specified main
-     * and side output tags. Does not modify this {@link PTransform}.
-     *
-     * <p>See the discussion of Side Outputs above and on
-     * {@link ParDo#withOutputTags} for more explanation.
-     */
-    public BoundMulti<InputT, OutputT> withOutputTags(TupleTag<OutputT> mainOutputTag,
-                                           TupleTagList sideOutputTags) {
-      return new BoundMulti<>(
-          name, sideInputs, mainOutputTag, sideOutputTags, fn);
-    }
-
-    @Override
-    public PCollection<OutputT> apply(PCollection<? extends InputT> input) {
-      return PCollection.<OutputT>createPrimitiveOutputInternal(
-              input.getPipeline(),
-              input.getWindowingStrategy(),
-              input.isBounded())
-          .setTypeDescriptorInternal(fn.getOutputTypeDescriptor());
-    }
-
-    @Override
-    @SuppressWarnings("unchecked")
-    protected Coder<OutputT> getDefaultOutputCoder(PCollection<? extends InputT> input)
-        throws CannotProvideCoderException {
-      return input.getPipeline().getCoderRegistry().getDefaultCoder(
-          fn.getOutputTypeDescriptor(),
-          fn.getInputTypeDescriptor(),
-          ((PCollection<InputT>) input).getCoder());
-    }
-
-    @Override
-    protected String getKindString() {
-      Class<?> clazz = DoFnReflector.getDoFnClass(fn);
-      if (clazz.isAnonymousClass()) {
-        return "AnonymousParDo";
-      } else {
-        return String.format("ParDo(%s)", StringUtils.approximateSimpleName(clazz));
-      }
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * <p>{@link ParDo} registers its internal {@link DoFn} as a subcomponent for display metadata.
-     * {@link DoFn} implementations can register display data by overriding
-     * {@link DoFn#populateDisplayData}.
-     */
-    @Override
-    public void populateDisplayData(Builder builder) {
-      builder.include(fn);
-    }
-
-    public DoFn<InputT, OutputT> getFn() {
-      return fn;
-    }
-
-    public List<PCollectionView<?>> getSideInputs() {
-      return sideInputs;
-    }
-  }
-
-  /**
-   * An incomplete multi-output {@link ParDo} transform, with unbound
-   * input type.
-   *
-   * <p>Before being applied, {@link ParDo.UnboundMulti#of} must be
-   * invoked to specify the {@link DoFn} to invoke, which will also
-   * bind the input type of this {@link PTransform}.
-   *
-   * @param <OutputT> the type of the main output {@code PCollection} elements
-   */
-  public static class UnboundMulti<OutputT> {
-    private final String name;
-    private final List<PCollectionView<?>> sideInputs;
-    private final TupleTag<OutputT> mainOutputTag;
-    private final TupleTagList sideOutputTags;
-
-    /** Creates a builder holding exactly the given name, side inputs and output tags. */
-    UnboundMulti(String name,
-                 List<PCollectionView<?>> sideInputs,
-                 TupleTag<OutputT> mainOutputTag,
-                 TupleTagList sideOutputTags) {
-      this.name = name;
-      this.sideInputs = sideInputs;
-      this.mainOutputTag = mainOutputTag;
-      this.sideOutputTags = sideOutputTags;
-    }
-
-    /**
-     * Produces a copy of this incomplete multi-output {@link ParDo} transform
-     * that carries the given name. This instance is left untouched, and the
-     * copy still needs a {@link DoFn}.
-     *
-     * <p>See the discussion of Naming above for more explanation.
-     */
-    public UnboundMulti<OutputT> named(String name) {
-      return new UnboundMulti<>(name, sideInputs, mainOutputTag, sideOutputTags);
-    }
-
-    /**
-     * Produces a copy of this incomplete multi-output {@link ParDo} transform
-     * whose side inputs are the current ones followed by the given ones. This
-     * instance is left untouched, and the copy still needs a {@link DoFn}.
-     *
-     * <p>See the discussion of Side Inputs above and on
-     * {@link ParDo#withSideInputs} for more explanation.
-     */
-    public UnboundMulti<OutputT> withSideInputs(
-        PCollectionView<?>... sideInputs) {
-      List<PCollectionView<?>> extra = Arrays.asList(sideInputs);
-      return withSideInputs(extra);
-    }
-
-    /**
-     * Produces a copy of this incomplete multi-output {@link ParDo} transform
-     * whose side inputs are the current ones followed by the given ones. This
-     * instance is left untouched, and the copy still needs a {@link DoFn}.
-     *
-     * <p>See the discussion of Side Inputs above and on
-     * {@link ParDo#withSideInputs} for more explanation.
-     */
-    public UnboundMulti<OutputT> withSideInputs(
-        Iterable<? extends PCollectionView<?>> sideInputs) {
-      // Existing side inputs come first, newly supplied ones are appended.
-      ImmutableList<PCollectionView<?>> combined =
-          ImmutableList.<PCollectionView<?>>builder()
-              .addAll(this.sideInputs)
-              .addAll(sideInputs)
-              .build();
-      return new UnboundMulti<>(name, combined, mainOutputTag, sideOutputTags);
-    }
-
-    /**
-     * Binds the given {@link DoFn}, producing a multi-output {@link ParDo}
-     * {@link PTransform} with this transform's name, side inputs and output
-     * tags and with its input type fixed by the function. This instance is
-     * left untouched. The result can be applied as is, or configured further.
-     */
-    public <InputT> BoundMulti<InputT, OutputT> of(DoFn<InputT, OutputT> fn) {
-      return new BoundMulti<>(name, sideInputs, mainOutputTag, sideOutputTags, fn);
-    }
-
-    /**
-     * Binds the given {@link DoFnWithContext}, producing a multi-output
-     * {@link ParDo} {@link PTransform} with this transform's name, side inputs
-     * and output tags and with its input type fixed by the function. This
-     * instance is left untouched. The result can be applied as is, or
-     * configured further.
-     */
-    public <InputT> BoundMulti<InputT, OutputT> of(DoFnWithContext<InputT, OutputT> fn) {
-      DoFn<InputT, OutputT> adapted = adapt(fn);
-      return of(adapted);
-    }
-  }
-
-  /**
-   * A {@link PTransform} that, when applied to a
-   * {@code PCollection<InputT>}, invokes a user-specified
-   * {@code DoFn<InputT, OutputT>} on all its elements, which can emit elements
-   * to any of the {@link PTransform}'s main and side output
-   * {@code PCollection}s, which are bundled into a result
-   * {@code PCollectionTuple}.
-   *
-   * @param <InputT> the type of the (main) input {@code PCollection} elements
-   * @param <OutputT> the type of the main output {@code PCollection} elements
-   */
-  public static class BoundMulti<InputT, OutputT>
-      extends PTransform<PCollection<? extends InputT>, PCollectionTuple> {
-    // Inherits name.
-    private final List<PCollectionView<?>> sideInputs;
-    private final TupleTag<OutputT> mainOutputTag;
-    private final TupleTagList sideOutputTags;
-    private final DoFn<InputT, OutputT> fn;
-
-    /**
-     * Builds a multi-output transform. The {@link DoFn} that is retained is
-     * the result of passing {@code fn} through {@code SerializableUtils.clone}.
-     */
-    BoundMulti(String name,
-               List<PCollectionView<?>> sideInputs,
-               TupleTag<OutputT> mainOutputTag,
-               TupleTagList sideOutputTags,
-               DoFn<InputT, OutputT> fn) {
-      super(name);
-      this.sideInputs = sideInputs;
-      this.mainOutputTag = mainOutputTag;
-      this.sideOutputTags = sideOutputTags;
-      this.fn = SerializableUtils.clone(fn);
-    }
-
-    /**
-     * Returns a new multi-output {@link ParDo} {@link PTransform}
-     * that's like this {@link PTransform} but with the specified
-     * name. Does not modify this {@link PTransform}.
-     *
-     * <p>See the discussion of Naming above for more explanation.
-     */
-    public BoundMulti<InputT, OutputT> named(String name) {
-      return new BoundMulti<>(
-          name, sideInputs, mainOutputTag, sideOutputTags, fn);
-    }
-
-    /**
-     * Returns a new multi-output {@link ParDo} {@link PTransform}
-     * that's like this {@link PTransform} but with the specified additional side
-     * inputs. Does not modify this {@link PTransform}.
-     *
-     * <p>See the discussion of Side Inputs above and on
-     * {@link ParDo#withSideInputs} for more explanation.
-     */
-    public BoundMulti<InputT, OutputT> withSideInputs(
-        PCollectionView<?>... sideInputs) {
-      return withSideInputs(Arrays.asList(sideInputs));
-    }
-
-    /**
-     * Returns a new multi-output {@link ParDo} {@link PTransform}
-     * that's like this {@link PTransform} but with the specified additional side
-     * inputs. Does not modify this {@link PTransform}.
-     *
-     * <p>See the discussion of Side Inputs above and on
-     * {@link ParDo#withSideInputs} for more explanation.
-     */
-    public BoundMulti<InputT, OutputT> withSideInputs(
-        Iterable<? extends PCollectionView<?>> sideInputs) {
-      // Existing side inputs come first, newly supplied ones are appended.
-      ImmutableList.Builder<PCollectionView<?>> builder = ImmutableList.builder();
-      builder.addAll(this.sideInputs);
-      builder.addAll(sideInputs);
-      return new BoundMulti<>(
-          name, builder.build(),
-          mainOutputTag, sideOutputTags, fn);
-    }
-
-    /**
-     * Creates the output {@link PCollectionTuple} for this transform: one
-     * output for the main tag followed by one per side output tag, each
-     * sharing the input's pipeline, windowing strategy and boundedness.
-     */
-    @Override
-    public PCollectionTuple apply(PCollection<? extends InputT> input) {
-      PCollectionTuple outputs = PCollectionTuple.ofPrimitiveOutputsInternal(
-          input.getPipeline(),
-          TupleTagList.of(mainOutputTag).and(sideOutputTags.getAll()),
-          input.getWindowingStrategy(),
-          input.isBounded());
-
-      // The fn will likely be an instance of an anonymous subclass
-      // such as DoFn<Integer, String> { }, thus will have a high-fidelity
-      // TypeDescriptor for the output type.
-      outputs.get(mainOutputTag).setTypeDescriptorInternal(fn.getOutputTypeDescriptor());
-
-      return outputs;
-    }
-
-    /**
-     * Always throws: a multi-output {@link ParDo} has no single default
-     * output coder, so this overload is not expected to be called.
-     */
-    @Override
-    protected Coder<OutputT> getDefaultOutputCoder() {
-      throw new RuntimeException(
-          "internal error: shouldn't be calling this on a multi-output ParDo");
-    }
-
-    /**
-     * Asks the pipeline's coder registry for a coder for {@code output},
-     * handing it the output's type descriptor together with the
-     * {@link DoFn}'s input type descriptor and the input's coder.
-     */
-    @Override
-    public <T> Coder<T> getDefaultOutputCoder(
-        PCollection<? extends InputT> input, TypedPValue<T> output)
-        throws CannotProvideCoderException {
-      @SuppressWarnings("unchecked")
-      Coder<InputT> inputCoder = ((PCollection<InputT>) input).getCoder();
-      return input.getPipeline().getCoderRegistry().getDefaultCoder(
-          output.getTypeDescriptor(),
-          fn.getInputTypeDescriptor(),
-          inputCoder);
-    }
-
-    @Override
-    protected String getKindString() {
-      Class<?> clazz = DoFnReflector.getDoFnClass(fn);
-      // Test the same class that is used for the formatted name below, as the
-      // single-output Bound#getKindString does. Testing fn.getClass() instead
-      // could disagree with clazz whenever getDoFnClass returns another class.
-      if (clazz.isAnonymousClass()) {
-        return "AnonymousParMultiDo";
-      } else {
-        return String.format("ParMultiDo(%s)", StringUtils.approximateSimpleName(clazz));
-      }
-    }
-
-    /** Returns the {@link DoFn} held by this transform. */
-    public DoFn<InputT, OutputT> getFn() {
-      return fn;
-    }
-
-    /** Returns the tag identifying the main output. */
-    public TupleTag<OutputT> getMainOutputTag() {
-      return mainOutputTag;
-    }
-
-    /** Returns the tags identifying the side outputs. */
-    public TupleTagList getSideOutputTags() {
-      return sideOutputTags;
-    }
-
-    /** Returns the side inputs held by this transform. */
-    public List<PCollectionView<?>> getSideInputs() {
-      return sideInputs;
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  static {
-    // Evaluator used by DirectPipelineRunner for single-output ParDo transforms;
-    // it simply forwards to evaluateSingleHelper.
-    DirectPipelineRunner.TransformEvaluator<Bound> singleOutputEvaluator =
-        new DirectPipelineRunner.TransformEvaluator<Bound>() {
-          @Override
-          public void evaluate(
-              Bound transform,
-              DirectPipelineRunner.EvaluationContext context) {
-            evaluateSingleHelper(transform, context);
-          }
-        };
-    DirectPipelineRunner.registerDefaultTransformEvaluator(
-        Bound.class,
-        singleOutputEvaluator);
-  }
-
-  /**
-   * Evaluates a single-output {@link ParDo} by wrapping its one output in a
-   * {@link PCollectionTuple} under a locally created tag, delegating to
-   * {@code evaluateHelper}, and then storing the values collected for that
-   * tag as the contents of the transform's output.
-   */
-  private static <InputT, OutputT> void evaluateSingleHelper(
-      Bound<InputT, OutputT> transform,
-      DirectPipelineRunner.EvaluationContext context) {
-    // A single-output transform carries no tag of its own, so one is made up here.
-    TupleTag<OutputT> soleOutputTag = new TupleTag<>("out");
-    DirectModeExecutionContext directContext = DirectModeExecutionContext.create();
-    PCollectionTuple wrappedOutput =
-        PCollectionTuple.of(soleOutputTag, context.getOutput(transform));
-
-    // No side output tags are passed for the single-output case.
-    evaluateHelper(
-        transform.fn,
-        context.getStepName(transform),
-        context.getInput(transform),
-        transform.sideInputs,
-        soleOutputTag,
-        Collections.<TupleTag<?>>emptyList(),
-        wrappedOutput,
-        context,
-        directContext);
-
-    // Publish what was collected under the made-up tag as the output's contents.
-    context.setPCollectionValuesWithMetadata(
-        context.getOutput(transform),
-        directContext.getOutput(soleOutputTag));
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  static {
-    // Evaluator used by DirectPipelineRunner for multi-output ParDo transforms;
-    // it simply forwards to evaluateMultiHelper.
-    DirectPipelineRunner.TransformEvaluator<BoundMulti> multiOutputEvaluator =
-        new DirectPipelineRunner.TransformEvaluator<BoundMulti>() {
-          @Override
-          public void evaluate(
-              BoundMulti transform,
-              DirectPipelineRunner.EvaluationContext context) {
-            evaluateMultiHelper(transform, context);
-          }
-        };
-    DirectPipelineRunner.registerDefaultTransformEvaluator(
-        BoundMulti.class,
-        multiOutputEvaluator);
-  }
-
-  /**
-   * Evaluates a multi-output {@link ParDo} by delegating to
-   * {@code evaluateHelper} and then storing, for every output of the
-   * transform, the values collected for its tag: the main output's values are
-   * read with {@code getOutput}, all others with {@code getSideOutput}.
-   */
-  private static <InputT, OutputT> void evaluateMultiHelper(
-      BoundMulti<InputT, OutputT> transform,
-      DirectPipelineRunner.EvaluationContext context) {
-
-    DirectModeExecutionContext executionContext = DirectModeExecutionContext.create();
-
-    evaluateHelper(
-        transform.fn,
-        context.getStepName(transform),
-        context.getInput(transform),
-        transform.sideInputs,
-        transform.mainOutputTag,
-        transform.sideOutputTags.getAll(),
-        context.getOutput(transform),
-        context,
-        executionContext);
-
-    for (Map.Entry<TupleTag<?>, PCollection<?>> entry
-        : context.getOutput(transform).getAll().entrySet()) {
-      @SuppressWarnings("unchecked")
-      TupleTag<Object> tag = (TupleTag<Object>) entry.getKey();
-      @SuppressWarnings("unchecked")
-      PCollection<Object> pc = (PCollection<Object>) entry.getValue();
-
-      // Compare with equals() rather than ==, so the main output is still
-      // recognized if the tag in the output tuple is an equal but distinct
-      // instance. equals() is never stricter than identity.
-      // NOTE(review): presumably TupleTag#equals is id-based -- confirm.
-      boolean isMainOutput = tag.equals(transform.mainOutputTag);
-
-      context.setPCollectionValuesWithMetadata(
-          pc,
-          (isMainOutput
-              ? executionContext.getOutput(tag)
-              : executionContext.getSideOutput(tag)));
-    }
-  }
-
-  /**
-   * Evaluates a single-output or multi-output {@link ParDo} directly.
-   *
-   * <p>This evaluation method is intended for use in testing scenarios; it is designed for clarity
-   * and correctness-checking, not speed.
-   *
-   * <p>Of particular note, this performs best-effort checking that inputs and outputs are not
-   * mutated in violation of the requirements upon a {@link DoFn}.
-   *
-   * <p>All elements of {@code input} are processed between a single {@code startBundle()} and a
-   * single {@code finishBundle()} call on the runner.
-   *
-   * @param doFn the user function; the instance returned by
-   *     {@code context.ensureSerializable(doFn)} is the one that is actually run
-   * @param stepName passed twice to {@code executionContext.getOrCreateStepContext}
-   * @param input the collection whose values (with metadata) are fed to the function; its coder
-   *     is used for input mutation detection and its windowing strategy is given to the runner
-   * @param sideInputs views made available to the function through a {@link SideInputReader}
-   * @param mainOutputTag tag of the main output
-   * @param sideOutputTags tags of the side outputs
-   * @param outputs the declared outputs; only tags present here are checked for output mutation
-   * @param context supplies pipeline options, element values, side input contents and the
-   *     counter mutator
-   * @param executionContext receives the current key before each element and supplies the step
-   *     context
-   * @throws IllegalMutationException if an input is found modified after {@code processElement},
-   *     or an output is found modified after having been emitted
-   */
-  private static <InputT, OutputT, ActualInputT extends InputT> void evaluateHelper(
-      DoFn<InputT, OutputT> doFn,
-      String stepName,
-      PCollection<ActualInputT> input,
-      List<PCollectionView<?>> sideInputs,
-      TupleTag<OutputT> mainOutputTag,
-      List<TupleTag<?>> sideOutputTags,
-      PCollectionTuple outputs,
-      DirectPipelineRunner.EvaluationContext context,
-      DirectModeExecutionContext executionContext) {
-    // TODO: Run multiple shards?
-    DoFn<InputT, OutputT> fn = context.ensureSerializable(doFn);
-
-    SideInputReader sideInputReader = makeSideInputReader(context, sideInputs);
-
-    // When evaluating via the DirectPipelineRunner, this output manager checks each output for
-    // illegal mutations when the next output comes along. We then verify again after finishBundle()
-    // The common case we expect this to catch is a user mutating an input in order to repeatedly
-    // emit "variations".
-    ImmutabilityCheckingOutputManager<ActualInputT> outputManager =
-        new ImmutabilityCheckingOutputManager<>(
-            fn.getClass().getSimpleName(),
-            new DoFnRunnerBase.ListOutputManager(),
-            outputs);
-
-    DoFnRunner<InputT, OutputT> fnRunner =
-        DoFnRunners.createDefault(
-            context.getPipelineOptions(),
-            fn,
-            sideInputReader,
-            outputManager,
-            mainOutputTag,
-            sideOutputTags,
-            executionContext.getOrCreateStepContext(stepName, stepName, null),
-            context.getAddCounterMutator(),
-            input.getWindowingStrategy());
-
-    fnRunner.startBundle();
-
-    for (DirectPipelineRunner.ValueWithMetadata<ActualInputT> elem
-             : context.getPCollectionValuesWithMetadata(input)) {
-      // Choose the key for this element: the KV's own key when the value is a KV, otherwise the
-      // key carried in the element's metadata.
-      if (elem.getValue() instanceof KV) {
-        // In case the DoFn needs keyed state, set the implicit keys to the keys
-        // in the input elements.
-        @SuppressWarnings("unchecked")
-        KV<?, ?> kvElem = (KV<?, ?>) elem.getValue();
-        executionContext.setKey(kvElem.getKey());
-      } else {
-        executionContext.setKey(elem.getKey());
-      }
-
-      // We check the input for mutations only through the call span of processElement.
-      // This will miss some cases, but the check is ad hoc and best effort. The common case
-      // is that the input is mutated to be used for output.
-      try {
-        // The detector must be created before processElement runs so that it captures the
-        // element's state prior to any user code touching it.
-        MutationDetector inputMutationDetector = MutationDetectors.forValueWithCoder(
-            elem.getWindowedValue().getValue(), input.getCoder());
-        @SuppressWarnings("unchecked")
-        WindowedValue<InputT> windowedElem = ((WindowedValue<InputT>) elem.getWindowedValue());
-        fnRunner.processElement(windowedElem);
-        inputMutationDetector.verifyUnmodified();
-      } catch (CoderException e) {
-        throw UserCodeException.wrap(e);
-      } catch (IllegalMutationException exn) {
-        // Rethrow with a message that names the offending DoFn; the original exception is kept
-        // as the cause.
-        throw new IllegalMutationException(
-            String.format("DoFn %s mutated input value %s of class %s (new value was %s)."
-                + " Input values must not be mutated in any way.",
-                fn.getClass().getSimpleName(),
-                exn.getSavedValue(), exn.getSavedValue().getClass(), exn.getNewValue()),
-            exn.getSavedValue(),
-            exn.getNewValue(),
-            exn);
-      }
-    }
-
-    // Note that the input could have been retained and mutated prior to this final output,
-    // but for now it degrades readability too much to be worth trying to catch that particular
-    // corner case.
-    fnRunner.finishBundle();
-    // Outputs are otherwise only checked when the next output for the same tag arrives, so the
-    // last output per tag still needs this final check.
-    outputManager.verifyLatestOutputsUnmodified();
-  }
-
-  /**
-   * Builds a {@link SideInputReader} over the contents of the given views.
-   *
-   * <p>For every view, the value obtained from {@code context.getPCollectionView} is stored in a
-   * {@link PTuple} under the view's internal tag; the resulting tuple backs the returned reader.
-   *
-   * @param context the evaluation context the view contents are read from
-   * @param sideInputs the views to expose; may be empty
-   * @return a reader over exactly the given views
-   */
-  private static SideInputReader makeSideInputReader(
-      DirectPipelineRunner.EvaluationContext context, List<PCollectionView<?>> sideInputs) {
-    PTuple viewContents = PTuple.empty();
-    for (PCollectionView<?> sideInput : sideInputs) {
-      // PTuple is persistent: and() returns a new tuple rather than modifying the receiver.
-      viewContents =
-          viewContents.and(sideInput.getTagInternal(), context.getPCollectionView(sideInput));
-    }
-    return DirectSideInputReader.of(viewContents);
-  }
-
-  /**
-   * A {@code DoFnRunner.OutputManager} that provides facilities for checking output values for
-   * illegal mutations.
-   *
-   * <p>When used via the try-with-resources pattern, it is guaranteed that every value passed
-   * to {@link #output} will have been checked for illegal mutation.
-   */
-  private static class ImmutabilityCheckingOutputManager<InputT>
-      implements DoFnRunners.OutputManager, AutoCloseable {
-
-    private final DoFnRunners.OutputManager underlyingOutputManager;
-    private final ConcurrentMap<TupleTag<?>, MutationDetector> mutationDetectorForTag;
-    private final PCollectionTuple outputs;
-    private String doFnName;
-
-    public ImmutabilityCheckingOutputManager(
-        String doFnName,
-        DoFnRunners.OutputManager underlyingOutputManager,
-        PCollectionTuple outputs) {
-      this.doFnName = doFnName;
-      this.underlyingOutputManager = underlyingOutputManager;
-      this.outputs = outputs;
-      this.mutationDetectorForTag = Maps.newConcurrentMap();
-    }
-
-    @Override
-    public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
-
-      // Skip verifying undeclared outputs, since we don't have coders for them.
-      if (outputs.has(tag)) {
-        try {
-          MutationDetector newDetector =
-              MutationDetectors.forValueWithCoder(
-                  output.getValue(), outputs.get(tag).getCoder());
-          MutationDetector priorDetector = mutationDetectorForTag.put(tag, newDetector);
-          verifyOutputUnmodified(priorDetector);
-        } catch (CoderException e) {
-          throw UserCodeException.wrap(e);
-        }
-      }
-
-      // Actually perform the output.
-      underlyingOutputManager.output(tag, output);
-    }
-
-    /**
-     * Throws {@link IllegalMutationException} if the prior output for any tag has been mutated
-     * since being output.
-     */
-    public void verifyLatestOutputsUnmodified() {
-      for (MutationDetector detector : mutationDetectorForTag.values()) {
-        verifyOutputUnmodified(detector);
-      }
-    }
-
-    /**
-     * Adapts the error message from the provided {@code detector}.
-     *
-     * <p>The {@code detector} may be null, in which case no check is performed. This is merely
-     * to consolidate null checking to this method.
-     */
-    private <T> void verifyOutputUnmodified(@Nullable MutationDetector detector) {
-      if (detector == null) {
-        return;
-      }
-
-      try {
-        detector.verifyUnmodified();
-      } catch (IllegalMutationException exn) {
-        throw new IllegalMutationException(String.format(
-            "DoFn %s mutated value %s after it was output (new value was %s)."
-                + " Values must not be mutated in any way after being output.",
-                doFnName, exn.getSavedValue(), exn.getNewValue()),
-            exn.getSavedValue(), exn.getNewValue(),
-            exn);
-      }
-    }
-
-    /**
-     * When used in a {@code try}-with-resources block, verifies all of the latest outputs upon
-     * {@link #close()}.
-     */
-    @Override
-    public void close() {
-      verifyLatestOutputsUnmodified();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java
deleted file mode 100644
index bbbccbc..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Partition.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TupleTagList;
-
-import java.io.Serializable;
-
-/**
- * A {@link PTransform} that splits a {@code PCollection<T>} into a fixed number {@code N} of
- * {@code PCollection<T>}s, returned together as a {@code PCollectionList<T>}. A user-supplied
- * {@link PartitionFn} decides, element by element, which partition receives it.
- *
- * <p>Example of use:
- * <pre> {@code
- * PCollection<Student> students = ...;
- * // Split students up into 10 partitions, by percentile:
- * PCollectionList<Student> studentsByPercentile =
- *     students.apply(Partition.of(10, new PartitionFn<Student>() {
- *         public int partitionFor(Student student, int numPartitions) {
- *             return student.getPercentile()  // 0..99
- *                  * numPartitions / 100;
- *         }}))
- * for (int i = 0; i < 10; i++) {
- *   PCollection<Student> partition = studentsByPercentile.get(i);
- *   ...
- * }
- * } </pre>
- *
- * <p>By default, every {@code PCollection} in the output list uses the {@code Coder} of the
- * input {@code PCollection}.
- *
- * <p>Each output element has the same timestamp and is in the same windows
- * as its corresponding input element, and each output {@code PCollection}
- * has the same
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
- * associated with it as the input.
- *
- * @param <T> the type of the elements of the input and output
- * {@code PCollection}s
- */
-public class Partition<T> extends PTransform<PCollection<T>, PCollectionList<T>> {
-
-  /**
-   * Decides which partition an element belongs to.
-   *
-   * @param <T> the type of the elements being partitioned
-   */
-  public interface PartitionFn<T> extends Serializable {
-    /**
-     * Selects the partition for {@code elem}.
-     *
-     * @param elem the element to be partitioned
-     * @param numPartitions the total number of partitions ({@code >= 1})
-     * @return index of the selected partition (in the range
-     * {@code [0..numPartitions-1]})
-     */
-    int partitionFor(T elem, int numPartitions);
-  }
-
-  /** The function that does the routing; also owns the per-partition output tags. */
-  private final transient PartitionDoFn<T> partitionDoFn;
-
-  private Partition(PartitionDoFn<T> partitionDoFn) {
-    this.partitionDoFn = partitionDoFn;
-  }
-
-  /**
-   * Creates a {@code Partition} transform that distributes its input over
-   * {@code numPartitions} outputs according to {@code partitionFn}.
-   *
-   * @param numPartitions the number of partitions to divide the input
-   * {@code PCollection} into
-   * @param partitionFn the function to invoke on each element to
-   * choose its output partition
-   * @throws IllegalArgumentException if {@code numPartitions <= 0}
-   */
-  public static <T> Partition<T> of(
-      int numPartitions, PartitionFn<? super T> partitionFn) {
-    PartitionDoFn<T> doFn = new PartitionDoFn<T>(numPartitions, partitionFn);
-    return new Partition<>(doFn);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Runs the partitioning {@code DoFn} as a multi-output {@code ParDo} (one side output per
-   * partition, with an unused {@code Void} main output) and collects the side outputs, in
-   * partition order, into a {@code PCollectionList}.
-   */
-  @Override
-  public PCollectionList<T> apply(PCollection<T> in) {
-    final TupleTagList partitionTags = partitionDoFn.getOutputTags();
-
-    PCollectionTuple partitioned =
-        in.apply(ParDo.withOutputTags(new TupleTag<Void>(){}, partitionTags).of(partitionDoFn));
-
-    PCollectionList<T> result = PCollectionList.empty(in.getPipeline());
-    Coder<T> elementCoder = in.getCoder();
-
-    for (TupleTag<?> untypedTag : partitionTags.getAll()) {
-      // Every tag was created as a TupleTag<T> by PartitionDoFn, and every tagged collection
-      // therefore holds elements of type T.
-      @SuppressWarnings("unchecked")
-      TupleTag<T> partitionTag = (TupleTag<T>) untypedTag;
-      PCollection<T> partition = partitioned.get(partitionTag).setCoder(elementCoder);
-      result = result.and(partition);
-    }
-    return result;
-  }
-
-  /**
-   * The {@code DoFn} behind {@link Partition}: emits each input element to the side output that
-   * corresponds to the partition index chosen by the {@link PartitionFn}.
-   */
-  private static class PartitionDoFn<X> extends DoFn<X, Void> {
-    private final int numPartitions;
-    private final PartitionFn<? super X> partitionFn;
-    private final TupleTagList outputTags;
-
-    /**
-     * Constructs a PartitionDoFn with one freshly created output tag per partition.
-     *
-     * @throws IllegalArgumentException if {@code numPartitions <= 0}
-     */
-    public PartitionDoFn(int numPartitions, PartitionFn<? super X> partitionFn) {
-      if (numPartitions <= 0) {
-        throw new IllegalArgumentException("numPartitions must be > 0");
-      }
-
-      this.numPartitions = numPartitions;
-      this.partitionFn = partitionFn;
-
-      TupleTagList tags = TupleTagList.empty();
-      for (int created = 0; created < numPartitions; created++) {
-        tags = tags.and(new TupleTag<X>());
-      }
-      this.outputTags = tags;
-    }
-
-    /** Returns the output tags, indexed by partition number. */
-    public TupleTagList getOutputTags() {
-      return outputTags;
-    }
-
-    /**
-     * Routes the current element to its partition.
-     *
-     * @throws IndexOutOfBoundsException if the partition function returns an index outside
-     *     {@code [0, numPartitions)}
-     */
-    @Override
-    public void processElement(ProcessContext c) {
-      X input = c.element();
-      int partition = partitionFn.partitionFor(input, numPartitions);
-      if (partition < 0 || partition >= numPartitions) {
-        throw new IndexOutOfBoundsException(
-            "Partition function returned out of bounds index: " +
-            partition + " not in [0.." + numPartitions + ")");
-      }
-      @SuppressWarnings("unchecked")
-      TupleTag<X> partitionTag = (TupleTag<X>) outputTags.get(partition);
-      c.sideOutput(partitionTag, input);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java
deleted file mode 100644
index 8913138..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/RemoveDuplicates.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-/**
- * {@code RemoveDuplicates<T>} turns a {@code PCollection<T>} into a {@code PCollection<T>} in
- * which every element occurs at most once per window.
- *
- * <p>Two values of type {@code T} are compared for equality <b>not</b> by
- * regular Java {@link Object#equals}, but instead by first encoding
- * each of the elements using the {@code PCollection}'s {@code Coder}, and then
- * comparing the encoded bytes.  This admits efficient parallel
- * evaluation.
- *
- * <p>Optionally, a function may be provided that maps each element to a representative
- * value.  In this case, two elements will be considered duplicates if they have equal
- * representative values, with equality being determined as above.
- *
- * <p>By default, the {@code Coder} of the output {@code PCollection}
- * is the same as the {@code Coder} of the input {@code PCollection}.
- *
- * <p>Each output element is in the same window as its corresponding input
- * element, and has the timestamp of the end of that window.  The output
- * {@code PCollection} has the same
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
- * as the input.
- *
- * <p>Does not preserve any order the input PCollection might have had.
- *
- * <p>Example of use:
- * <pre> {@code
- * PCollection<String> words = ...;
- * PCollection<String> uniqueWords =
- *     words.apply(RemoveDuplicates.<String>create());
- * } </pre>
- *
- * @param <T> the type of the elements of the input and output
- * {@code PCollection}s
- */
-public class RemoveDuplicates<T> extends PTransform<PCollection<T>,
-                                                    PCollection<T>> {
-  /**
-   * Creates a {@code RemoveDuplicates<T>} transform that deduplicates on the elements themselves.
-   *
-   * @param <T> the type of the elements of the input and output
-   * {@code PCollection}s
-   */
-  public static <T> RemoveDuplicates<T> create() {
-    return new RemoveDuplicates<T>();
-  }
-
-  /**
-   * Creates a {@link WithRepresentativeValues} transform that deduplicates on the value
-   * {@code fn} computes for each element. No type descriptor is set for the representative
-   * value; see {@link WithRepresentativeValues#withRepresentativeType}.
-   *
-   * @param <T> the type of the elements of the input and output
-   * {@code PCollection}s
-   * @param <IdT> the type of the representative value used to dedup
-   */
-  public static <T, IdT> WithRepresentativeValues<T, IdT> withRepresentativeValueFn(
-      SerializableFunction<T, IdT> fn) {
-    return new WithRepresentativeValues<T, IdT>(fn, null);
-  }
-
-  /**
-   * Keys every element by itself (with a {@code null} value), combines per key so that one
-   * entry per distinct key remains, and returns the keys.
-   */
-  @Override
-  public PCollection<T> apply(PCollection<T> in) {
-    PCollection<KV<T, Void>> keyedByElement =
-        in.apply(ParDo.named("CreateIndex")
-            .of(new DoFn<T, KV<T, Void>>() {
-                  @Override
-                  public void processElement(ProcessContext c) {
-                    c.output(KV.of(c.element(), (Void) null));
-                  }
-                }));
-
-    PCollection<KV<T, Void>> onePerElement =
-        keyedByElement.apply(Combine.<T, Void>perKey(
-            new SerializableFunction<Iterable<Void>, Void>() {
-              @Override
-              public Void apply(Iterable<Void> iter) {
-                // The values carry no information; only the key matters.
-                return null;
-              }
-            }));
-
-    return onePerElement.apply(Keys.<T>create());
-  }
-
-  /**
-   * A {@link RemoveDuplicates} {@link PTransform} that uses a {@link SerializableFunction} to
-   * obtain a representative value for each input element.
-   *
-   * <p>Construct via {@link RemoveDuplicates#withRepresentativeValueFn(SerializableFunction)}.
-   *
-   * @param <T> the type of input and output element
-   * @param <IdT> the type of representative values used to dedup
-   */
-  public static class WithRepresentativeValues<T, IdT>
-      extends PTransform<PCollection<T>, PCollection<T>> {
-    /** Maps an element to its representative value. */
-    private final SerializableFunction<T, IdT> fn;
-    /** Type of the representative value, or {@code null} when none was supplied. */
-    private final TypeDescriptor<IdT> representativeType;
-
-    private WithRepresentativeValues(
-        SerializableFunction<T, IdT> fn, TypeDescriptor<IdT> representativeType) {
-      this.fn = fn;
-      this.representativeType = representativeType;
-    }
-
-    /**
-     * Keys every element by its representative value, keeps one element per key (the left
-     * operand of each pairwise combine), and returns the surviving elements.
-     */
-    @Override
-    public PCollection<T> apply(PCollection<T> in) {
-      WithKeys<IdT, T> keyByRepresentative = WithKeys.of(fn);
-      if (representativeType != null) {
-        keyByRepresentative = keyByRepresentative.withKeyType(representativeType);
-      }
-
-      PCollection<KV<IdT, T>> keyed = in.apply(keyByRepresentative);
-
-      PCollection<KV<IdT, T>> onePerKey =
-          keyed.apply(Combine.<IdT, T, T>perKey(
-              new Combine.BinaryCombineFn<T>() {
-                @Override
-                public T apply(T left, T right) {
-                  return left;
-                }
-              }));
-
-      return onePerKey.apply(Values.<T>create());
-    }
-
-    /**
-     * Returns a copy of this transform that carries the given type descriptor for the
-     * representative value.
-     *
-     * <p>Required for use of {@link RemoveDuplicates#withRepresentativeValueFn(SerializableFunction)}
-     * in Java 8 with a lambda as the fn.
-     *
-     * @param type a {@link TypeDescriptor} describing the representative type of this
-     *             {@code WithRepresentativeValues}
-     * @return A {@code WithRepresentativeValues} {@link PTransform} that is like this one, but with
-     *         the specified output type descriptor.
-     */
-    public WithRepresentativeValues<T, IdT> withRepresentativeType(TypeDescriptor<IdT> type) {
-      return new WithRepresentativeValues<>(fn, type);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sample.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sample.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sample.java
deleted file mode 100644
index c5b6e7e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sample.java
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.common.base.Preconditions;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
-/**
- * {@code PTransform}s for taking samples of the elements in a
- * {@code PCollection}, or samples of the values associated with each
- * key in a {@code PCollection} of {@code KV}s.
- **/
-public class Sample {
-
-  /**
-   * {@code Sample#any(long)} takes a {@code PCollection<T>} and a limit, and
-   * produces a new {@code PCollection<T>} containing up to limit
-   * elements of the input {@code PCollection}.
-   *
-   * <p>If limit is less than or equal to the size of the input
-   * {@code PCollection}, then all the input's elements will be selected.
-   *
-   * <p>All of the elements of the output {@code PCollection} should fit into
-   * main memory of a single worker machine.  This operation does not
-   * run in parallel.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<String> input = ...;
-   * PCollection<String> output = input.apply(Sample.<String>any(100));
-   * } </pre>
-   *
-   * @param <T> the type of the elements of the input and output
-   * {@code PCollection}s
-   * @param limit the number of elements to take from the input
-   */
-  public static <T> PTransform<PCollection<T>, PCollection<T>> any(long limit) {
-    return new SampleAny<>(limit);
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes a {@code PCollection<T>},
-   * selects {@code sampleSize} elements, uniformly at random, and returns a
-   * {@code PCollection<Iterable<T>>} containing the selected elements.
-   * If the input {@code PCollection} has fewer than
-   * {@code sampleSize} elements, then the output {@code Iterable<T>}
-   * will be all the input's elements.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<String> pc = ...;
-   * PCollection<Iterable<String>> sampleOfSize10 =
-   *     pc.apply(Sample.fixedSizeGlobally(10));
-   * } </pre>
-   *
-   * @param sampleSize the number of elements to select; must be {@code >= 0}
-   * @param <T> the type of the elements
-   */
-  public static <T> PTransform<PCollection<T>, PCollection<Iterable<T>>>
-      fixedSizeGlobally(int sampleSize) {
-    return Combine.globally(new FixedSizedSampleFn<T>(sampleSize));
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<KV<K, V>>} and returns a
-   * {@code PCollection<KV<K, Iterable<V>>>} that contains an output
-   * element mapping each distinct key in the input
-   * {@code PCollection} to a sample of {@code sampleSize} values
-   * associated with that key in the input {@code PCollection}, taken
-   * uniformly at random.  If a key in the input {@code PCollection}
-   * has fewer than {@code sampleSize} values associated with it, then
-   * the output {@code Iterable<V>} associated with that key will be
-   * all the values associated with that key in the input
-   * {@code PCollection}.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<KV<String, Integer>> pc = ...;
-   * PCollection<KV<String, Iterable<Integer>>> sampleOfSize10PerKey =
-   *     pc.apply(Sample.<String, Integer>fixedSizePerKey());
-   * } </pre>
-   *
-   * @param sampleSize the number of values to select for each
-   * distinct key; must be {@code >= 0}
-   * @param <K> the type of the keys
-   * @param <V> the type of the values
-   */
-  public static <K, V> PTransform<PCollection<KV<K, V>>,
-                                  PCollection<KV<K, Iterable<V>>>>
-      fixedSizePerKey(int sampleSize) {
-    return Combine.perKey(new FixedSizedSampleFn<V>(sampleSize));
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@link PTransform} that takes a {@code PCollection<T>} and a limit, and
-   * produces a new {@code PCollection<T>} containing up to limit
-   * elements of the input {@code PCollection}.
-   */
-  public static class SampleAny<T> extends PTransform<PCollection<T>, PCollection<T>> {
-    private final long limit;
-
-    /**
-     * Constructs a {@code SampleAny<T>} PTransform that, when applied,
-     * produces a new PCollection containing up to {@code limit}
-     * elements of its input {@code PCollection}.
-     */
-    private SampleAny(long limit) {
-      Preconditions.checkArgument(limit >= 0, "Expected non-negative limit, received %s.", limit);
-      this.limit = limit;
-    }
-
-    @Override
-    public PCollection<T> apply(PCollection<T> in) {
-      PCollectionView<Iterable<T>> iterableView = in.apply(View.<T>asIterable());
-      return
-          in.getPipeline()
-          .apply(Create.of((Void) null).withCoder(VoidCoder.of()))
-          .apply(ParDo
-                 .withSideInputs(iterableView)
-                 .of(new SampleAnyDoFn<>(limit, iterableView)))
-          .setCoder(in.getCoder());
-    }
-  }
-
-  /**
-   * A {@link DoFn} that returns up to limit elements from the side input PCollection.
-   */
-  private static class SampleAnyDoFn<T> extends DoFn<Void, T> {
-    long limit;
-    final PCollectionView<Iterable<T>> iterableView;
-
-    public SampleAnyDoFn(long limit, PCollectionView<Iterable<T>> iterableView) {
-      this.limit = limit;
-      this.iterableView = iterableView;
-    }
-
-    @Override
-    public void processElement(ProcessContext c) {
-      for (T i : c.sideInput(iterableView)) {
-        if (limit-- <= 0) {
-          break;
-        }
-        c.output(i);
-      }
-    }
-  }
-
-  /**
-   * {@code CombineFn} that computes a fixed-size sample of a
-   * collection of values.
-   *
-   * @param <T> the type of the elements
-   */
-  public static class FixedSizedSampleFn<T>
-      extends CombineFn<T,
-          Top.BoundedHeap<KV<Integer, T>, SerializableComparator<KV<Integer, T>>>,
-          Iterable<T>> {
-    private final Top.TopCombineFn<KV<Integer, T>, SerializableComparator<KV<Integer, T>>>
-        topCombineFn;
-    private final Random rand = new Random();
-
-    private FixedSizedSampleFn(int sampleSize) {
-      if (sampleSize < 0) {
-        throw new IllegalArgumentException("sample size must be >= 0");
-      }
-      topCombineFn = new Top.TopCombineFn<KV<Integer, T>, SerializableComparator<KV<Integer, T>>>(
-          sampleSize, new KV.OrderByKey<Integer, T>());
-    }
-
-    @Override
-    public Top.BoundedHeap<KV<Integer, T>, SerializableComparator<KV<Integer, T>>>
-        createAccumulator() {
-      return topCombineFn.createAccumulator();
-    }
-
-    @Override
-    public Top.BoundedHeap<KV<Integer, T>, SerializableComparator<KV<Integer, T>>> addInput(
-        Top.BoundedHeap<KV<Integer, T>, SerializableComparator<KV<Integer, T>>> accumulator,
-        T input) {
-      accumulator.addInput(KV.of(rand.nextInt(), input));
-      return accumulator;
-    }
-
-    @Override
-    public Top.BoundedHeap<KV<Integer, T>, SerializableComparator<KV<Integer, T>>>
-        mergeAccumulators(
-            Iterable<Top.BoundedHeap<KV<Integer, T>, SerializableComparator<KV<Integer, T>>>>
-            accumulators) {
-      return topCombineFn.mergeAccumulators(accumulators);
-    }
-
-    @Override
-    public Iterable<T> extractOutput(
-        Top.BoundedHeap<KV<Integer, T>, SerializableComparator<KV<Integer, T>>> accumulator) {
-      List<T> out = new ArrayList<>();
-      for (KV<Integer, T> element : accumulator.extractOutput()) {
-        out.add(element.getValue());
-      }
-      return out;
-    }
-
-    @Override
-    public Coder<Top.BoundedHeap<KV<Integer, T>, SerializableComparator<KV<Integer, T>>>>
-        getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
-      return topCombineFn.getAccumulatorCoder(
-          registry, KvCoder.of(BigEndianIntegerCoder.of(), inputCoder));
-    }
-
-    @Override
-    public Coder<Iterable<T>> getDefaultOutputCoder(
-        CoderRegistry registry, Coder<T> inputCoder) {
-      return IterableCoder.of(inputCoder);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableComparator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableComparator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableComparator.java
deleted file mode 100644
index 7d41917..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableComparator.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import java.io.Serializable;
-import java.util.Comparator;
-
-/**
- * A {@code Comparator} that is also {@code Serializable}.
- *
- * <p>This interface declares no methods of its own; it only combines the two supertypes so
- * that a single type can be required where a comparator must also be serializable.
- *
- * @param <T> type of values being compared
- */
-public interface SerializableComparator<T> extends Comparator<T>, Serializable {
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableFunction.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableFunction.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableFunction.java
deleted file mode 100644
index 81bf3d4..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SerializableFunction.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import java.io.Serializable;
-
-/**
- * A function that computes an output value of type {@code OutputT} from an input value of type
- * {@code InputT} and is {@link Serializable}.
- *
- * @param <InputT> input value type
- * @param <OutputT> output value type
- */
-public interface SerializableFunction<InputT, OutputT> extends Serializable {
-  /** Returns the result of invoking this function on the given input. */
-  public OutputT apply(InputT input);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SimpleFunction.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SimpleFunction.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SimpleFunction.java
deleted file mode 100644
index ef6fd81..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/SimpleFunction.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-/**
- * A {@link SerializableFunction} which is not a <i>functional interface</i>.
- * Concrete subclasses allow us to infer type information, which in turn aids
- * {@link Coder} inference.
- */
-public abstract class SimpleFunction<InputT, OutputT>
-    implements SerializableFunction<InputT, OutputT> {
-
-  /**
-   * Returns a {@link TypeDescriptor} capturing what is known statically
-   * about the input type of this {@code DoFn} instance's most-derived
-   * class.
-   *
-   * <p>See {@link #getOutputTypeDescriptor} for more discussion.
-   */
-  public TypeDescriptor<InputT> getInputTypeDescriptor() {
-    return new TypeDescriptor<InputT>(this) {};
-  }
-
-  /**
-   * Returns a {@link TypeDescriptor} capturing what is known statically
-   * about the output type of this {@code DoFn} instance's
-   * most-derived class.
-   *
-   * <p>In the normal case of a concrete {@code DoFn} subclass with
-   * no generic type parameters of its own (including anonymous inner
-   * classes), this will be a complete non-generic type, which is good
-   * for choosing a default output {@code Coder<OutputT>} for the output
-   * {@code PCollection<OutputT>}.
-   */
-  public TypeDescriptor<OutputT> getOutputTypeDescriptor() {
-    return new TypeDescriptor<OutputT>(this) {};
-  }
-}

[58/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java
new file mode 100644
index 0000000..c57a5f2
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.TableRowJsonCoder;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.SerializableComparator;
+import com.google.cloud.dataflow.sdk.transforms.Top;
+import com.google.cloud.dataflow.sdk.transforms.windowing.CalendarWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+
+import java.util.List;
+
+/**
+ * An example that reads Wikipedia edit data from Cloud Storage and computes the user with
+ * the longest string of edits separated by no more than an hour within each month.
+ *
+ * <p>Concepts: Using Windowing to perform time-based aggregations of data.
+ *
+ * <p>It is not recommended to execute this pipeline locally, given the size of the default input
+ * data.
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * </pre>
+ * and an output prefix on GCS:
+ * <pre>{@code
+ *   --output=gs://YOUR_OUTPUT_PREFIX
+ * }</pre>
+ *
+ * <p>The default input is {@code gs://dataflow-samples/wikipedia_edits/*.json} and can be
+ * overridden with {@code --input}.
+ *
+ * <p>The input for this example is large enough that it's a good place to enable (experimental)
+ * autoscaling:
+ * <pre>{@code
+ *   --autoscalingAlgorithm=BASIC
+ *   --maxNumWorkers=20
+ * }
+ * </pre>
+ * This will automatically scale the number of workers up over time until the job completes.
+ */
+public class TopWikipediaSessions {
+  private static final String EXPORTED_WIKI_TABLE = "gs://dataflow-samples/wikipedia_edits/*.json";
+
+  /**
+   * Extracts user and timestamp from a TableRow representing a Wikipedia edit.
+   */
+  static class ExtractUserAndTimestamp extends DoFn<TableRow, String> {
+    @Override
+    public void processElement(ProcessContext c) {
+      TableRow row = c.element();
+      int timestamp = (Integer) row.get("timestamp");
+      String userName = (String) row.get("contributor_username");
+      if (userName != null) {
+        // Sets the implicit timestamp field to be used in windowing.
+        c.outputWithTimestamp(userName, new Instant(timestamp * 1000L));
+      }
+    }
+  }
+
+  /**
+   * Computes the number of edits in each user session.  A session is defined as
+   * a string of edits where each is separated from the next by less than an hour.
+   */
+  static class ComputeSessions
+      extends PTransform<PCollection<String>, PCollection<KV<String, Long>>> {
+    @Override
+    public PCollection<KV<String, Long>> apply(PCollection<String> actions) {
+      return actions
+          .apply(Window.<String>into(Sessions.withGapDuration(Duration.standardHours(1))))
+
+          .apply(Count.<String>perElement());
+    }
+  }
+
+  /**
+   * Computes the longest session ending in each month.
+   */
+  private static class TopPerMonth
+      extends PTransform<PCollection<KV<String, Long>>, PCollection<List<KV<String, Long>>>> {
+    @Override
+    public PCollection<List<KV<String, Long>>> apply(PCollection<KV<String, Long>> sessions) {
+      return sessions
+        .apply(Window.<KV<String, Long>>into(CalendarWindows.months(1)))
+
+          .apply(Top.of(1, new SerializableComparator<KV<String, Long>>() {
+                    @Override
+                    public int compare(KV<String, Long> o1, KV<String, Long> o2) {
+                      return Long.compare(o1.getValue(), o2.getValue());
+                    }
+                  }).withoutDefaults());
+    }
+  }
+
+  static class SessionsToStringsDoFn extends DoFn<KV<String, Long>, KV<String, Long>>
+      implements RequiresWindowAccess {
+
+    @Override
+    public void processElement(ProcessContext c) {
+      c.output(KV.of(
+          c.element().getKey() + " : " + c.window(), c.element().getValue()));
+    }
+  }
+
+  static class FormatOutputDoFn extends DoFn<List<KV<String, Long>>, String>
+      implements RequiresWindowAccess {
+    @Override
+    public void processElement(ProcessContext c) {
+      for (KV<String, Long> item : c.element()) {
+        String session = item.getKey();
+        long count = item.getValue();
+        c.output(session + " : " + count + " : " + ((IntervalWindow) c.window()).start());
+      }
+    }
+  }
+
+  /** Samples users by hash, counts their sessions, and formats each month's top session. */
+  static class ComputeTopSessions extends PTransform<PCollection<TableRow>, PCollection<String>> {
+    /** A user is kept when |hashCode| is at most this fraction of Integer.MAX_VALUE. */
+    private final double samplingThreshold;
+
+    public ComputeTopSessions(double samplingThreshold) {
+      this.samplingThreshold = samplingThreshold;
+    }
+
+    @Override
+    public PCollection<String> apply(PCollection<TableRow> input) {
+      return input
+          .apply(ParDo.of(new ExtractUserAndTimestamp()))
+          .apply(ParDo.named("SampleUsers").of(
+              new DoFn<String, String>() {
+                @Override
+                public void processElement(ProcessContext c) {
+                  // Widen to long first: Math.abs(Integer.MIN_VALUE) is still negative.
+                  long absHash = Math.abs((long) c.element().hashCode());
+                  if (absHash <= Integer.MAX_VALUE * samplingThreshold) {
+                    c.output(c.element());
+                  }
+                }
+              }))
+          .apply(new ComputeSessions())
+          .apply(ParDo.named("SessionsToStrings").of(new SessionsToStringsDoFn()))
+          .apply(new TopPerMonth())
+          .apply(ParDo.named("FormatOutput").of(new FormatOutputDoFn()));
+    }
+  }
+
+  /**
+   * Options supported by this class.
+   *
+   * <p>Inherits standard Dataflow configuration options.
+   */
+  private static interface Options extends PipelineOptions {
+    @Description(
+      "Input specified as a GCS path containing a BigQuery table exported as json")
+    @Default.String(EXPORTED_WIKI_TABLE)
+    String getInput();
+    void setInput(String value);
+
+    @Description("File to output results to")
+    @Validation.Required
+    String getOutput();
+    void setOutput(String value);
+  }
+
+  public static void main(String[] args) {
+    Options options = PipelineOptionsFactory.fromArgs(args)
+        .withValidation()
+        .as(Options.class);
+    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
+
+    Pipeline p = Pipeline.create(dataflowOptions);
+
+    double samplingThreshold = 0.1;
+
+    p.apply(TextIO.Read
+        .from(options.getInput())
+        .withCoder(TableRowJsonCoder.of()))
+     .apply(new ComputeTopSessions(samplingThreshold))
+     .apply(TextIO.Write.named("Write").withoutSharding().to(options.getOutput()));
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficMaxLaneFlow.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficMaxLaneFlow.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficMaxLaneFlow.java
new file mode 100644
index 0000000..2d54252
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficMaxLaneFlow.java
@@ -0,0 +1,425 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
+import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
+import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
+import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicAndSubscriptionOptions;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.coders.AvroCoder;
+import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.PubsubIO;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.Combine;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
+import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PBegin;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.common.base.Strings;
+
+import org.apache.avro.reflect.Nullable;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A Dataflow Example that runs in both batch and streaming modes with traffic sensor data.
+ * You can configure the running mode by setting {@literal --streaming} to true or false.
+ *
+ * <p>Concepts: The batch and streaming runners, sliding windows, Google Cloud Pub/Sub
+ * topic injection, use of the AvroCoder to encode a custom class, and custom Combine transforms.
+ *
+ * <p>This example analyzes traffic sensor data using SlidingWindows. For each window,
+ * it finds the lane that had the highest flow recorded, for each sensor station. It writes
+ * those max values along with auxiliary info to a BigQuery table.
+ *
+ * <p>In batch mode, the pipeline reads traffic sensor data from {@literal --inputFile}.
+ *
+ * <p>In streaming mode, the pipeline reads the data from a Pub/Sub topic.
+ * By default, the example will run a separate pipeline to inject the data from the default
+ * {@literal --inputFile} to the Pub/Sub {@literal --pubsubTopic}. It will make it available for
+ * the streaming pipeline to process. You may override the default {@literal --inputFile} with the
+ * file of your choosing. You may also set {@literal --inputFile} to an empty string, which will
+ * disable the automatic Pub/Sub injection and allow you to use a separate tool to control the
+ * input to this example. Example code that publishes traffic sensor data to a Pub/Sub topic
+ * is provided in
+ * <a href="https://github.com/GoogleCloudPlatform/cloud-pubsub-samples-python/tree/master/gce-cmdline-publisher">gce-cmdline-publisher</a>.
+ *
+ * <p>The example is configured to use the default Pub/Sub topic and the default BigQuery table
+ * from the example common package (there are no defaults for a general Dataflow pipeline).
+ * You can override them by using the {@literal --pubsubTopic}, {@literal --bigQueryDataset}, and
+ * {@literal --bigQueryTable} options. If the Pub/Sub topic or the BigQuery table do not exist,
+ * the example will try to create them.
+ *
+ * <p>The example will try to cancel the pipelines on the signal to terminate the process (CTRL-C)
+ * and then exit.
+ */
+public class TrafficMaxLaneFlow {
+
+  private static final String PUBSUB_TIMESTAMP_LABEL_KEY = "timestamp_ms";
+  private static final Integer VALID_INPUTS = 4999;
+
+  static final int WINDOW_DURATION = 60;  // Default sliding window duration in minutes
+  static final int WINDOW_SLIDE_EVERY = 5;  // Default window 'slide every' setting in minutes
+
+  /**
+   * This class holds information about each lane in a station reading, along with some general
+   * information from the reading.
+   */
+  @DefaultCoder(AvroCoder.class)
+  static class LaneInfo {
+    @Nullable String stationId;
+    @Nullable String lane;
+    @Nullable String direction;
+    @Nullable String freeway;
+    @Nullable String recordedTimestamp;
+    @Nullable Integer laneFlow;
+    @Nullable Integer totalFlow;
+    @Nullable Double laneAO;
+    @Nullable Double laneAS;
+
+    public LaneInfo() {}
+
+    public LaneInfo(String stationId, String lane, String direction, String freeway,
+        String timestamp, Integer laneFlow, Double laneAO,
+        Double laneAS, Integer totalFlow) {
+      this.stationId = stationId;
+      this.lane = lane;
+      this.direction = direction;
+      this.freeway = freeway;
+      this.recordedTimestamp = timestamp;
+      this.laneFlow = laneFlow;
+      this.laneAO = laneAO;
+      this.laneAS = laneAS;
+      this.totalFlow = totalFlow;
+    }
+
+    public String getStationId() {
+      return this.stationId;
+    }
+    public String getLane() {
+      return this.lane;
+    }
+    public String getDirection() {
+      return this.direction;
+    }
+    public String getFreeway() {
+      return this.freeway;
+    }
+    public String getRecordedTimestamp() {
+      return this.recordedTimestamp;
+    }
+    public Integer getLaneFlow() {
+      return this.laneFlow;
+    }
+    public Double getLaneAO() {
+      return this.laneAO;
+    }
+    public Double getLaneAS() {
+      return this.laneAS;
+    }
+    public Integer getTotalFlow() {
+      return this.totalFlow;
+    }
+  }
+
+  /**
+   * Extract the timestamp field from the input string, and use it as the element timestamp.
+   */
+  static class ExtractTimestamps extends DoFn<String, String> {
+    private static final DateTimeFormatter dateTimeFormat =
+        DateTimeFormat.forPattern("MM/dd/yyyy HH:mm:ss");
+
+    @Override
+    public void processElement(DoFn<String, String>.ProcessContext c) throws Exception {
+      String[] items = c.element().split(",");
+      if (items.length > 0) {
+        try {
+          String timestamp = items[0];
+          c.outputWithTimestamp(c.element(), new Instant(dateTimeFormat.parseMillis(timestamp)));
+        } catch (IllegalArgumentException e) {
+          // Skip the invalid input.
+        }
+      }
+    }
+  }
+
+  /**
+   * Extract flow information for each of the 8 lanes in a reading, and output as separate tuples.
+   * This will let us determine which lane has the max flow for that station over the span of the
+   * window, and output not only the max flow from that calculation, but other associated
+   * information. The number of lanes for which data is present depends upon which freeway the data
+   * point comes from.
+   */
+  static class ExtractFlowInfoFn extends DoFn<String, KV<String, LaneInfo>> {
+    @Override
+    public void processElement(ProcessContext c) {
+      String[] items = c.element().split(",");
+      if (items.length < 49) {
+        // Skip the invalid input: lane 8's speed is read from items[8 + 5 * 8], i.e. items[48].
+        return;
+      }
+      // extract the sensor information for the lanes from the input string fields.
+      String timestamp = items[0];
+      String stationId = items[1];
+      String freeway = items[2];
+      String direction = items[3];
+      Integer totalFlow = tryIntParse(items[7]);
+      for (int i = 1; i <= 8; ++i) {
+        Integer laneFlow = tryIntParse(items[6 + 5 * i]);
+        Double laneAvgOccupancy = tryDoubleParse(items[7 + 5 * i]);
+        Double laneAvgSpeed = tryDoubleParse(items[8 + 5 * i]);
+        if (laneFlow == null || laneAvgOccupancy == null || laneAvgSpeed == null) {
+          // Stop at the first lane that fails to parse; the remaining lanes are not emitted.
+          return;
+        }
+        LaneInfo laneInfo = new LaneInfo(stationId, "lane" + i, direction, freeway, timestamp,
+            laneFlow, laneAvgOccupancy, laneAvgSpeed, totalFlow);
+        c.output(KV.of(stationId, laneInfo));
+      }
+    }
+  }
+
+  /**
+   * A custom 'combine function' used with the Combine.perKey transform. Used to find the max lane
+   * flow over all the data points in the Window. Extracts the lane flow from the input string and
+   * determines whether it's the max seen so far. We're using a custom combiner instead of the Max
+   * transform because we want to retain the additional information we've associated with the flow
+   * value.
+   */
+  public static class MaxFlow implements SerializableFunction<Iterable<LaneInfo>, LaneInfo> {
+    @Override
+    public LaneInfo apply(Iterable<LaneInfo> input) {
+      Integer max = 0;
+      LaneInfo maxInfo = new LaneInfo();
+      for (LaneInfo item : input) {
+        Integer flow = item.getLaneFlow();
+        if (flow != null && (flow >= max)) {
+          max = flow;
+          maxInfo = item;
+        }
+      }
+      return maxInfo;
+    }
+  }
+
+  /**
+   * Format the results of the Max Lane flow calculation to a TableRow, to save to BigQuery.
+   * Add the timestamp from the window context.
+   */
+  static class FormatMaxesFn extends DoFn<KV<String, LaneInfo>, TableRow> {
+    @Override
+    public void processElement(ProcessContext c) {
+
+      LaneInfo laneInfo = c.element().getValue();
+      TableRow row = new TableRow()
+          .set("station_id", c.element().getKey())
+          .set("direction", laneInfo.getDirection())
+          .set("freeway", laneInfo.getFreeway())
+          .set("lane_max_flow", laneInfo.getLaneFlow())
+          .set("lane", laneInfo.getLane())
+          .set("avg_occ", laneInfo.getLaneAO())
+          .set("avg_speed", laneInfo.getLaneAS())
+          .set("total_flow", laneInfo.getTotalFlow())
+          .set("recorded_timestamp", laneInfo.getRecordedTimestamp())
+          .set("window_timestamp", c.timestamp().toString());
+      c.output(row);
+    }
+
+    /** Defines the BigQuery schema used for the output. */
+    static TableSchema getSchema() {
+      List<TableFieldSchema> fields = new ArrayList<>();
+      fields.add(new TableFieldSchema().setName("station_id").setType("STRING"));
+      fields.add(new TableFieldSchema().setName("direction").setType("STRING"));
+      fields.add(new TableFieldSchema().setName("freeway").setType("STRING"));
+      fields.add(new TableFieldSchema().setName("lane_max_flow").setType("INTEGER"));
+      fields.add(new TableFieldSchema().setName("lane").setType("STRING"));
+      fields.add(new TableFieldSchema().setName("avg_occ").setType("FLOAT"));
+      fields.add(new TableFieldSchema().setName("avg_speed").setType("FLOAT"));
+      fields.add(new TableFieldSchema().setName("total_flow").setType("INTEGER"));
+      fields.add(new TableFieldSchema().setName("window_timestamp").setType("TIMESTAMP"));
+      fields.add(new TableFieldSchema().setName("recorded_timestamp").setType("STRING"));
+      TableSchema schema = new TableSchema().setFields(fields);
+      return schema;
+    }
+  }
+
+  /**
+   * This PTransform extracts lane info, calculates the max lane flow found for a given station (for
+   * the current Window) using a custom 'combiner', and formats the results for BigQuery.
+   */
+  static class MaxLaneFlow
+      extends PTransform<PCollection<KV<String, LaneInfo>>, PCollection<TableRow>> {
+    @Override
+    public PCollection<TableRow> apply(PCollection<KV<String, LaneInfo>> flowInfo) {
+      // stationId, LaneInfo => stationId + max lane flow info
+      PCollection<KV<String, LaneInfo>> flowMaxes =
+          flowInfo.apply(Combine.<String, LaneInfo>perKey(
+              new MaxFlow()));
+
+      // <stationId, max lane flow info>... => row...
+      PCollection<TableRow> results = flowMaxes.apply(
+          ParDo.of(new FormatMaxesFn()));
+
+      return results;
+    }
+  }
+
+  static class ReadFileAndExtractTimestamps extends PTransform<PBegin, PCollection<String>> {
+    private final String inputFile;
+
+    public ReadFileAndExtractTimestamps(String inputFile) {
+      this.inputFile = inputFile;
+    }
+
+    @Override
+    public PCollection<String> apply(PBegin begin) {
+      return begin
+          .apply(TextIO.Read.from(inputFile))
+          .apply(ParDo.of(new ExtractTimestamps()));
+    }
+  }
+
+  /**
+    * Options supported by {@link TrafficMaxLaneFlow}.
+    *
+    * <p>Inherits standard configuration options.
+    */
+  private interface TrafficMaxLaneFlowOptions extends DataflowExampleOptions,
+      ExamplePubsubTopicAndSubscriptionOptions, ExampleBigQueryTableOptions {
+        @Description("Input file to inject to Pub/Sub topic")
+    @Default.String("gs://dataflow-samples/traffic_sensor/"
+        + "Freeways-5Minaa2010-01-01_to_2010-02-15_test2.csv")
+    String getInputFile();
+    void setInputFile(String value);
+
+    @Description("Numeric value of sliding window duration, in minutes")
+    @Default.Integer(WINDOW_DURATION)
+    Integer getWindowDuration();
+    void setWindowDuration(Integer value);
+
+    @Description("Numeric value of window 'slide every' setting, in minutes")
+    @Default.Integer(WINDOW_SLIDE_EVERY)
+    Integer getWindowSlideEvery();
+    void setWindowSlideEvery(Integer value);
+
+    @Description("Whether to run the pipeline with unbounded input")
+    @Default.Boolean(false)
+    boolean isUnbounded();
+    void setUnbounded(boolean value);
+  }
+
+  /**
+   * Sets up and runs the pipeline, reading from Pub/Sub in both bounded and unbounded modes.
+   *
+   * @throws IOException if there is a problem setting up resources
+   */
+  public static void main(String[] args) throws IOException {
+    TrafficMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args)
+        .withValidation()
+        .as(TrafficMaxLaneFlowOptions.class);
+    options.setBigQuerySchema(FormatMaxesFn.getSchema());
+    // Using DataflowExampleUtils to set up required resources.
+    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options, options.isUnbounded());
+
+    Pipeline pipeline = Pipeline.create(options);
+    TableReference tableRef = new TableReference();
+    tableRef.setProjectId(options.getProject());
+    tableRef.setDatasetId(options.getBigQueryDataset());
+    tableRef.setTableId(options.getBigQueryTable());
+
+    PCollection<String> input;
+    if (options.isUnbounded()) {
+      // Read unbounded PubSubIO.
+      input = pipeline.apply(PubsubIO.Read
+          .timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
+          .subscription(options.getPubsubSubscription()));
+    } else {
+      // Read bounded PubSubIO, capped at VALID_INPUTS records.
+      input = pipeline.apply(PubsubIO.Read
+          .timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
+          .subscription(options.getPubsubSubscription()).maxNumRecords(VALID_INPUTS));
+
+      // To read bounded TextIO files, use:
+      // input = pipeline.apply(new ReadFileAndExtractTimestamps(options.getInputFile()));
+    }
+    input
+        // row... => <stationId, LaneInfo> ...
+        .apply(ParDo.of(new ExtractFlowInfoFn()))
+        // map the incoming data stream into sliding windows. The default window duration values
+        // work well if you're running the accompanying Pub/Sub generator script with the
+        // --replay flag, which simulates pauses in the sensor data publication. You may want to
+        // adjust them otherwise.
+        .apply(Window.<KV<String, LaneInfo>>into(SlidingWindows.of(
+            Duration.standardMinutes(options.getWindowDuration())).
+            every(Duration.standardMinutes(options.getWindowSlideEvery()))))
+        .apply(new MaxLaneFlow())
+        .apply(BigQueryIO.Write.to(tableRef)
+            .withSchema(FormatMaxesFn.getSchema()));
+
+    // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
+    if (!Strings.isNullOrEmpty(options.getInputFile())
+        && !Strings.isNullOrEmpty(options.getPubsubTopic())) {
+      dataflowUtils.runInjectorPipeline(
+          new ReadFileAndExtractTimestamps(options.getInputFile()),
+          options.getPubsubTopic(),
+          PUBSUB_TIMESTAMP_LABEL_KEY);
+    }
+
+    // Run the pipeline.
+    PipelineResult result = pipeline.run();
+
+    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
+    dataflowUtils.waitToFinish(result);
+  }
+
+  private static Integer tryIntParse(String number) {
+    try {
+      return Integer.parseInt(number);
+    } catch (NumberFormatException e) {
+      return null;
+    }
+  }
+
+  private static Double tryDoubleParse(String number) {
+    try {
+      return Double.parseDouble(number);
+    } catch (NumberFormatException e) {
+      return null;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficRoutes.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficRoutes.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficRoutes.java
new file mode 100644
index 0000000..e3e88c2
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficRoutes.java
@@ -0,0 +1,459 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
+import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
+import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
+import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicAndSubscriptionOptions;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.coders.AvroCoder;
+import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.PubsubIO;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PBegin;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
+
+import org.apache.avro.reflect.Nullable;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A Dataflow Example that runs in both batch and streaming modes with traffic sensor data.
+ * You can configure the running mode by setting {@literal --streaming} to true or false.
+ *
+ * <p>Concepts: The batch and streaming runners, GroupByKey, sliding windows, and
+ * Google Cloud Pub/Sub topic injection.
+ *
+ * <p>This example analyzes traffic sensor data using SlidingWindows. For each window,
+ * it calculates the average speed over the window for some small set of predefined 'routes',
+ * and looks for 'slowdowns' in those routes. It writes its results to a BigQuery table.
+ *
+ * <p>In batch mode, the pipeline reads traffic sensor data from {@literal --inputFile}.
+ *
+ * <p>In streaming mode, the pipeline reads the data from a Pub/Sub topic.
+ * By default, the example will run a separate pipeline to inject the data from the default
+ * {@literal --inputFile} to the Pub/Sub {@literal --pubsubTopic}. It will make it available for
+ * the streaming pipeline to process. You may override the default {@literal --inputFile} with the
+ * file of your choosing. You may also set {@literal --inputFile} to an empty string, which will
+ * disable the automatic Pub/Sub injection and allow you to use a separate tool to control the
+ * input to this example. Example code that publishes traffic sensor data to a Pub/Sub topic
+ * is provided in the
+ * <a href="https://github.com/GoogleCloudPlatform/cloud-pubsub-samples-python/tree/master/gce-cmdline-publisher">gce-cmdline-publisher sample</a>.
+ *
+ * <p>The example is configured to use the default Pub/Sub topic and the default BigQuery table
+ * from the example common package (there are no defaults for a general Dataflow pipeline).
+ * You can override them by using the {@literal --pubsubTopic}, {@literal --bigQueryDataset}, and
+ * {@literal --bigQueryTable} options. If the Pub/Sub topic or the BigQuery table do not exist,
+ * the example will try to create them.
+ *
+ * <p>The example will try to cancel the pipelines on the signal to terminate the process (CTRL-C)
+ * and then exit.
+ */
+
+public class TrafficRoutes {
+
+  private static final String PUBSUB_TIMESTAMP_LABEL_KEY = "timestamp_ms";
+  private static final Integer VALID_INPUTS = 4999;
+
+  // Instantiate some small predefined San Diego routes to analyze
+  static Map<String, String> sdStations = buildStationInfo();
+  static final int WINDOW_DURATION = 3;  // Default sliding window duration in minutes
+  static final int WINDOW_SLIDE_EVERY = 1;  // Default window 'slide every' setting in minutes
+
+  /**
+   * This class holds information about a station reading's average speed.
+   *
+   * <p>{@link AvroCoder} is declared as the default coder. Every field is marked
+   * {@code @Nullable}, and the no-argument constructor leaves all of them unset.
+   * Instances order by {@code timestamp} (see {@link #compareTo}).
+   */
+  @DefaultCoder(AvroCoder.class)
+  static class StationSpeed implements Comparable<StationSpeed> {
+    @Nullable String stationId;
+    @Nullable Double avgSpeed;
+    @Nullable Long timestamp;
+
+    // No-argument constructor; leaves every field null.
+    public StationSpeed() {}
+
+    public StationSpeed(String stationId, Double avgSpeed, Long timestamp) {
+      this.stationId = stationId;
+      this.avgSpeed = avgSpeed;
+      this.timestamp = timestamp;
+    }
+
+    public String getStationId() {
+      return this.stationId;
+    }
+    public Double getAvgSpeed() {
+      return this.avgSpeed;
+    }
+
+    /**
+     * Orders readings by ascending {@code timestamp}.
+     *
+     * <p>NOTE(review): both timestamps are unboxed for {@link Long#compare}, so this throws
+     * {@link NullPointerException} if either one is null. Confirm that every instance that
+     * gets sorted was built with a timestamp.
+     */
+    @Override
+    public int compareTo(StationSpeed other) {
+      return Long.compare(this.timestamp, other.timestamp);
+    }
+  }
+
+  /**
+   * This class holds information about a route's speed/slowdown.
+   */
+  @DefaultCoder(AvroCoder.class)
+  static class RouteInfo {
+    @Nullable String route;
+    @Nullable Double avgSpeed;
+    @Nullable Boolean slowdownEvent;
+
+
+    public RouteInfo() {}
+
+    public RouteInfo(String route, Double avgSpeed, Boolean slowdownEvent) {
+      this.route = route;
+      this.avgSpeed = avgSpeed;
+      this.slowdownEvent = slowdownEvent;
+    }
+
+    public String getRoute() {
+      return this.route;
+    }
+    public Double getAvgSpeed() {
+      return this.avgSpeed;
+    }
+    public Boolean getSlowdownEvent() {
+      return this.slowdownEvent;
+    }
+  }
+
+  /**
+   * Extract the timestamp field from the input string, and use it as the element timestamp.
+   */
+  static class ExtractTimestamps extends DoFn<String, String> {
+    private static final DateTimeFormatter dateTimeFormat =
+        DateTimeFormat.forPattern("MM/dd/yyyy HH:mm:ss");
+
+    @Override
+    public void processElement(DoFn<String, String>.ProcessContext c) throws Exception {
+      String[] items = c.element().split(",");
+      String timestamp = tryParseTimestamp(items);
+      if (timestamp != null) {
+        try {
+          c.outputWithTimestamp(c.element(), new Instant(dateTimeFormat.parseMillis(timestamp)));
+        } catch (IllegalArgumentException e) {
+          // Skip the invalid input.
+        }
+      }
+    }
+  }
+
+  /**
+   * Keeps only the readings from 'main line' stations that sit on one of the predefined
+   * 'routes', and emits each of them as a (route, {@link StationSpeed}) pair.
+   */
+  static class ExtractStationSpeedFn extends DoFn<String, KV<String, StationSpeed>> {
+
+    @Override
+    public void processElement(ProcessContext c) {
+      String[] fields = c.element().split(",");
+      // This analysis only looks at 'main line' station types.
+      if (!"ML".equals(tryParseStationType(fields))) {
+        return;
+      }
+      Double speed = tryParseAvgSpeed(fields);
+      String station = tryParseStationId(fields);
+      // This simple example drops everything that is not on one of the hardwired routes.
+      if (speed == null || station == null || !sdStations.containsKey(station)) {
+        return;
+      }
+      StationSpeed reading = new StationSpeed(station, speed, c.timestamp().getMillis());
+      // The output key is the 'route' name that the 'sdStations' map holds for the station.
+      c.output(KV.of(sdStations.get(station), reading));
+    }
+  }
+
+  /**
+   * For a given route, track average speed for the window. Calculate whether
+   * traffic is currently slowing down, via a predefined threshold: each reading is compared
+   * with the previous reading from the same station, and the window is called a 'slowdown'
+   * when the readings that did not increase number at least twice the readings that increased.
+   * Note: these calculations are for example purposes only, and are unrealistic and oversimplified.
+   */
+  static class GatherStats
+      extends DoFn<KV<String, Iterable<StationSpeed>>, KV<String, RouteInfo>> {
+    @Override
+    public void processElement(ProcessContext c) throws IOException {
+      String route = c.element().getKey();
+      double speedSum = 0.0;
+      int speedCount = 0;
+      int speedups = 0;
+      int slowdowns = 0;
+      List<StationSpeed> infoList = Lists.newArrayList(c.element().getValue());
+      // StationSpeeds sort by embedded timestamp.
+      Collections.sort(infoList);
+      // Most recent non-null speed seen so far for each station ID.
+      Map<String, Double> prevSpeeds = new HashMap<>();
+      // For all stations in the route, sum (non-null) speeds. Keep a count of the non-null speeds.
+      for (StationSpeed item : infoList) {
+        Double speed = item.getAvgSpeed();
+        if (speed != null) {
+          speedSum += speed;
+          speedCount++;
+          Double lastSpeed = prevSpeeds.get(item.getStationId());
+          // The first reading of a station has nothing to be compared with.
+          if (lastSpeed != null) {
+            if (lastSpeed < speed) {
+              speedups += 1;
+            } else {
+              // A reading equal to the previous one is counted as a slowdown too.
+              slowdowns += 1;
+            }
+          }
+          prevSpeeds.put(item.getStationId(), speed);
+        }
+      }
+      if (speedCount == 0) {
+        // No average to compute.
+        return;
+      }
+      double speedAvg = speedSum / speedCount;
+      // Also true when no comparison was possible at all (0 >= 2 * 0).
+      boolean slowdownEvent = slowdowns >= 2 * speedups;
+      RouteInfo routeInfo = new RouteInfo(route, speedAvg, slowdownEvent);
+      c.output(KV.of(route, routeInfo));
+    }
+  }
+
+  /**
+   * Format the results of the slowdown calculations to a TableRow, to save to BigQuery.
+   */
+  static class FormatStatsFn extends DoFn<KV<String, RouteInfo>, TableRow> {
+    @Override
+    public void processElement(ProcessContext c) {
+      RouteInfo routeInfo = c.element().getValue();
+      TableRow row = new TableRow()
+          .set("avg_speed", routeInfo.getAvgSpeed())
+          .set("slowdown_event", routeInfo.getSlowdownEvent())
+          .set("route", c.element().getKey())
+          .set("window_timestamp", c.timestamp().toString());
+      c.output(row);
+    }
+
+    /**
+     * Defines the BigQuery schema used for the output.
+     */
+    static TableSchema getSchema() {
+      List<TableFieldSchema> fields = new ArrayList<>();
+      fields.add(new TableFieldSchema().setName("route").setType("STRING"));
+      fields.add(new TableFieldSchema().setName("avg_speed").setType("FLOAT"));
+      fields.add(new TableFieldSchema().setName("slowdown_event").setType("BOOLEAN"));
+      fields.add(new TableFieldSchema().setName("window_timestamp").setType("TIMESTAMP"));
+      TableSchema schema = new TableSchema().setFields(fields);
+      return schema;
+    }
+  }
+
+  /**
+   * This PTransform extracts speed info from traffic station readings.
+   * It groups the readings by 'route' and analyzes traffic slowdown for that route.
+   * Lastly, it formats the results for BigQuery.
+   */
+  static class TrackSpeed extends
+      PTransform<PCollection<KV<String, StationSpeed>>, PCollection<TableRow>> {
+    @Override
+    public PCollection<TableRow> apply(PCollection<KV<String, StationSpeed>> stationSpeed) {
+      // Apply a GroupByKey transform to collect a list of all station
+      // readings for a given route.
+      PCollection<KV<String, Iterable<StationSpeed>>> timeGroup = stationSpeed.apply(
+        GroupByKey.<String, StationSpeed>create());
+
+      // Analyze 'slowdown' over the route readings.
+      PCollection<KV<String, RouteInfo>> stats = timeGroup.apply(ParDo.of(new GatherStats()));
+
+      // Format the results for writing to BigQuery
+      PCollection<TableRow> results = stats.apply(
+          ParDo.of(new FormatStatsFn()));
+
+      return results;
+    }
+  }
+
+  static class ReadFileAndExtractTimestamps extends PTransform<PBegin, PCollection<String>> {
+    private final String inputFile;
+
+    public ReadFileAndExtractTimestamps(String inputFile) {
+      this.inputFile = inputFile;
+    }
+
+    @Override
+    public PCollection<String> apply(PBegin begin) {
+      return begin
+          .apply(TextIO.Read.from(inputFile))
+          .apply(ParDo.of(new ExtractTimestamps()));
+    }
+  }
+
+  /**
+  * Options supported by {@link TrafficRoutes}.
+  *
+  * <p>Inherits standard configuration options.
+  */
+  private interface TrafficRoutesOptions extends DataflowExampleOptions,
+      ExamplePubsubTopicAndSubscriptionOptions, ExampleBigQueryTableOptions {
+    @Description("Input file to inject to Pub/Sub topic")
+    @Default.String("gs://dataflow-samples/traffic_sensor/"
+        + "Freeways-5Minaa2010-01-01_to_2010-02-15_test2.csv")
+    String getInputFile();
+    void setInputFile(String value);
+
+    @Description("Numeric value of sliding window duration, in minutes")
+    @Default.Integer(WINDOW_DURATION)
+    Integer getWindowDuration();
+    void setWindowDuration(Integer value);
+
+    @Description("Numeric value of window 'slide every' setting, in minutes")
+    @Default.Integer(WINDOW_SLIDE_EVERY)
+    Integer getWindowSlideEvery();
+    void setWindowSlideEvery(Integer value);
+
+    @Description("Whether to run the pipeline with unbounded input")
+    @Default.Boolean(false)
+    boolean isUnbounded();
+    void setUnbounded(boolean value);
+  }
+
+  /**
+   * Sets up and starts streaming pipeline.
+   *
+   * @throws IOException if there is a problem setting up resources
+   */
+  public static void main(String[] args) throws IOException {
+    TrafficRoutesOptions options = PipelineOptionsFactory.fromArgs(args)
+        .withValidation()
+        .as(TrafficRoutesOptions.class);
+
+    options.setBigQuerySchema(FormatStatsFn.getSchema());
+    // Using DataflowExampleUtils to set up required resources.
+    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options, options.isUnbounded());
+
+    Pipeline pipeline = Pipeline.create(options);
+    TableReference tableRef = new TableReference();
+    tableRef.setProjectId(options.getProject());
+    tableRef.setDatasetId(options.getBigQueryDataset());
+    tableRef.setTableId(options.getBigQueryTable());
+
+    PCollection<String> input;
+    if (options.isUnbounded()) {
+      // Read unbounded PubSubIO.
+      input = pipeline.apply(PubsubIO.Read
+          .timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
+          .subscription(options.getPubsubSubscription()));
+    } else {
+      // Read bounded PubSubIO.
+      input = pipeline.apply(PubsubIO.Read
+          .timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
+          .subscription(options.getPubsubSubscription()).maxNumRecords(VALID_INPUTS));
+
+      // To read bounded TextIO files, use:
+      // input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
+      //    .apply(ParDo.of(new ExtractTimestamps()));
+    }
+    input
+        // row... => <station route, station speed> ...
+        .apply(ParDo.of(new ExtractStationSpeedFn()))
+        // map the incoming data stream into sliding windows.
+        // The default window duration values work well if you're running the accompanying Pub/Sub
+        // generator script without the --replay flag, so that there are no simulated pauses in
+        // the sensor data publication. You may want to adjust the values otherwise.
+        .apply(Window.<KV<String, StationSpeed>>into(SlidingWindows.of(
+            Duration.standardMinutes(options.getWindowDuration())).
+            every(Duration.standardMinutes(options.getWindowSlideEvery()))))
+        .apply(new TrackSpeed())
+        .apply(BigQueryIO.Write.to(tableRef)
+            .withSchema(FormatStatsFn.getSchema()));
+
+    // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
+    if (!Strings.isNullOrEmpty(options.getInputFile())
+        && !Strings.isNullOrEmpty(options.getPubsubTopic())) {
+      dataflowUtils.runInjectorPipeline(
+          new ReadFileAndExtractTimestamps(options.getInputFile()),
+          options.getPubsubTopic(),
+          PUBSUB_TIMESTAMP_LABEL_KEY);
+    }
+
+    // Run the pipeline.
+    PipelineResult result = pipeline.run();
+
+    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
+    dataflowUtils.waitToFinish(result);
+  }
+
+  /**
+   * Reads the average speed from the field at index 9 of a comma-split sensor record.
+   *
+   * @param inputItems the fields of one input line
+   * @return the speed, or {@code null} if the field is absent or is not a valid double
+   */
+  private static Double tryParseAvgSpeed(String[] inputItems) {
+    String rawSpeed = tryParseString(inputItems, 9);
+    // Test for a missing field explicitly instead of catching NullPointerException, which
+    // would also hide unrelated programming errors.
+    if (rawSpeed == null) {
+      return null;
+    }
+    try {
+      return Double.parseDouble(rawSpeed);
+    } catch (NumberFormatException e) {
+      // The field is present but does not hold a number.
+      return null;
+    }
+  }
+
+  private static String tryParseStationType(String[] inputItems) {
+    return tryParseString(inputItems, 4);
+  }
+
+  private static String tryParseStationId(String[] inputItems) {
+    return tryParseString(inputItems, 1);
+  }
+
+  private static String tryParseTimestamp(String[] inputItems) {
+    return tryParseString(inputItems, 0);
+  }
+
+  /**
+   * Returns the field at {@code index}, or {@code null} when the record has too few fields.
+   *
+   * @param inputItems the fields of one comma-split input line
+   * @param index zero-based position of the wanted field
+   * @return the field, or {@code null} if {@code index} is past the end of {@code inputItems}
+   */
+  private static String tryParseString(String[] inputItems, int index) {
+    // The last valid index is length - 1, so the comparison must be strict. A '>=' check lets
+    // index == length through and fails with ArrayIndexOutOfBoundsException, e.g. for the
+    // empty array that "," splits into.
+    return inputItems.length > index ? inputItems[index] : null;
+  }
+
+  /**
+   * Builds the lookup from sensor station ID to the small, hard-wired San Diego 'route'
+   * that the station is tracked under.
+   */
+  private static Map<String, String> buildStationInfo() {
+    Map<String, String> routeByStation = new Hashtable<>();
+    // From freeway 805 S.
+    routeByStation.put("1108413", "SDRoute1");
+    // From freeway 78 E.
+    routeByStation.put("1108699", "SDRoute2");
+    routeByStation.put("1108702", "SDRoute2");
+    return routeByStation;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoes.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoes.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoes.java
new file mode 100644
index 0000000..503bcad
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoes.java
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An example that reads the public samples of weather data from BigQuery, counts the number of
+ * tornadoes that occur in each month, and writes the results to BigQuery.
+ *
+ * <p>Concepts: Reading/writing BigQuery; counting a PCollection; user-defined PTransforms
+ *
+ * <p>Note: Before running this example, you must create a BigQuery dataset to contain your output
+ * table.
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ * and the BigQuery table for the output, with the form
+ * <pre>{@code
+ *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
+ * }</pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * </pre>
+ * and the BigQuery table for the output:
+ * <pre>{@code
+ *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
+ * }</pre>
+ *
+ * <p>The BigQuery input table defaults to {@code clouddataflow-readonly:samples.weather_stations}
+ * and can be overridden with {@code --input}.
+ */
+public class BigQueryTornadoes {
+  // Default to using a 1000 row subset of the public weather station table publicdata:samples.gsod.
+  private static final String WEATHER_SAMPLES_TABLE =
+      "clouddataflow-readonly:samples.weather_stations";
+
+  /**
+   * Examines each row in the input table. If a tornado was recorded
+   * in that sample, the month in which it occurred is output.
+   */
+  static class ExtractTornadoesFn extends DoFn<TableRow, Integer> {
+    @Override
+    public void processElement(ProcessContext c){
+      TableRow row = c.element();
+      // NOTE(review): the cast result is unboxed by the 'if', so a row without a "tornado"
+      // value would throw NullPointerException. Presumably the input always sets it; confirm.
+      if ((Boolean) row.get("tornado")) {
+        // The "month" value is cast to String and parsed, so the month is emitted as an int.
+        c.output(Integer.parseInt((String) row.get("month")));
+      }
+    }
+  }
+
+  /**
+   * Prepares the data for writing to BigQuery by building a TableRow object containing an
+   * integer representation of month and the number of tornadoes that occurred in each month.
+   */
+  static class FormatCountsFn extends DoFn<KV<Integer, Long>, TableRow> {
+    @Override
+    public void processElement(ProcessContext c) {
+      TableRow row = new TableRow()
+          .set("month", c.element().getKey())
+          .set("tornado_count", c.element().getValue());
+      c.output(row);
+    }
+  }
+
+  /**
+   * Turns the rows of a table into a table of per-month tornado counts.
+   *
+   * <p>The input schema is described by
+   * https://developers.google.com/bigquery/docs/dataset-gsod .
+   * The output holds the total number of tornadoes found in each month, in
+   * the following schema:
+   * <ul>
+   *   <li>month: integer</li>
+   *   <li>tornado_count: integer</li>
+   * </ul>
+   */
+  static class CountTornadoes
+      extends PTransform<PCollection<TableRow>, PCollection<TableRow>> {
+    @Override
+    public PCollection<TableRow> apply(PCollection<TableRow> rows) {
+      return rows
+          // row... => month...
+          .apply(ParDo.of(new ExtractTornadoesFn()))
+          // month... => <month,count>...
+          .apply(Count.<Integer>perElement())
+          // <month,count>... => row...
+          .apply(ParDo.of(new FormatCountsFn()));
+    }
+  }
+
+  /**
+   * Options supported by {@link BigQueryTornadoes}.
+   *
+   * <p>Inherits standard configuration options.
+   */
+  private static interface Options extends PipelineOptions {
+    @Description("Table to read from, specified as "
+        + "<project_id>:<dataset_id>.<table_id>")
+    @Default.String(WEATHER_SAMPLES_TABLE)
+    String getInput();
+    void setInput(String value);
+
+    @Description("BigQuery table to write to, specified as "
+        + "<project_id>:<dataset_id>.<table_id>. The dataset must already exist.")
+    @Validation.Required
+    String getOutput();
+    void setOutput(String value);
+  }
+
+  public static void main(String[] args) {
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+
+    Pipeline p = Pipeline.create(options);
+
+    // Build the table schema for the output table.
+    List<TableFieldSchema> fields = new ArrayList<>();
+    fields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
+    fields.add(new TableFieldSchema().setName("tornado_count").setType("INTEGER"));
+    TableSchema schema = new TableSchema().setFields(fields);
+
+    p.apply(BigQueryIO.Read.from(options.getInput()))
+     .apply(new CountTornadoes())
+     .apply(BigQueryIO.Write
+        .to(options.getOutput())
+        .withSchema(schema)
+        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
+        .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamples.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamples.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamples.java
new file mode 100644
index 0000000..9540dd4
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamples.java
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.Aggregator;
+import com.google.cloud.dataflow.sdk.transforms.Combine;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
+import com.google.cloud.dataflow.sdk.transforms.Sum;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An example that reads the public 'Shakespeare' data, and for each word in
+ * the dataset that is over a given length, generates a string containing the
+ * list of play names in which that word appears, and saves this information
+ * to a bigquery table.
+ *
+ * <p>Concepts: the Combine.perKey transform, which lets you combine the values in a
+ * key-grouped Collection, and how to use an Aggregator to track information in the
+ * Monitoring UI.
+ *
+ * <p>Note: Before running this example, you must create a BigQuery dataset to contain your output
+ * table.
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ * and the BigQuery table for the output:
+ * <pre>{@code
+ *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
+ * }</pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://<STAGING DIRECTORY>
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * </pre>
+ * and the BigQuery table for the output:
+ * <pre>{@code
+ *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
+ * }</pre>
+ *
+ * <p>The BigQuery input table defaults to {@code publicdata:samples.shakespeare} and can
+ * be overridden with {@code --input}.
+ */
+public class CombinePerKeyExamples {
+  // Use the shakespeare public BigQuery sample
+  private static final String SHAKESPEARE_TABLE =
+      "publicdata:samples.shakespeare";
+  // We'll track words >= this word length across all plays in the table.
+  private static final int MIN_WORD_LENGTH = 9;
+
+  /**
+   * Examines each row in the input table. If the word is greater than or equal to MIN_WORD_LENGTH,
+   * outputs word, play_name.
+   */
+  static class ExtractLargeWordsFn extends DoFn<TableRow, KV<String, String>> {
+    private final Aggregator<Long, Long> smallerWords =
+        createAggregator("smallerWords", new Sum.SumLongFn());
+
+    @Override
+    public void processElement(ProcessContext c){
+      TableRow row = c.element();
+      String playName = (String) row.get("corpus");
+      String word = (String) row.get("word");
+      if (word.length() >= MIN_WORD_LENGTH) {
+        c.output(KV.of(word, playName));
+      } else {
+        // Track how many smaller words we're not including. This information will be
+        // visible in the Monitoring UI.
+        smallerWords.addValue(1L);
+      }
+    }
+  }
+
+
+  /**
+   * Prepares the data for writing to BigQuery by building a TableRow object
+   * containing a word with a string listing the plays in which it appeared.
+   */
+  static class FormatShakespeareOutputFn extends DoFn<KV<String, String>, TableRow> {
+    @Override
+    public void processElement(ProcessContext c) {
+      TableRow row = new TableRow()
+          .set("word", c.element().getKey())
+          .set("all_plays", c.element().getValue());
+      c.output(row);
+    }
+  }
+
+  /**
+   * Reads the public 'Shakespeare' data, and for each word in the dataset
+   * over a given length, generates a string containing the list of play names
+   * in which that word appears. It does this via the Combine.perKey
+   * transform, with the ConcatWords combine function.
+   *
+   * <p>Combine.perKey is similar to a GroupByKey followed by a ParDo, but
+   * has more restricted semantics that allow it to be executed more
+   * efficiently. These records are then formatted as BQ table rows.
+   */
+  static class PlaysForWord
+      extends PTransform<PCollection<TableRow>, PCollection<TableRow>> {
+    @Override
+    public PCollection<TableRow> apply(PCollection<TableRow> rows) {
+
+      // row... => <word, play_name> ...
+      PCollection<KV<String, String>> words = rows.apply(
+          ParDo.of(new ExtractLargeWordsFn()));
+
+      // word, play_name => word, all_plays ...
+      PCollection<KV<String, String>> wordAllPlays =
+          words.apply(Combine.<String, String>perKey(
+              new ConcatWords()));
+
+      // <word, all_plays>... => row...
+      PCollection<TableRow> results = wordAllPlays.apply(
+          ParDo.of(new FormatShakespeareOutputFn()));
+
+      return results;
+    }
+  }
+
+  /**
+   * The 'combine function' handed to the Combine.perKey transform. It joins all
+   * non-empty input items into one comma-separated string, so for a given input
+   * word the result lists the Shakespeare plays in which that word has appeared.
+   */
+  public static class ConcatWords implements SerializableFunction<Iterable<String>, String> {
+    @Override
+    public String apply(Iterable<String> input) {
+      StringBuilder joined = new StringBuilder();
+      for (String play : input) {
+        // Empty items contribute nothing, not even a separator.
+        if (play.isEmpty()) {
+          continue;
+        }
+        // A separator is only needed once something has already been written.
+        if (joined.length() != 0) {
+          joined.append(",");
+        }
+        joined.append(play);
+      }
+      return joined.toString();
+    }
+  }
+
+  /**
+   * Options supported by {@link CombinePerKeyExamples}.
+   *
+   * <p>Inherits standard configuration options.
+   */
+  private static interface Options extends PipelineOptions {
+    @Description("Table to read from, specified as "
+        + "<project_id>:<dataset_id>.<table_id>")
+    @Default.String(SHAKESPEARE_TABLE)
+    String getInput();
+    void setInput(String value);
+
+    @Description("Table to write to, specified as "
+        + "<project_id>:<dataset_id>.<table_id>. "
+        + "The dataset_id must already exist")
+    @Validation.Required
+    String getOutput();
+    void setOutput(String value);
+  }
+
+  public static void main(String[] args)
+      throws Exception {
+
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    Pipeline p = Pipeline.create(options);
+
+    // Build the table schema for the output table.
+    List<TableFieldSchema> fields = new ArrayList<>();
+    fields.add(new TableFieldSchema().setName("word").setType("STRING"));
+    fields.add(new TableFieldSchema().setName("all_plays").setType("STRING"));
+    TableSchema schema = new TableSchema().setFields(fields);
+
+    p.apply(BigQueryIO.Read.from(options.getInput()))
+     .apply(new PlaysForWord())
+     .apply(BigQueryIO.Write
+        .to(options.getOutput())
+        .withSchema(schema)
+        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
+        .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/DatastoreWordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/DatastoreWordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/DatastoreWordCount.java
new file mode 100644
index 0000000..eaf1e20
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/DatastoreWordCount.java
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import static com.google.api.services.datastore.client.DatastoreHelper.getPropertyMap;
+import static com.google.api.services.datastore.client.DatastoreHelper.getString;
+import static com.google.api.services.datastore.client.DatastoreHelper.makeFilter;
+import static com.google.api.services.datastore.client.DatastoreHelper.makeKey;
+import static com.google.api.services.datastore.client.DatastoreHelper.makeValue;
+
+import com.google.api.services.datastore.DatastoreV1.Entity;
+import com.google.api.services.datastore.DatastoreV1.Key;
+import com.google.api.services.datastore.DatastoreV1.Property;
+import com.google.api.services.datastore.DatastoreV1.PropertyFilter;
+import com.google.api.services.datastore.DatastoreV1.Query;
+import com.google.api.services.datastore.DatastoreV1.Value;
+import com.google.cloud.dataflow.examples.WordCount;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.DatastoreIO;
+import com.google.cloud.dataflow.sdk.io.Read;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.MapElements;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+
+import java.util.Map;
+import java.util.UUID;
+
+import javax.annotation.Nullable;
+
+/**
+ * A WordCount example using DatastoreIO.
+ *
+ * <p>This example shows how to use DatastoreIO to read from Datastore and
+ * write the results to Cloud Storage.  Note that this example will write
+ * data to Datastore, which may incur charges for Datastore operations.
+ *
+ * <p>To run this example, users need to use gcloud to obtain credentials for Datastore:
+ * <pre>{@code
+ * $ gcloud auth login
+ * }</pre>
+ *
+ * <p>To run this pipeline locally, the following options must be provided:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --dataset=YOUR_DATASET_ID
+ *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PATH]
+ * }</pre>
+ *
+ * <p>To run this example using Dataflow service, you must additionally
+ * provide either {@literal --stagingLocation} or {@literal --tempLocation}, and
+ * select one of the Dataflow pipeline runners, e.g.
+ * {@literal --runner=BlockingDataflowPipelineRunner}.
+ *
+ * <p><b>Note:</b> this example creates entities with <i>Ancestor keys</i> to ensure that all
+ * entities created are in the same entity group. Similarly, the query used to read from the Cloud
+ * Datastore uses an <i>Ancestor filter</i>. Ancestors are used to ensure strongly consistent
+ * results in Cloud Datastore. For more information, see the Cloud Datastore documentation on
+ * <a href="https://cloud.google.com/datastore/docs/concepts/structuring_for_strong_consistency">
+ * Structuring Data for Strong Consistency</a>.
+ */
+public class DatastoreWordCount {
+
+  /**
+   * A DoFn that gets the content of an entity (one line in a
+   * Shakespeare play) and converts it to a string.
+   */
+  static class GetContentFn extends DoFn<Entity, String> {
+    @Override
+    public void processElement(ProcessContext c) {
+      Map<String, Value> props = getPropertyMap(c.element());
+      Value value = props.get("content");
+      if (value != null) {
+        c.output(getString(value));
+      }
+    }
+  }
+
+  /**
+   * A helper function to create the ancestor key for all created and queried entities.
+   *
+   * <p>We use ancestor keys and ancestor queries for strong consistency. See
+   * {@link DatastoreWordCount} javadoc for more information.
+   */
+  static Key makeAncestorKey(@Nullable String namespace, String kind) {
+    Key.Builder keyBuilder = makeKey(kind, "root");
+    if (namespace != null) {
+      keyBuilder.getPartitionIdBuilder().setNamespace(namespace);
+    }
+    return keyBuilder.build();
+  }
+
+  /**
+   * A DoFn that creates an entity for every line in Shakespeare.
+   */
+  static class CreateEntityFn extends DoFn<String, Entity> {
+    private final String namespace;
+    private final String kind;
+    private final Key ancestorKey;
+
+    CreateEntityFn(String namespace, String kind) {
+      this.namespace = namespace;
+      this.kind = kind;
+
+      // Build the ancestor key for all created entities once, including the namespace.
+      ancestorKey = makeAncestorKey(namespace, kind);
+    }
+
+    public Entity makeEntity(String content) {
+      Entity.Builder entityBuilder = Entity.newBuilder();
+
+      // All created entities have the same ancestor Key.
+      Key.Builder keyBuilder = makeKey(ancestorKey, kind, UUID.randomUUID().toString());
+      // NOTE: Namespace is not inherited between keys created with DatastoreHelper.makeKey, so
+      // we must set the namespace on keyBuilder. TODO: Once partitionId inheritance is added,
+      // we can simplify this code.
+      if (namespace != null) {
+        keyBuilder.getPartitionIdBuilder().setNamespace(namespace);
+      }
+
+      entityBuilder.setKey(keyBuilder.build());
+      entityBuilder.addProperty(Property.newBuilder().setName("content")
+          .setValue(Value.newBuilder().setStringValue(content)));
+      return entityBuilder.build();
+    }
+
+    @Override
+    public void processElement(ProcessContext c) {
+      c.output(makeEntity(c.element()));
+    }
+  }
+
+  /**
+   * Options supported by {@link DatastoreWordCount}.
+   *
+   * <p>Inherits standard configuration options.
+   */
+  public static interface Options extends PipelineOptions {
+    @Description("Path of the file to read from and store to Datastore")
+    @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt")
+    String getInput();
+    void setInput(String value);
+
+    @Description("Path of the file to write to")
+    @Validation.Required
+    String getOutput();
+    void setOutput(String value);
+
+    @Description("Dataset ID to read from datastore")
+    @Validation.Required
+    String getDataset();
+    void setDataset(String value);
+
+    @Description("Dataset entity kind")
+    @Default.String("shakespeare-demo")
+    String getKind();
+    void setKind(String value);
+
+    @Description("Dataset namespace")
+    String getNamespace();
+    void setNamespace(@Nullable String value);
+
+    @Description("Read an existing dataset, do not write first")
+    boolean isReadOnly();
+    void setReadOnly(boolean value);
+
+    @Description("Number of output shards")
+    @Default.Integer(0) // If the system should choose automatically.
+    int getNumShards();
+    void setNumShards(int value);
+  }
+
+  /**
+   * Creates and runs a pipeline that populates Datastore from a text input.
+   * The pipeline runner is whichever one the supplied options select.
+   */
+  public static void writeDataToDatastore(Options options) {
+      Pipeline p = Pipeline.create(options);
+      p.apply(TextIO.Read.named("ReadLines").from(options.getInput()))
+       .apply(ParDo.of(new CreateEntityFn(options.getNamespace(), options.getKind())))
+       .apply(DatastoreIO.writeTo(options.getDataset()));
+
+      p.run();
+  }
+
+  /**
+   * Build a Cloud Datastore ancestor query for the specified {@link Options#getNamespace} and
+   * {@link Options#getKind}.
+   *
+   * <p>We use ancestor keys and ancestor queries for strong consistency. See
+   * {@link DatastoreWordCount} javadoc for more information.
+   *
+   * @see <a href="https://cloud.google.com/datastore/docs/concepts/queries#Datastore_Ancestor_filters">Ancestor filters</a>
+   */
+  static Query makeAncestorKindQuery(Options options) {
+    Query.Builder q = Query.newBuilder();
+    q.addKindBuilder().setName(options.getKind());
+    q.setFilter(makeFilter(
+        "__key__",
+        PropertyFilter.Operator.HAS_ANCESTOR,
+        makeValue(makeAncestorKey(options.getNamespace(), options.getKind()))));
+    return q.build();
+  }
+
+  /**
+   * An example that creates a pipeline to do DatastoreIO.Read from Datastore.
+   */
+  public static void readDataFromDatastore(Options options) {
+    Query query = makeAncestorKindQuery(options);
+
+    // For Datastore sources, the read namespace can be set on the entire query.
+    DatastoreIO.Source source = DatastoreIO.source()
+        .withDataset(options.getDataset())
+        .withQuery(query)
+        .withNamespace(options.getNamespace());
+
+    Pipeline p = Pipeline.create(options);
+    p.apply("ReadShakespeareFromDatastore", Read.from(source))
+        .apply("StringifyEntity", ParDo.of(new GetContentFn()))
+        .apply("CountWords", new WordCount.CountWords())
+        .apply("PrintWordCount", MapElements.via(new WordCount.FormatAsTextFn()))
+        .apply("WriteLines", TextIO.Write.to(options.getOutput())
+            .withNumShards(options.getNumShards()));
+    p.run();
+  }
+
+  /**
+   * An example to demo how to use {@link DatastoreIO}.  The runner here is
+   * customizable, which means users could pass either {@code DirectPipelineRunner}
+   * or {@code DataflowPipelineRunner} in the pipeline options.
+   */
+  public static void main(String args[]) {
+    // The options are used in two places, for Dataflow service, and
+    // building DatastoreIO.Read object
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+
+    if (!options.isReadOnly()) {
+      // First example: write data to Datastore for reading later.
+      //
+      // NOTE: this write does not delete any existing Entities in the Datastore, so if run
+      // multiple times with the same output dataset, there may be duplicate entries. The
+      // Datastore Query tool in the Google Developers Console can be used to inspect or erase all
+      // entries with a particular namespace and/or kind.
+      DatastoreWordCount.writeDataToDatastore(options);
+    }
+
+    // Second example: do parallel read from Datastore.
+    DatastoreWordCount.readDataFromDatastore(options);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/DeDupExample.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/DeDupExample.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/DeDupExample.java
new file mode 100644
index 0000000..9873561
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/DeDupExample.java
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates;
+import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
+
+/**
+ * This example uses as input Shakespeare's plays as plaintext files, and will remove any
+ * duplicate lines across all the files. (The output does not preserve any input order).
+ *
+ * <p>Concepts: the RemoveDuplicates transform, and how to wire transforms together.
+ * Demonstrates {@link com.google.cloud.dataflow.sdk.io.TextIO.Read}/
+ * {@link RemoveDuplicates}/{@link com.google.cloud.dataflow.sdk.io.TextIO.Write}.
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ *   --project=YOUR_PROJECT_ID
+ * and a local output file or output prefix on GCS:
+ *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * and an output prefix on GCS:
+ *   --output=gs://YOUR_OUTPUT_PREFIX
+ *
+ * <p>The input defaults to {@code gs://dataflow-samples/shakespeare/*} and can be
+ * overridden with {@code --input}.
+ */
+public class DeDupExample {
+
+  /**
+   * Options supported by {@link DeDupExample}.
+   *
+   * <p>Inherits standard configuration options.
+   */
+  private static interface Options extends PipelineOptions {
+    @Description("Path to the directory or GCS prefix containing files to read from")
+    @Default.String("gs://dataflow-samples/shakespeare/*")
+    String getInput();
+    void setInput(String value);
+
+    @Description("Path of the file to write to")
+    @Default.InstanceFactory(OutputFactory.class)
+    String getOutput();
+    void setOutput(String value);
+
+    /** Returns "deduped.txt" resolved against the staging location. */
+    public static class OutputFactory implements DefaultValueFactory<String> {
+      @Override
+      public String create(PipelineOptions options) {
+        DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
+        if (dataflowOptions.getStagingLocation() != null) {
+          return GcsPath.fromUri(dataflowOptions.getStagingLocation())
+              .resolve("deduped.txt").toString();
+        } else {
+          throw new IllegalArgumentException("Must specify --output or --stagingLocation");
+        }
+      }
+    }
+  }
+
+
+  public static void main(String[] args)
+      throws Exception {
+
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    Pipeline p = Pipeline.create(options);
+
+    p.apply(TextIO.Read.named("ReadLines").from(options.getInput()))
+     .apply(RemoveDuplicates.<String>create())
+     .apply(TextIO.Write.named("DedupedShakespeare")
+         .to(options.getOutput()));
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/FilterExamples.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/FilterExamples.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/FilterExamples.java
new file mode 100644
index 0000000..781873a
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/FilterExamples.java
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.Mean;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.View;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.PCollectionView;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+/**
+ * This is an example that demonstrates several approaches to filtering, and use of the Mean
+ * transform. It shows how to dynamically set parameters by defining and using new pipeline options,
+ * and how to use a value derived by the pipeline.
+ *
+ * <p>Concepts: The Mean transform; Options configuration; using pipeline-derived data as a side
+ * input; approaches to filtering, selection, and projection.
+ *
+ * <p>The example reads public samples of weather data from BigQuery. It performs a
+ * projection on the data, finds the global mean of the temperature readings, filters on readings
+ * for a single given month, and then outputs only data (for that month) that has a mean temp
+ * smaller than the derived global mean.
+ *
+ * <p>Note: Before running this example, you must create a BigQuery dataset to contain your output
+ * table.
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ * and the BigQuery table for the output:
+ * <pre>{@code
+ *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
+ *   [--monthFilter=<month_number>]
+ * }
+ * </pre>
+ * where optional parameter {@code --monthFilter} is set to a number 1-12.
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * </pre>
+ * and the BigQuery table for the output:
+ * <pre>{@code
+ *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
+ *   [--monthFilter=<month_number>]
+ * }
+ * </pre>
+ * where optional parameter {@code --monthFilter} is set to a number 1-12.
+ *
+ * <p>The BigQuery input table defaults to {@code clouddataflow-readonly:samples.weather_stations}
+ * and can be overridden with {@code --input}.
+ */
+public class FilterExamples {
+  // Default to using a 1000 row subset of the public weather station table publicdata:samples.gsod.
+  private static final String WEATHER_SAMPLES_TABLE =
+      "clouddataflow-readonly:samples.weather_stations";
+  static final Logger LOG = Logger.getLogger(FilterExamples.class.getName());
+  static final int MONTH_TO_FILTER = 7;
+
+  /**
+   * Examines each row in the input table. Outputs only the subset of the cells this example
+   * is interested in-- the mean_temp and year, month, and day-- as a bigquery table row.
+   */
+  static class ProjectionFn extends DoFn<TableRow, TableRow> {
+    @Override
+    public void processElement(ProcessContext c){
+      TableRow row = c.element();
+      // Grab year, month, day, mean_temp from the row
+      Integer year = Integer.parseInt((String) row.get("year"));
+      Integer month = Integer.parseInt((String) row.get("month"));
+      Integer day = Integer.parseInt((String) row.get("day"));
+      Double meanTemp = Double.parseDouble(row.get("mean_temp").toString());
+      // Prepares the data for writing to BigQuery by building a TableRow object
+      TableRow outRow = new TableRow()
+          .set("year", year).set("month", month)
+          .set("day", day).set("mean_temp", meanTemp);
+      c.output(outRow);
+    }
+  }
+
+  /**
+   * Implements 'filter' functionality.
+   *
+   * <p>Examines each row in the input table. Outputs only rows from the month
+   * monthFilter, which is passed in as a parameter during construction of this DoFn.
+   */
+  static class FilterSingleMonthDataFn extends DoFn<TableRow, TableRow> {
+    Integer monthFilter;
+
+    public FilterSingleMonthDataFn(Integer monthFilter) {
+      this.monthFilter = monthFilter;
+    }
+
+    @Override
+    public void processElement(ProcessContext c){
+      TableRow row = c.element();
+      Integer month;
+      month = (Integer) row.get("month");
+      if (month.equals(this.monthFilter)) {
+        c.output(row);
+      }
+    }
+  }
+
+  /**
+   * Examines each row (weather reading) in the input table. Output the temperature
+   * reading for that row ('mean_temp').
+   */
+  static class ExtractTempFn extends DoFn<TableRow, Double> {
+    @Override
+    public void processElement(ProcessContext c){
+      TableRow row = c.element();
+      Double meanTemp = Double.parseDouble(row.get("mean_temp").toString());
+      c.output(meanTemp);
+    }
+  }
+
+
+
+  /**
+   * Finds the global mean of the mean_temp for each day/record, and outputs
+   * only data that has a mean temp smaller than this global mean.
+   */
+  static class BelowGlobalMean
+      extends PTransform<PCollection<TableRow>, PCollection<TableRow>> {
+    Integer monthFilter;
+
+    public BelowGlobalMean(Integer monthFilter) {
+      this.monthFilter = monthFilter;
+    }
+
+
+    @Override
+    public PCollection<TableRow> apply(PCollection<TableRow> rows) {
+
+      // Extract the mean_temp from each row.
+      PCollection<Double> meanTemps = rows.apply(
+          ParDo.of(new ExtractTempFn()));
+
+      // Find the global mean, of all the mean_temp readings in the weather data,
+      // and prepare this singleton PCollectionView for use as a side input.
+      final PCollectionView<Double> globalMeanTemp =
+          meanTemps.apply(Mean.<Double>globally())
+               .apply(View.<Double>asSingleton());
+
+      // Rows filtered to remove all but a single month
+      PCollection<TableRow> monthFilteredRows = rows
+          .apply(ParDo.of(new FilterSingleMonthDataFn(monthFilter)));
+
+      // Then, use the global mean as a side input, to further filter the weather data.
+      // By using a side input to pass in the filtering criteria, we can use a value
+      // that is computed earlier in pipeline execution.
+      // We'll only output readings with temperatures below this mean.
+      PCollection<TableRow> filteredRows = monthFilteredRows
+          .apply(ParDo
+              .named("ParseAndFilter")
+              .withSideInputs(globalMeanTemp)
+              .of(new DoFn<TableRow, TableRow>() {
+                @Override
+                public void processElement(ProcessContext c) {
+                  Double meanTemp = Double.parseDouble(c.element().get("mean_temp").toString());
+                  Double gTemp = c.sideInput(globalMeanTemp);
+                  if (meanTemp < gTemp) {
+                    c.output(c.element());
+                  }
+                }
+              }));
+
+      return filteredRows;
+    }
+  }
+
+
+  /**
+   * Options supported by {@link FilterExamples}.
+   *
+   * <p>Inherits standard configuration options.
+   */
+  private static interface Options extends PipelineOptions {
+    @Description("Table to read from, specified as "
+        + "<project_id>:<dataset_id>.<table_id>")
+    @Default.String(WEATHER_SAMPLES_TABLE)
+    String getInput();
+    void setInput(String value);
+
+    @Description("Table to write to, specified as "
+        + "<project_id>:<dataset_id>.<table_id>. "
+        + "The dataset_id must already exist")
+    @Validation.Required
+    String getOutput();
+    void setOutput(String value);
+
+    @Description("Numeric value of month to filter on")
+    @Default.Integer(MONTH_TO_FILTER)
+    Integer getMonthFilter();
+    void setMonthFilter(Integer value);
+  }
+
+  /**
+   * Helper method to build the table schema for the output table.
+   */
+  private static TableSchema buildWeatherSchemaProjection() {
+    List<TableFieldSchema> fields = new ArrayList<>();
+    fields.add(new TableFieldSchema().setName("year").setType("INTEGER"));
+    fields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
+    fields.add(new TableFieldSchema().setName("day").setType("INTEGER"));
+    fields.add(new TableFieldSchema().setName("mean_temp").setType("FLOAT"));
+    TableSchema schema = new TableSchema().setFields(fields);
+    return schema;
+  }
+
+  public static void main(String[] args)
+      throws Exception {
+
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    Pipeline p = Pipeline.create(options);
+
+    TableSchema schema = buildWeatherSchemaProjection();
+
+    p.apply(BigQueryIO.Read.from(options.getInput()))
+     .apply(ParDo.of(new ProjectionFn()))
+     .apply(new BelowGlobalMean(options.getMonthFilter()))
+     .apply(BigQueryIO.Write
+        .to(options.getOutput())
+        .withSchema(schema)
+        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
+        .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
+
+    p.run();
+  }
+}

[54/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficRoutes.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficRoutes.java b/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficRoutes.java
deleted file mode 100644
index e3e88c2..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficRoutes.java
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
-import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
-import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
-import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicAndSubscriptionOptions;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.coders.AvroCoder;
-import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PBegin;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.Strings;
-import com.google.common.collect.Lists;
-
-import org.apache.avro.reflect.Nullable;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.Map;
-
-/**
- * A Dataflow Example that runs in both batch and streaming modes with traffic sensor data.
- * You can configure the running mode by setting {@literal --streaming} to true or false.
- *
- * <p>Concepts: The batch and streaming runners, GroupByKey, sliding windows, and
- * Google Cloud Pub/Sub topic injection.
- *
- * <p>This example analyzes traffic sensor data using SlidingWindows. For each window,
- * it calculates the average speed over the window for some small set of predefined 'routes',
- * and looks for 'slowdowns' in those routes. It writes its results to a BigQuery table.
- *
- * <p>In batch mode, the pipeline reads traffic sensor data from {@literal --inputFile}.
- *
- * <p>In streaming mode, the pipeline reads the data from a Pub/Sub topic.
- * By default, the example will run a separate pipeline to inject the data from the default
- * {@literal --inputFile} to the Pub/Sub {@literal --pubsubTopic}. It will make it available for
- * the streaming pipeline to process. You may override the default {@literal --inputFile} with the
- * file of your choosing. You may also set {@literal --inputFile} to an empty string, which will
- * disable the automatic Pub/Sub injection, and allow you to use a separate tool to control the
- * input to this example. Example code that publishes traffic sensor data to a Pub/Sub topic
- * is provided in
- * <a href="https://github.com/GoogleCloudPlatform/cloud-pubsub-samples-python/tree/master/gce-cmdline-publisher">cloud-pubsub-samples-python/gce-cmdline-publisher</a>.
- *
- * <p>The example is configured to use the default Pub/Sub topic and the default BigQuery table
- * from the example common package (there are no defaults for a general Dataflow pipeline).
- * You can override them by using the {@literal --pubsubTopic}, {@literal --bigQueryDataset}, and
- * {@literal --bigQueryTable} options. If the Pub/Sub topic or the BigQuery table do not exist,
- * the example will try to create them.
- *
- * <p>The example will try to cancel the pipelines on the signal to terminate the process (CTRL-C)
- * and then exit.
- */
-
-public class TrafficRoutes {
-
-  private static final String PUBSUB_TIMESTAMP_LABEL_KEY = "timestamp_ms";
-  private static final Integer VALID_INPUTS = 4999;
-
-  // Instantiate some small predefined San Diego routes to analyze
-  static Map<String, String> sdStations = buildStationInfo();
-  static final int WINDOW_DURATION = 3;  // Default sliding window duration in minutes
-  static final int WINDOW_SLIDE_EVERY = 1;  // Default window 'slide every' setting in minutes
-
-  /**
-   * This class holds information about a station reading's average speed: the station id,
-   * the average speed, and the reading's timestamp (a {@code Long}; the pipeline in this file
-   * fills it with the element timestamp in milliseconds).
-   *
-   * <p>The natural ordering compares {@code timestamp} only. {@code compareTo} unboxes both
-   * timestamps, so it throws {@link NullPointerException} if either one is null.
-   */
-  @DefaultCoder(AvroCoder.class)
-  static class StationSpeed implements Comparable<StationSpeed> {
-    @Nullable String stationId;
-    @Nullable Double avgSpeed;
-    @Nullable Long timestamp;
-
-    public StationSpeed() {}
-
-    public StationSpeed(String stationId, Double avgSpeed, Long timestamp) {
-      this.stationId = stationId;
-      this.avgSpeed = avgSpeed;
-      this.timestamp = timestamp;
-    }
-
-    public String getStationId() {
-      return this.stationId;
-    }
-    public Double getAvgSpeed() {
-      return this.avgSpeed;
-    }
-
-    @Override
-    public int compareTo(StationSpeed other) {
-      return Long.compare(this.timestamp, other.timestamp);
-    }
-  }
-
-  /**
-   * This class holds information about a route's speed/slowdown: the route name, an average
-   * speed, and a flag saying whether a slowdown event was detected. All three fields are
-   * nullable, and the no-argument constructor leaves them null.
-   */
-  @DefaultCoder(AvroCoder.class)
-  static class RouteInfo {
-    @Nullable String route;
-    @Nullable Double avgSpeed;
-    @Nullable Boolean slowdownEvent;
-
-
-    public RouteInfo() {}
-
-    public RouteInfo(String route, Double avgSpeed, Boolean slowdownEvent) {
-      this.route = route;
-      this.avgSpeed = avgSpeed;
-      this.slowdownEvent = slowdownEvent;
-    }
-
-    public String getRoute() {
-      return this.route;
-    }
-    public Double getAvgSpeed() {
-      return this.avgSpeed;
-    }
-    public Boolean getSlowdownEvent() {
-      return this.slowdownEvent;
-    }
-  }
-
-  /**
-   * Re-emits each input line with its element timestamp taken from the line's first
-   * comma-separated field, parsed with the pattern {@code MM/dd/yyyy HH:mm:ss}. A line is
-   * dropped when emitting it raises an {@link IllegalArgumentException}, which covers a
-   * timestamp field that does not match the pattern.
-   */
-  static class ExtractTimestamps extends DoFn<String, String> {
-    private static final DateTimeFormatter dateTimeFormat =
-        DateTimeFormat.forPattern("MM/dd/yyyy HH:mm:ss");
-
-    @Override
-    public void processElement(DoFn<String, String>.ProcessContext c) throws Exception {
-      String line = c.element();
-      String rawTimestamp = tryParseTimestamp(line.split(","));
-      if (rawTimestamp == null) {
-        return;
-      }
-      try {
-        long millis = dateTimeFormat.parseMillis(rawTimestamp);
-        c.outputWithTimestamp(line, new Instant(millis));
-      } catch (IllegalArgumentException e) {
-        // Skip the invalid input.
-      }
-    }
-  }
-
-  /**
-   * Keeps only 'main line' ({@code ML}) readings from stations listed in {@code sdStations},
-   * and emits each one as a {@link StationSpeed} keyed by the station's route name.
-   */
-  static class ExtractStationSpeedFn extends DoFn<String, KV<String, StationSpeed>> {
-
-    @Override
-    public void processElement(ProcessContext c) {
-      String[] fields = c.element().split(",");
-      // For this analysis, use only 'main line' station types.
-      if (!"ML".equals(tryParseStationType(fields))) {
-        return;
-      }
-      Double avgSpeed = tryParseAvgSpeed(fields);
-      String stationId = tryParseStationId(fields);
-      // For this simple example, filter out everything but some hardwired routes.
-      if (avgSpeed == null || stationId == null || !sdStations.containsKey(stationId)) {
-        return;
-      }
-      StationSpeed reading = new StationSpeed(stationId, avgSpeed, c.timestamp().getMillis());
-      // The output key is the 'route' name stored in the 'sdStations' map.
-      String route = sdStations.get(stationId);
-      c.output(KV.of(route, reading));
-    }
-  }
-
-  /**
-   * Computes, for one route, the mean of all non-null station speeds in the grouped readings
-   * together with a 'slowdown' flag.
-   *
-   * <p>Readings are visited in timestamp order. Each reading is compared with the previous
-   * reading of the same station: a strictly higher speed counts as a speedup, anything else as
-   * a slowdown. The flag is set when the slowdown count is at least twice the speedup count.
-   * Nothing is emitted when the route has no non-null speed.
-   *
-   * <p>NOTE(review): when no station has two readings, both counts are zero and the flag is
-   * still set; confirm that this is intended.
-   *
-   * <p>Note: these calculations are for example purposes only, and are unrealistic and
-   * oversimplified.
-   */
-  static class GatherStats
-      extends DoFn<KV<String, Iterable<StationSpeed>>, KV<String, RouteInfo>> {
-    @Override
-    public void processElement(ProcessContext c) throws IOException {
-      String route = c.element().getKey();
-      // StationSpeeds sort by embedded timestamp.
-      List<StationSpeed> readings = Lists.newArrayList(c.element().getValue());
-      Collections.sort(readings);
-
-      double totalSpeed = 0.0;
-      int readingCount = 0;
-      int fasterCount = 0;
-      int notFasterCount = 0;
-      Map<String, Double> lastSpeedByStation = new HashMap<>();
-      for (StationSpeed reading : readings) {
-        Double speed = reading.getAvgSpeed();
-        if (speed == null) {
-          // Readings without a speed affect neither the average nor the trend counts.
-          continue;
-        }
-        totalSpeed += speed;
-        readingCount++;
-        // put() hands back the station's previous speed, or null for its first reading.
-        Double previousSpeed = lastSpeedByStation.put(reading.getStationId(), speed);
-        if (previousSpeed != null) {
-          if (previousSpeed < speed) {
-            fasterCount++;
-          } else {
-            notFasterCount++;
-          }
-        }
-      }
-      if (readingCount == 0) {
-        // No average to compute.
-        return;
-      }
-      boolean slowdownEvent = notFasterCount >= 2 * fasterCount;
-      c.output(KV.of(route, new RouteInfo(route, totalSpeed / readingCount, slowdownEvent)));
-    }
-  }
-
-  /**
-   * Converts each (route, {@link RouteInfo}) pair into a BigQuery TableRow, and supplies the
-   * matching table schema through {@link #getSchema()}.
-   */
-  static class FormatStatsFn extends DoFn<KV<String, RouteInfo>, TableRow> {
-    @Override
-    public void processElement(ProcessContext c) {
-      KV<String, RouteInfo> routeStats = c.element();
-      RouteInfo info = routeStats.getValue();
-      TableRow row = new TableRow();
-      row.set("avg_speed", info.getAvgSpeed());
-      row.set("slowdown_event", info.getSlowdownEvent());
-      row.set("route", routeStats.getKey());
-      // The element timestamp is stored in the window_timestamp column.
-      row.set("window_timestamp", c.timestamp().toString());
-      c.output(row);
-    }
-
-    /**
-     * Returns the BigQuery schema of the rows produced by this function.
-     */
-    static TableSchema getSchema() {
-      List<TableFieldSchema> columns = new ArrayList<>();
-      columns.add(new TableFieldSchema().setName("route").setType("STRING"));
-      columns.add(new TableFieldSchema().setName("avg_speed").setType("FLOAT"));
-      columns.add(new TableFieldSchema().setName("slowdown_event").setType("BOOLEAN"));
-      columns.add(new TableFieldSchema().setName("window_timestamp").setType("TIMESTAMP"));
-      return new TableSchema().setFields(columns);
-    }
-  }
-
-  /**
-   * Groups station readings by route, runs {@link GatherStats} over each group to analyze
-   * traffic slowdown, and formats the results as BigQuery rows with {@link FormatStatsFn}.
-   */
-  static class TrackSpeed extends
-      PTransform<PCollection<KV<String, StationSpeed>>, PCollection<TableRow>> {
-    @Override
-    public PCollection<TableRow> apply(PCollection<KV<String, StationSpeed>> stationSpeed) {
-      return stationSpeed
-          // Collect all station readings for a given route.
-          .apply(GroupByKey.<String, StationSpeed>create())
-          // Analyze 'slowdown' over the route readings.
-          .apply(ParDo.of(new GatherStats()))
-          // Format the results for writing to BigQuery.
-          .apply(ParDo.of(new FormatStatsFn()));
-    }
-  }
-
-  /**
-   * Reads lines of text from {@code inputFile} and passes them through
-   * {@link ExtractTimestamps}.
-   */
-  static class ReadFileAndExtractTimestamps extends PTransform<PBegin, PCollection<String>> {
-    private final String inputFile;
-
-    public ReadFileAndExtractTimestamps(String inputFile) {
-      this.inputFile = inputFile;
-    }
-
-    @Override
-    public PCollection<String> apply(PBegin begin) {
-      PCollection<String> lines = begin.apply(TextIO.Read.from(inputFile));
-      return lines.apply(ParDo.of(new ExtractTimestamps()));
-    }
-  }
-
-  /**
-   * Options supported by {@link TrafficRoutes}.
-   *
-   * <p>Inherits standard configuration options, and adds the input file, the sliding-window
-   * duration and 'slide every' period (both in minutes), and a flag selecting unbounded input.
-   */
-  private interface TrafficRoutesOptions extends DataflowExampleOptions,
-      ExamplePubsubTopicAndSubscriptionOptions, ExampleBigQueryTableOptions {
-    @Description("Input file to inject to Pub/Sub topic")
-    @Default.String("gs://dataflow-samples/traffic_sensor/"
-        + "Freeways-5Minaa2010-01-01_to_2010-02-15_test2.csv")
-    String getInputFile();
-    void setInputFile(String value);
-
-    @Description("Numeric value of sliding window duration, in minutes")
-    @Default.Integer(WINDOW_DURATION)
-    Integer getWindowDuration();
-    void setWindowDuration(Integer value);
-
-    @Description("Numeric value of window 'slide every' setting, in minutes")
-    @Default.Integer(WINDOW_SLIDE_EVERY)
-    Integer getWindowSlideEvery();
-    void setWindowSlideEvery(Integer value);
-
-    @Description("Whether to run the pipeline with unbounded input")
-    @Default.Boolean(false)
-    boolean isUnbounded();
-    void setUnbounded(boolean value);
-  }
-
-  /**
-   * Sets up and starts the pipeline.
-   *
-   * <p>Input is read from the Pub/Sub subscription in both modes; when {@code isUnbounded()} is
-   * false the read is capped at {@code VALID_INPUTS} records. Readings are windowed with
-   * sliding windows, analyzed by {@link TrackSpeed}, and written to the configured BigQuery
-   * table. If both an input file and a Pub/Sub topic are configured, the injector pipeline is
-   * started before the main pipeline is run.
-   *
-   * @throws IOException if there is a problem setting up resources
-   */
-  public static void main(String[] args) throws IOException {
-    TrafficRoutesOptions options = PipelineOptionsFactory.fromArgs(args)
-        .withValidation()
-        .as(TrafficRoutesOptions.class);
-
-    options.setBigQuerySchema(FormatStatsFn.getSchema());
-    // Using DataflowExampleUtils to set up required resources.
-    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options, options.isUnbounded());
-
-    Pipeline pipeline = Pipeline.create(options);
-    TableReference tableRef = new TableReference();
-    tableRef.setProjectId(options.getProject());
-    tableRef.setDatasetId(options.getBigQueryDataset());
-    tableRef.setTableId(options.getBigQueryTable());
-
-    PCollection<String> input;
-    if (options.isUnbounded()) {
-      // Read unbounded PubSubIO.
-      input = pipeline.apply(PubsubIO.Read
-          .timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
-          .subscription(options.getPubsubSubscription()));
-    } else {
-      // Read bounded PubSubIO: the same subscription, capped at VALID_INPUTS records.
-      input = pipeline.apply(PubsubIO.Read
-          .timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
-          .subscription(options.getPubsubSubscription()).maxNumRecords(VALID_INPUTS));
-
-      // To read bounded TextIO files, use:
-      // input = pipeline.apply(TextIO.Read.from(options.getInputFile()))
-      //    .apply(ParDo.of(new ExtractTimestamps()));
-    }
-    input
-        // row... => <station route, station speed> ...
-        .apply(ParDo.of(new ExtractStationSpeedFn()))
-        // map the incoming data stream into sliding windows.
-        // The default window duration values work well if you're running the accompanying Pub/Sub
-        // generator script without the --replay flag, so that there are no simulated pauses in
-        // the sensor data publication. You may want to adjust the values otherwise.
-        .apply(Window.<KV<String, StationSpeed>>into(SlidingWindows.of(
-            Duration.standardMinutes(options.getWindowDuration())).
-            every(Duration.standardMinutes(options.getWindowSlideEvery()))))
-        .apply(new TrackSpeed())
-        .apply(BigQueryIO.Write.to(tableRef)
-            .withSchema(FormatStatsFn.getSchema()));
-
-    // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
-    if (!Strings.isNullOrEmpty(options.getInputFile())
-        && !Strings.isNullOrEmpty(options.getPubsubTopic())) {
-      dataflowUtils.runInjectorPipeline(
-          new ReadFileAndExtractTimestamps(options.getInputFile()),
-          options.getPubsubTopic(),
-          PUBSUB_TIMESTAMP_LABEL_KEY);
-    }
-
-    // Run the pipeline.
-    PipelineResult result = pipeline.run();
-
-    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
-    dataflowUtils.waitToFinish(result);
-  }
-
-  /**
-   * Parses the average-speed field (index 9) of an input record.
-   *
-   * <p>A missing field is detected with an explicit null check rather than by catching the
-   * {@link NullPointerException} that {@code Double.parseDouble(null)} would throw.
-   *
-   * @param inputItems the comma-separated fields of one input record
-   * @return the parsed speed, or null if the field is missing or is not a valid double
-   */
-  private static Double tryParseAvgSpeed(String[] inputItems) {
-    String rawSpeed = tryParseString(inputItems, 9);
-    if (rawSpeed == null) {
-      return null;
-    }
-    try {
-      return Double.parseDouble(rawSpeed);
-    } catch (NumberFormatException e) {
-      // Not a number: treat the reading as having no usable speed.
-      return null;
-    }
-  }
-
-  private static String tryParseStationType(String[] inputItems) {
-    return tryParseString(inputItems, 4);
-  }
-
-  private static String tryParseStationId(String[] inputItems) {
-    return tryParseString(inputItems, 1);
-  }
-
-  private static String tryParseTimestamp(String[] inputItems) {
-    return tryParseString(inputItems, 0);
-  }
-
-  /**
-   * Returns the field at {@code index}, or null when the record has no field at that position.
-   *
-   * <p>The bounds check must be strict: a record with exactly {@code index} fields has no
-   * element at {@code index}. {@code String.split} also returns an empty array for input such
-   * as {@code ","}, so even index 0 can be missing.
-   *
-   * @param inputItems the comma-separated fields of one input record
-   * @param index zero-based position of the wanted field
-   * @return the field, or null if {@code index} is outside the array
-   */
-  private static String tryParseString(String[] inputItems, int index) {
-    return (index >= 0 && index < inputItems.length) ? inputItems[index] : null;
-  }
-
-  /**
-   * Builds the lookup table from sensor station ID to the name of the small, hard-wired
-   * San Diego 'route' that the station is tracked under.
-   */
-  private static Map<String, String> buildStationInfo() {
-    Map<String, String> routeByStation = new Hashtable<>();
-    routeByStation.put("1108413", "SDRoute1"); // from freeway 805 S
-    routeByStation.put("1108699", "SDRoute2"); // from freeway 78 E
-    routeByStation.put("1108702", "SDRoute2");
-    return routeByStation;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoes.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoes.java b/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoes.java
deleted file mode 100644
index 503bcad..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoes.java
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * An example that reads the public samples of weather data from BigQuery, counts the number of
- * tornadoes that occur in each month, and writes the results to BigQuery.
- *
- * <p>Concepts: Reading/writing BigQuery; counting a PCollection; user-defined PTransforms
- *
- * <p>Note: Before running this example, you must create a BigQuery dataset to contain your output
- * table.
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and the BigQuery table for the output, with the form
- * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
- * }</pre>
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- * }
- * </pre>
- * and the BigQuery table for the output:
- * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
- * }</pre>
- *
- * <p>The BigQuery input table defaults to {@code clouddataflow-readonly:samples.weather_stations}
- * and can be overridden with {@code --input}.
- */
-public class BigQueryTornadoes {
-  // Default to using a 1000 row subset of the public weather station table publicdata:samples.gsod.
-  private static final String WEATHER_SAMPLES_TABLE =
-      "clouddataflow-readonly:samples.weather_stations";
-
-  /**
-   * Examines each row in the input table. If a tornado was recorded
-   * in that sample, the month in which it occurred is output.
-   */
-  static class ExtractTornadoesFn extends DoFn<TableRow, Integer> {
-    @Override
-    public void processElement(ProcessContext c){
-      TableRow row = c.element();
-      if ((Boolean) row.get("tornado")) {
-        c.output(Integer.parseInt((String) row.get("month")));
-      }
-    }
-  }
-
-  /**
-   * Turns each (month, count) pair into a TableRow with a {@code month} column and a
-   * {@code tornado_count} column, ready for writing to BigQuery.
-   */
-  static class FormatCountsFn extends DoFn<KV<Integer, Long>, TableRow> {
-    @Override
-    public void processElement(ProcessContext c) {
-      KV<Integer, Long> monthCount = c.element();
-      TableRow row = new TableRow();
-      row.set("month", monthCount.getKey());
-      row.set("tornado_count", monthCount.getValue());
-      c.output(row);
-    }
-  }
-
-  /**
-   * Takes rows from a table and generates a table of counts.
-   *
-   * <p>The input schema is described by
-   * https://developers.google.com/bigquery/docs/dataset-gsod .
-   * Each output row holds the total number of tornadoes found in one month:
-   * <ul>
-   *   <li>month: integer</li>
-   *   <li>tornado_count: integer</li>
-   * </ul>
-   */
-  static class CountTornadoes
-      extends PTransform<PCollection<TableRow>, PCollection<TableRow>> {
-    @Override
-    public PCollection<TableRow> apply(PCollection<TableRow> rows) {
-      return rows
-          // row... => month...
-          .apply(ParDo.of(new ExtractTornadoesFn()))
-          // month... => <month,count>...
-          .apply(Count.<Integer>perElement())
-          // <month,count>... => row...
-          .apply(ParDo.of(new FormatCountsFn()));
-    }
-  }
-
-  /**
-   * Options supported by {@link BigQueryTornadoes}.
-   *
-   * <p>Inherits standard configuration options, and adds the table to read from (defaulting to
-   * {@code WEATHER_SAMPLES_TABLE}) and the required table to write to.
-   */
-  private static interface Options extends PipelineOptions {
-    @Description("Table to read from, specified as "
-        + "<project_id>:<dataset_id>.<table_id>")
-    @Default.String(WEATHER_SAMPLES_TABLE)
-    String getInput();
-    void setInput(String value);
-
-    @Description("BigQuery table to write to, specified as "
-        + "<project_id>:<dataset_id>.<table_id>. The dataset must already exist.")
-    @Validation.Required
-    String getOutput();
-    void setOutput(String value);
-  }
-
-  /**
-   * Builds and runs the pipeline: reads the input table, applies {@link CountTornadoes}, and
-   * writes the per-month counts to the output table using the CREATE_IF_NEEDED and
-   * WRITE_TRUNCATE dispositions.
-   */
-  public static void main(String[] args) {
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    Pipeline pipeline = Pipeline.create(options);
-
-    // Schema of the output table: a month and its tornado count, both INTEGER.
-    List<TableFieldSchema> columns = new ArrayList<>();
-    columns.add(new TableFieldSchema().setName("month").setType("INTEGER"));
-    columns.add(new TableFieldSchema().setName("tornado_count").setType("INTEGER"));
-    TableSchema outputSchema = new TableSchema().setFields(columns);
-
-    pipeline
-        .apply(BigQueryIO.Read.from(options.getInput()))
-        .apply(new CountTornadoes())
-        .apply(BigQueryIO.Write
-            .to(options.getOutput())
-            .withSchema(outputSchema)
-            .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
-            .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
-
-    pipeline.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamples.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamples.java b/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamples.java
deleted file mode 100644
index 9540dd4..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamples.java
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.Combine;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * An example that reads the public 'Shakespeare' data, and for each word in
- * the dataset that is over a given length, generates a string containing the
- * list of play names in which that word appears, and saves this information
- * to a bigquery table.
- *
- * <p>Concepts: the Combine.perKey transform, which lets you combine the values in a
- * key-grouped Collection, and how to use an Aggregator to track information in the
- * Monitoring UI.
- *
- * <p>Note: Before running this example, you must create a BigQuery dataset to contain your output
- * table.
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and the BigQuery table for the output:
- * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
- * }</pre>
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://<STAGING DIRECTORY>
- *   --runner=BlockingDataflowPipelineRunner
- * }
- * </pre>
- * and the BigQuery table for the output:
- * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
- * }</pre>
- *
- * <p>The BigQuery input table defaults to {@code publicdata:samples.shakespeare} and can
- * be overridden with {@code --input}.
- */
-public class CombinePerKeyExamples {
-  // Use the shakespeare public BigQuery sample
-  private static final String SHAKESPEARE_TABLE =
-      "publicdata:samples.shakespeare";
-  // We'll track words >= this word length across all plays in the table.
-  private static final int MIN_WORD_LENGTH = 9;
-
-  /**
-   * Examines each row in the input table. Words of at least MIN_WORD_LENGTH characters are
-   * emitted as (word, play_name); shorter words are only counted in the {@code smallerWords}
-   * aggregator.
-   */
-  static class ExtractLargeWordsFn extends DoFn<TableRow, KV<String, String>> {
-    private final Aggregator<Long, Long> smallerWords =
-        createAggregator("smallerWords", new Sum.SumLongFn());
-
-    @Override
-    public void processElement(ProcessContext c) {
-      TableRow row = c.element();
-      String playName = (String) row.get("corpus");
-      String word = (String) row.get("word");
-      if (word.length() < MIN_WORD_LENGTH) {
-        // Track how many smaller words we're not including. This information will be
-        // visible in the Monitoring UI.
-        smallerWords.addValue(1L);
-        return;
-      }
-      c.output(KV.of(word, playName));
-    }
-  }
-
-
-  /**
-   * Turns each (word, plays) pair into a TableRow with a {@code word} column and an
-   * {@code all_plays} column, ready for writing to BigQuery.
-   */
-  static class FormatShakespeareOutputFn extends DoFn<KV<String, String>, TableRow> {
-    @Override
-    public void processElement(ProcessContext c) {
-      KV<String, String> wordAndPlays = c.element();
-      TableRow row = new TableRow();
-      row.set("word", wordAndPlays.getKey());
-      row.set("all_plays", wordAndPlays.getValue());
-      c.output(row);
-    }
-  }
-
-  /**
-   * For each word in the input rows that passes {@link ExtractLargeWordsFn}, builds a string
-   * listing the play names in which the word appears, using the Combine.perKey transform
-   * with the {@link ConcatWords} combine function.
-   *
-   * <p>Combine.perKey is similar to a GroupByKey followed by a ParDo, but
-   * has more restricted semantics that allow it to be executed more
-   * efficiently. The combined records are then formatted as BQ table rows.
-   */
-  static class PlaysForWord
-      extends PTransform<PCollection<TableRow>, PCollection<TableRow>> {
-    @Override
-    public PCollection<TableRow> apply(PCollection<TableRow> rows) {
-      return rows
-          // row... => <word, play_name> ...
-          .apply(ParDo.of(new ExtractLargeWordsFn()))
-          // word, play_name => word, all_plays ...
-          .apply(Combine.<String, String>perKey(new ConcatWords()))
-          // <word, all_plays>... => row...
-          .apply(ParDo.of(new FormatShakespeareOutputFn()));
-    }
-  }
-
-  /**
-   * A 'combine function' used with the Combine.perKey transform. Joins all non-empty input
-   * items into one comma-separated string, in iteration order; empty items are skipped.
-   */
-  public static class ConcatWords implements SerializableFunction<Iterable<String>, String> {
-    @Override
-    public String apply(Iterable<String> input) {
-      StringBuilder joined = new StringBuilder();
-      for (String play : input) {
-        if (play.isEmpty()) {
-          continue;
-        }
-        // A separator is needed before every item except the first one appended.
-        if (joined.length() > 0) {
-          joined.append(",");
-        }
-        joined.append(play);
-      }
-      return joined.toString();
-    }
-  }
-
-  /**
-   * Options supported by {@link CombinePerKeyExamples}.
-   *
-   * <p>Inherits standard configuration options, and adds the table to read from (defaulting to
-   * {@code SHAKESPEARE_TABLE}) and the required table to write to.
-   */
-  private static interface Options extends PipelineOptions {
-    @Description("Table to read from, specified as "
-        + "<project_id>:<dataset_id>.<table_id>")
-    @Default.String(SHAKESPEARE_TABLE)
-    String getInput();
-    void setInput(String value);
-
-    @Description("Table to write to, specified as "
-        + "<project_id>:<dataset_id>.<table_id>. "
-        + "The dataset_id must already exist")
-    @Validation.Required
-    String getOutput();
-    void setOutput(String value);
-  }
-
-  /**
-   * Builds and runs the pipeline: reads the input table, applies {@link PlaysForWord}, and
-   * writes the (word, all_plays) rows to the output table using the CREATE_IF_NEEDED and
-   * WRITE_TRUNCATE dispositions.
-   */
-  public static void main(String[] args)
-      throws Exception {
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    Pipeline pipeline = Pipeline.create(options);
-
-    // Schema of the output table: a word and the plays it appears in, both STRING.
-    List<TableFieldSchema> columns = new ArrayList<>();
-    columns.add(new TableFieldSchema().setName("word").setType("STRING"));
-    columns.add(new TableFieldSchema().setName("all_plays").setType("STRING"));
-    TableSchema outputSchema = new TableSchema().setFields(columns);
-
-    pipeline
-        .apply(BigQueryIO.Read.from(options.getInput()))
-        .apply(new PlaysForWord())
-        .apply(BigQueryIO.Write
-            .to(options.getOutput())
-            .withSchema(outputSchema)
-            .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
-            .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
-
-    pipeline.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DatastoreWordCount.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DatastoreWordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DatastoreWordCount.java
deleted file mode 100644
index eaf1e20..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DatastoreWordCount.java
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import static com.google.api.services.datastore.client.DatastoreHelper.getPropertyMap;
-import static com.google.api.services.datastore.client.DatastoreHelper.getString;
-import static com.google.api.services.datastore.client.DatastoreHelper.makeFilter;
-import static com.google.api.services.datastore.client.DatastoreHelper.makeKey;
-import static com.google.api.services.datastore.client.DatastoreHelper.makeValue;
-
-import com.google.api.services.datastore.DatastoreV1.Entity;
-import com.google.api.services.datastore.DatastoreV1.Key;
-import com.google.api.services.datastore.DatastoreV1.Property;
-import com.google.api.services.datastore.DatastoreV1.PropertyFilter;
-import com.google.api.services.datastore.DatastoreV1.Query;
-import com.google.api.services.datastore.DatastoreV1.Value;
-import com.google.cloud.dataflow.examples.WordCount;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.DatastoreIO;
-import com.google.cloud.dataflow.sdk.io.Read;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.MapElements;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-
-import java.util.Map;
-import java.util.UUID;
-
-import javax.annotation.Nullable;
-
-/**
- * A WordCount example using DatastoreIO.
- *
- * <p>This example shows how to use DatastoreIO to read from Datastore and
- * write the results to Cloud Storage.  Note that this example will write
- * data to Datastore, which may incur charges for Datastore operations.
- *
- * <p>To run this example, users need to use gcloud to obtain credentials for Datastore:
- * <pre>{@code
- * $ gcloud auth login
- * }</pre>
- *
- * <p>To run this pipeline locally, the following options must be provided:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --dataset=YOUR_DATASET_ID
- *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PATH]
- * }</pre>
- *
- * <p>To run this example using Dataflow service, you must additionally
- * provide either {@literal --stagingLocation} or {@literal --tempLocation}, and
- * select one of the Dataflow pipeline runners, e.g.,
- * {@literal --runner=BlockingDataflowPipelineRunner}.
- *
- * <p><b>Note:</b> this example creates entities with <i>Ancestor keys</i> to ensure that all
- * entities created are in the same entity group. Similarly, the query used to read from the Cloud
- * Datastore uses an <i>Ancestor filter</i>. Ancestors are used to ensure strongly consistent
- * results in Cloud Datastore. For more information, see the Cloud Datastore documentation on
- * <a href="https://cloud.google.com/datastore/docs/concepts/structuring_for_strong_consistency">
- * Structuring Data for Strong Consistency</a>.
- */
-public class DatastoreWordCount {
-
-  /**
-   * A DoFn that gets the content of an entity (one line in a
-   * Shakespeare play) and converts it to a string.
-   */
-  static class GetContentFn extends DoFn<Entity, String> {
-    @Override
-    public void processElement(ProcessContext c) {
-      Map<String, Value> props = getPropertyMap(c.element());
-      Value value = props.get("content");
-      if (value != null) {
-        c.output(getString(value));
-      }
-    }
-  }
-
-  /**
-   * A helper function to create the ancestor key for all created and queried entities.
-   *
-   * <p>We use ancestor keys and ancestor queries for strong consistency. See
-   * {@link DatastoreWordCount} javadoc for more information.
-   */
-  static Key makeAncestorKey(@Nullable String namespace, String kind) {
-    Key.Builder keyBuilder = makeKey(kind, "root");
-    if (namespace != null) {
-      keyBuilder.getPartitionIdBuilder().setNamespace(namespace);
-    }
-    return keyBuilder.build();
-  }
-
-  /**
-   * A DoFn that creates an entity for every line in Shakespeare.
-   */
-  static class CreateEntityFn extends DoFn<String, Entity> {
-    private final String namespace;
-    private final String kind;
-    private final Key ancestorKey;
-
-    CreateEntityFn(String namespace, String kind) {
-      this.namespace = namespace;
-      this.kind = kind;
-
-      // Build the ancestor key for all created entities once, including the namespace.
-      ancestorKey = makeAncestorKey(namespace, kind);
-    }
-
-    public Entity makeEntity(String content) {
-      Entity.Builder entityBuilder = Entity.newBuilder();
-
-      // All created entities have the same ancestor Key.
-      Key.Builder keyBuilder = makeKey(ancestorKey, kind, UUID.randomUUID().toString());
-      // NOTE: Namespace is not inherited between keys created with DatastoreHelper.makeKey, so
-      // we must set the namespace on keyBuilder. TODO: Once partitionId inheritance is added,
-      // we can simplify this code.
-      if (namespace != null) {
-        keyBuilder.getPartitionIdBuilder().setNamespace(namespace);
-      }
-
-      entityBuilder.setKey(keyBuilder.build());
-      entityBuilder.addProperty(Property.newBuilder().setName("content")
-          .setValue(Value.newBuilder().setStringValue(content)));
-      return entityBuilder.build();
-    }
-
-    @Override
-    public void processElement(ProcessContext c) {
-      c.output(makeEntity(c.element()));
-    }
-  }
-
-  /**
-   * Options supported by {@link DatastoreWordCount}.
-   *
-   * <p>Inherits standard configuration options.
-   */
-  public static interface Options extends PipelineOptions {
-    @Description("Path of the file to read from and store to Datastore")
-    @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt")
-    String getInput();
-    void setInput(String value);
-
-    @Description("Path of the file to write to")
-    @Validation.Required
-    String getOutput();
-    void setOutput(String value);
-
-    @Description("Dataset ID to read from datastore")
-    @Validation.Required
-    String getDataset();
-    void setDataset(String value);
-
-    @Description("Dataset entity kind")
-    @Default.String("shakespeare-demo")
-    String getKind();
-    void setKind(String value);
-
-    @Description("Dataset namespace")
-    String getNamespace();
-    void setNamespace(@Nullable String value);
-
-    @Description("Read an existing dataset, do not write first")
-    boolean isReadOnly();
-    void setReadOnly(boolean value);
-
-    @Description("Number of output shards")
-    @Default.Integer(0) // If the system should choose automatically.
-    int getNumShards();
-    void setNumShards(int value);
-  }
-
-  /**
-   * An example that creates a pipeline to populate DatastoreIO from a
-   * text input.  The pipeline runs with whichever runner is configured in {@code options}.
-   */
-  public static void writeDataToDatastore(Options options) {
-      Pipeline p = Pipeline.create(options);
-      p.apply(TextIO.Read.named("ReadLines").from(options.getInput()))
-       .apply(ParDo.of(new CreateEntityFn(options.getNamespace(), options.getKind())))
-       .apply(DatastoreIO.writeTo(options.getDataset()));
-
-      p.run();
-  }
-
-  /**
-   * Build a Cloud Datastore ancestor query for the specified {@link Options#getNamespace} and
-   * {@link Options#getKind}.
-   *
-   * <p>We use ancestor keys and ancestor queries for strong consistency. See
-   * {@link DatastoreWordCount} javadoc for more information.
-   *
-   * @see <a href="https://cloud.google.com/datastore/docs/concepts/queries#Datastore_Ancestor_filters">Ancestor filters</a>
-   */
-  static Query makeAncestorKindQuery(Options options) {
-    Query.Builder q = Query.newBuilder();
-    q.addKindBuilder().setName(options.getKind());
-    q.setFilter(makeFilter(
-        "__key__",
-        PropertyFilter.Operator.HAS_ANCESTOR,
-        makeValue(makeAncestorKey(options.getNamespace(), options.getKind()))));
-    return q.build();
-  }
-
-  /**
-   * An example that creates a pipeline to do DatastoreIO.Read from Datastore.
-   */
-  public static void readDataFromDatastore(Options options) {
-    Query query = makeAncestorKindQuery(options);
-
-    // For Datastore sources, the read namespace can be set on the entire query.
-    DatastoreIO.Source source = DatastoreIO.source()
-        .withDataset(options.getDataset())
-        .withQuery(query)
-        .withNamespace(options.getNamespace());
-
-    Pipeline p = Pipeline.create(options);
-    p.apply("ReadShakespeareFromDatastore", Read.from(source))
-        .apply("StringifyEntity", ParDo.of(new GetContentFn()))
-        .apply("CountWords", new WordCount.CountWords())
-        .apply("PrintWordCount", MapElements.via(new WordCount.FormatAsTextFn()))
-        .apply("WriteLines", TextIO.Write.to(options.getOutput())
-            .withNumShards(options.getNumShards()));
-    p.run();
-  }
-
-  /**
-   * An example to demo how to use {@link DatastoreIO}.  The runner here is
-   * customizable, which means users could pass either {@code DirectPipelineRunner}
-   * or {@code DataflowPipelineRunner} in the pipeline options.
-   */
-  public static void main(String args[]) {
-    // The options are used in two places, for Dataflow service, and
-    // building DatastoreIO.Read object
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-
-    if (!options.isReadOnly()) {
-      // First example: write data to Datastore for reading later.
-      //
-      // NOTE: this write does not delete any existing Entities in the Datastore, so if run
-      // multiple times with the same output dataset, there may be duplicate entries. The
-      // Datastore Query tool in the Google Developers Console can be used to inspect or erase all
-      // entries with a particular namespace and/or kind.
-      DatastoreWordCount.writeDataToDatastore(options);
-    }
-
-    // Second example: do parallel read from Datastore.
-    DatastoreWordCount.readDataFromDatastore(options);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DeDupExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DeDupExample.java b/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DeDupExample.java
deleted file mode 100644
index 9873561..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DeDupExample.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates;
-import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
-
-/**
- * This example uses as input Shakespeare's plays as plaintext files, and will remove any
- * duplicate lines across all the files. (The output does not preserve any input order).
- *
- * <p>Concepts: the RemoveDuplicates transform, and how to wire transforms together.
- * Demonstrates {@link com.google.cloud.dataflow.sdk.io.TextIO.Read}/
- * {@link RemoveDuplicates}/{@link com.google.cloud.dataflow.sdk.io.TextIO.Write}.
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- *   --project=YOUR_PROJECT_ID
- * and a local output file or output prefix on GCS:
- *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- * and an output prefix on GCS:
- *   --output=gs://YOUR_OUTPUT_PREFIX
- *
- * <p>The input defaults to {@code gs://dataflow-samples/shakespeare/*} and can be
- * overridden with {@code --input}.
- */
-public class DeDupExample {
-
-  /**
-   * Options supported by {@link DeDupExample}.
-   *
-   * <p>Inherits standard configuration options.
-   */
-  private static interface Options extends PipelineOptions {
-    @Description("Path to the directory or GCS prefix containing files to read from")
-    @Default.String("gs://dataflow-samples/shakespeare/*")
-    String getInput();
-    void setInput(String value);
-
-    @Description("Path of the file to write to")
-    @Default.InstanceFactory(OutputFactory.class)
-    String getOutput();
-    void setOutput(String value);
-
-    /** Returns gs://${STAGING_LOCATION}/"deduped.txt". */
-    public static class OutputFactory implements DefaultValueFactory<String> {
-      @Override
-      public String create(PipelineOptions options) {
-        DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
-        if (dataflowOptions.getStagingLocation() != null) {
-          return GcsPath.fromUri(dataflowOptions.getStagingLocation())
-              .resolve("deduped.txt").toString();
-        } else {
-          throw new IllegalArgumentException("Must specify --output or --stagingLocation");
-        }
-      }
-    }
-  }
-
-
-  public static void main(String[] args)
-      throws Exception {
-
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    Pipeline p = Pipeline.create(options);
-
-    p.apply(TextIO.Read.named("ReadLines").from(options.getInput()))
-     .apply(RemoveDuplicates.<String>create())
-     .apply(TextIO.Write.named("DedupedShakespeare")
-         .to(options.getOutput()));
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/FilterExamples.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/FilterExamples.java b/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/FilterExamples.java
deleted file mode 100644
index 781873a..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/FilterExamples.java
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.Mean;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.View;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.logging.Logger;
-
-/**
- * This is an example that demonstrates several approaches to filtering, and use of the Mean
- * transform. It shows how to dynamically set parameters by defining and using new pipeline options,
- * and how to use a value derived by the pipeline.
- *
- * <p>Concepts: The Mean transform; Options configuration; using pipeline-derived data as a side
- * input; approaches to filtering, selection, and projection.
- *
- * <p>The example reads public samples of weather data from BigQuery. It performs a
- * projection on the data, finds the global mean of the temperature readings, filters on readings
- * for a single given month, and then outputs only data (for that month) that has a mean temp
- * smaller than the derived global mean.
- *
- * <p>Note: Before running this example, you must create a BigQuery dataset to contain your output
- * table.
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and the BigQuery table for the output:
- * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
- *   [--monthFilter=<month_number>]
- * }
- * </pre>
- * where optional parameter {@code --monthFilter} is set to a number 1-12.
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- * }
- * </pre>
- * and the BigQuery table for the output:
- * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
- *   [--monthFilter=<month_number>]
- * }
- * </pre>
- * where optional parameter {@code --monthFilter} is set to a number 1-12.
- *
- * <p>The BigQuery input table defaults to {@code clouddataflow-readonly:samples.weather_stations}
- * and can be overridden with {@code --input}.
- */
-public class FilterExamples {
-  // Default to using a 1000 row subset of the public weather station table publicdata:samples.gsod.
-  private static final String WEATHER_SAMPLES_TABLE =
-      "clouddataflow-readonly:samples.weather_stations";
-  static final Logger LOG = Logger.getLogger(FilterExamples.class.getName());
-  static final int MONTH_TO_FILTER = 7;
-
-  /**
-   * Examines each row in the input table. Outputs only the subset of the cells this example
-   * is interested in-- the mean_temp and year, month, and day-- as a bigquery table row.
-   */
-  static class ProjectionFn extends DoFn<TableRow, TableRow> {
-    @Override
-    public void processElement(ProcessContext c){
-      TableRow row = c.element();
-      // Grab year, month, day, mean_temp from the row
-      Integer year = Integer.parseInt((String) row.get("year"));
-      Integer month = Integer.parseInt((String) row.get("month"));
-      Integer day = Integer.parseInt((String) row.get("day"));
-      Double meanTemp = Double.parseDouble(row.get("mean_temp").toString());
-      // Prepares the data for writing to BigQuery by building a TableRow object
-      TableRow outRow = new TableRow()
-          .set("year", year).set("month", month)
-          .set("day", day).set("mean_temp", meanTemp);
-      c.output(outRow);
-    }
-  }
-
-  /**
-   * Implements 'filter' functionality.
-   *
-   * <p>Examines each row in the input table. Outputs only rows from the month
-   * monthFilter, which is passed in as a parameter during construction of this DoFn.
-   */
-  static class FilterSingleMonthDataFn extends DoFn<TableRow, TableRow> {
-    Integer monthFilter;
-
-    public FilterSingleMonthDataFn(Integer monthFilter) {
-      this.monthFilter = monthFilter;
-    }
-
-    @Override
-    public void processElement(ProcessContext c){
-      TableRow row = c.element();
-      Integer month;
-      month = (Integer) row.get("month");
-      if (month.equals(this.monthFilter)) {
-        c.output(row);
-      }
-    }
-  }
-
-  /**
-   * Examines each row (weather reading) in the input table. Output the temperature
-   * reading for that row ('mean_temp').
-   */
-  static class ExtractTempFn extends DoFn<TableRow, Double> {
-    @Override
-    public void processElement(ProcessContext c){
-      TableRow row = c.element();
-      Double meanTemp = Double.parseDouble(row.get("mean_temp").toString());
-      c.output(meanTemp);
-    }
-  }
-
-
-
-  /**
-   * Finds the global mean of the mean_temp for each day/record, and outputs
-   * only data that has a mean temp smaller than this global mean.
-   */
-  static class BelowGlobalMean
-      extends PTransform<PCollection<TableRow>, PCollection<TableRow>> {
-    Integer monthFilter;
-
-    public BelowGlobalMean(Integer monthFilter) {
-      this.monthFilter = monthFilter;
-    }
-
-
-    @Override
-    public PCollection<TableRow> apply(PCollection<TableRow> rows) {
-
-      // Extract the mean_temp from each row.
-      PCollection<Double> meanTemps = rows.apply(
-          ParDo.of(new ExtractTempFn()));
-
-      // Find the global mean, of all the mean_temp readings in the weather data,
-      // and prepare this singleton PCollectionView for use as a side input.
-      final PCollectionView<Double> globalMeanTemp =
-          meanTemps.apply(Mean.<Double>globally())
-               .apply(View.<Double>asSingleton());
-
-      // Rows filtered to remove all but a single month
-      PCollection<TableRow> monthFilteredRows = rows
-          .apply(ParDo.of(new FilterSingleMonthDataFn(monthFilter)));
-
-      // Then, use the global mean as a side input, to further filter the weather data.
-      // By using a side input to pass in the filtering criteria, we can use a value
-      // that is computed earlier in pipeline execution.
-      // We'll only output readings with temperatures below this mean.
-      PCollection<TableRow> filteredRows = monthFilteredRows
-          .apply(ParDo
-              .named("ParseAndFilter")
-              .withSideInputs(globalMeanTemp)
-              .of(new DoFn<TableRow, TableRow>() {
-                @Override
-                public void processElement(ProcessContext c) {
-                  Double meanTemp = Double.parseDouble(c.element().get("mean_temp").toString());
-                  Double gTemp = c.sideInput(globalMeanTemp);
-                  if (meanTemp < gTemp) {
-                    c.output(c.element());
-                  }
-                }
-              }));
-
-      return filteredRows;
-    }
-  }
-
-
-  /**
-   * Options supported by {@link FilterExamples}.
-   *
-   * <p>Inherits standard configuration options.
-   */
-  private static interface Options extends PipelineOptions {
-    @Description("Table to read from, specified as "
-        + "<project_id>:<dataset_id>.<table_id>")
-    @Default.String(WEATHER_SAMPLES_TABLE)
-    String getInput();
-    void setInput(String value);
-
-    @Description("Table to write to, specified as "
-        + "<project_id>:<dataset_id>.<table_id>. "
-        + "The dataset_id must already exist")
-    @Validation.Required
-    String getOutput();
-    void setOutput(String value);
-
-    @Description("Numeric value of month to filter on")
-    @Default.Integer(MONTH_TO_FILTER)
-    Integer getMonthFilter();
-    void setMonthFilter(Integer value);
-  }
-
-  /**
-   * Helper method to build the table schema for the output table.
-   */
-  private static TableSchema buildWeatherSchemaProjection() {
-    List<TableFieldSchema> fields = new ArrayList<>();
-    fields.add(new TableFieldSchema().setName("year").setType("INTEGER"));
-    fields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
-    fields.add(new TableFieldSchema().setName("day").setType("INTEGER"));
-    fields.add(new TableFieldSchema().setName("mean_temp").setType("FLOAT"));
-    TableSchema schema = new TableSchema().setFields(fields);
-    return schema;
-  }
-
-  public static void main(String[] args)
-      throws Exception {
-
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    Pipeline p = Pipeline.create(options);
-
-    TableSchema schema = buildWeatherSchemaProjection();
-
-    p.apply(BigQueryIO.Read.from(options.getInput()))
-     .apply(ParDo.of(new ProjectionFn()))
-     .apply(new BelowGlobalMean(options.getMonthFilter()))
-     .apply(BigQueryIO.Write
-        .to(options.getOutput())
-        .withSchema(schema)
-        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
-        .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/JoinExamples.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/JoinExamples.java b/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/JoinExamples.java
deleted file mode 100644
index 745c5d6..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/JoinExamples.java
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult;
-import com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-/**
- * This example shows how to do a join on two collections.
- * It uses a sample of the GDELT 'world event' data (http://goo.gl/OB6oin), joining the event
- * 'action' country code against a table that maps country codes to country names.
- *
- * <p>Concepts: Join operation; multiple input sources.
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and a local output file or output prefix on GCS:
- * <pre>{@code
- *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
- * }</pre>
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- * }
- * </pre>
- * and an output prefix on GCS:
- * <pre>{@code
- *   --output=gs://YOUR_OUTPUT_PREFIX
- * }</pre>
- */
-public class JoinExamples {
-
-  // A 1000-row sample of the GDELT data here: gdelt-bq:full.events.
-  private static final String GDELT_EVENTS_TABLE =
-      "clouddataflow-readonly:samples.gdelt_sample";
-  // A table that maps country codes to country names.
-  private static final String COUNTRY_CODES =
-      "gdelt-bq:full.crosswalk_geocountrycodetohuman";
-
-  /**
-   * Join two collections, using country code as the key.
-   */
-  static PCollection<String> joinEvents(PCollection<TableRow> eventsTable,
-      PCollection<TableRow> countryCodes) throws Exception {
-
-    final TupleTag<String> eventInfoTag = new TupleTag<String>();
-    final TupleTag<String> countryInfoTag = new TupleTag<String>();
-
-    // transform both input collections to tuple collections, where the keys are country
-    // codes in both cases.
-    PCollection<KV<String, String>> eventInfo = eventsTable.apply(
-        ParDo.of(new ExtractEventDataFn()));
-    PCollection<KV<String, String>> countryInfo = countryCodes.apply(
-        ParDo.of(new ExtractCountryInfoFn()));
-
-    // country code 'key' -> CGBKR (<event info>, <country name>)
-    PCollection<KV<String, CoGbkResult>> kvpCollection = KeyedPCollectionTuple
-        .of(eventInfoTag, eventInfo)
-        .and(countryInfoTag, countryInfo)
-        .apply(CoGroupByKey.<String>create());
-
-    // Process the CoGbkResult elements generated by the CoGroupByKey transform.
-    // country code 'key' -> string of <event info>, <country name>
-    PCollection<KV<String, String>> finalResultCollection =
-      kvpCollection.apply(ParDo.named("Process").of(
-        new DoFn<KV<String, CoGbkResult>, KV<String, String>>() {
-          @Override
-          public void processElement(ProcessContext c) {
-            KV<String, CoGbkResult> e = c.element();
-            String countryCode = e.getKey();
-            String countryName = "none";
-            countryName = e.getValue().getOnly(countryInfoTag);
-            for (String eventInfo : c.element().getValue().getAll(eventInfoTag)) {
-              // Generate a string that combines information from both collection values
-              c.output(KV.of(countryCode, "Country name: " + countryName
-                      + ", Event info: " + eventInfo));
-            }
-          }
-      }));
-
-    // Format each joined record as one line of text; the caller writes it out.
-    PCollection<String> formattedResults = finalResultCollection
-        .apply(ParDo.named("Format").of(new DoFn<KV<String, String>, String>() {
-          @Override
-          public void processElement(ProcessContext c) {
-            String outputstring = "Country code: " + c.element().getKey()
-                + ", " + c.element().getValue();
-            c.output(outputstring);
-          }
-        }));
-    return formattedResults;
-  }
-
-  /**
-   * Examines each row (event) in the input table. Output a KV with the key the country
-   * code of the event, and the value a string encoding event information.
-   */
-  static class ExtractEventDataFn extends DoFn<TableRow, KV<String, String>> {
-    @Override
-    public void processElement(ProcessContext c) {
-      TableRow row = c.element();
-      String countryCode = (String) row.get("ActionGeo_CountryCode");
-      String sqlDate = (String) row.get("SQLDATE");
-      String actor1Name = (String) row.get("Actor1Name");
-      String sourceUrl = (String) row.get("SOURCEURL");
-      String eventInfo = "Date: " + sqlDate + ", Actor1: " + actor1Name + ", url: " + sourceUrl;
-      c.output(KV.of(countryCode, eventInfo));
-    }
-  }
-
-
-  /**
-   * Examines each row (country info) in the input table. Output a KV with the key the country
-   * code, and the value the country name.
-   */
-  static class ExtractCountryInfoFn extends DoFn<TableRow, KV<String, String>> {
-    @Override
-    public void processElement(ProcessContext c) {
-      TableRow row = c.element();
-      String countryCode = (String) row.get("FIPSCC");
-      String countryName = (String) row.get("HumanName");
-      c.output(KV.of(countryCode, countryName));
-    }
-  }
-
-
-  /**
-   * Options supported by {@link JoinExamples}.
-   *
-   * <p>Inherits standard configuration options.
-   */
-  private static interface Options extends PipelineOptions {
-    @Description("Path of the file to write to")
-    @Validation.Required
-    String getOutput();
-    void setOutput(String value);
-  }
-
-  public static void main(String[] args) throws Exception {
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    Pipeline p = Pipeline.create(options);
-    // the following two 'applys' create multiple inputs to our pipeline, one for each
-    // of our two input sources.
-    PCollection<TableRow> eventsTable = p.apply(BigQueryIO.Read.from(GDELT_EVENTS_TABLE));
-    PCollection<TableRow> countryCodes = p.apply(BigQueryIO.Read.from(COUNTRY_CODES));
-    PCollection<String> formattedResults = joinEvents(eventsTable, countryCodes);
-    formattedResults.apply(TextIO.Write.to(options.getOutput()));
-    p.run();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamples.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamples.java b/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamples.java
deleted file mode 100644
index 1c26d0f..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamples.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.Max;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * An example that reads the public samples of weather data from BigQuery, and finds
- * the maximum temperature ('mean_temp') for each month.
- *
- * <p>Concepts: The 'Max' statistical combination function, and how to find the max per
- * key group.
- *
- * <p>Note: Before running this example, you must create a BigQuery dataset to contain your output
- * table.
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and the BigQuery table for the output, with the form
- * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
- * }</pre>
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- * }
- * </pre>
- * and the BigQuery table for the output:
- * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
- * }</pre>
- *
- * <p>The BigQuery input table defaults to {@code clouddataflow-readonly:samples.weather_stations }
- * and can be overridden with {@code --input}.
- */
-public class MaxPerKeyExamples {
-  // Default to using a 1000 row subset of the public weather station table publicdata:samples.gsod.
-  private static final String WEATHER_SAMPLES_TABLE =
-      "clouddataflow-readonly:samples.weather_stations";
-
-  /**
-   * Examines each row (weather reading) in the input table. Output the month of the reading,
-   * and the mean_temp.
-   */
-  static class ExtractTempFn extends DoFn<TableRow, KV<Integer, Double>> {
-    @Override
-    public void processElement(ProcessContext c) {
-      TableRow row = c.element();
-      Integer month = Integer.parseInt((String) row.get("month"));
-      Double meanTemp = Double.parseDouble(row.get("mean_temp").toString());
-      c.output(KV.of(month, meanTemp));
-    }
-  }
-
-  /**
-   * Format the results to a TableRow, to save to BigQuery.
-   *
-   */
-  static class FormatMaxesFn extends DoFn<KV<Integer, Double>, TableRow> {
-    @Override
-    public void processElement(ProcessContext c) {
-      TableRow row = new TableRow()
-          .set("month", c.element().getKey())
-          .set("max_mean_temp", c.element().getValue());
-      c.output(row);
-    }
-  }
-
-  /**
-   * Reads rows from a weather data table, and finds the max mean_temp for each
-   * month via the 'Max' statistical combination function.
-   */
-  static class MaxMeanTemp
-      extends PTransform<PCollection<TableRow>, PCollection<TableRow>> {
-    @Override
-    public PCollection<TableRow> apply(PCollection<TableRow> rows) {
-
-      // row... => <month, mean_temp> ...
-      PCollection<KV<Integer, Double>> temps = rows.apply(
-          ParDo.of(new ExtractTempFn()));
-
-      // month, mean_temp... => <month, max mean temp>...
-      PCollection<KV<Integer, Double>> tempMaxes =
-          temps.apply(Max.<Integer>doublesPerKey());
-
-      // <month, max>... => row...
-      PCollection<TableRow> results = tempMaxes.apply(
-          ParDo.of(new FormatMaxesFn()));
-
-      return results;
-    }
-  }
-
-  /**
-   * Options supported by {@link MaxPerKeyExamples}.
-   *
-   * <p>Inherits standard configuration options.
-   */
-  private static interface Options extends PipelineOptions {
-    @Description("Table to read from, specified as "
-        + "<project_id>:<dataset_id>.<table_id>")
-    @Default.String(WEATHER_SAMPLES_TABLE)
-    String getInput();
-    void setInput(String value);
-
-    @Description("Table to write to, specified as "
-        + "<project_id>:<dataset_id>.<table_id>")
-    @Validation.Required
-    String getOutput();
-    void setOutput(String value);
-  }
-
-  public static void main(String[] args)
-      throws Exception {
-
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    Pipeline p = Pipeline.create(options);
-
-    // Build the table schema for the output table.
-    List<TableFieldSchema> fields = new ArrayList<>();
-    fields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
-    fields.add(new TableFieldSchema().setName("max_mean_temp").setType("FLOAT"));
-    TableSchema schema = new TableSchema().setFields(fields);
-
-    p.apply(BigQueryIO.Read.from(options.getInput()))
-     .apply(new MaxMeanTemp())
-     .apply(BigQueryIO.Write
-        .to(options.getOutput())
-        .withSchema(schema)
-        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
-        .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/README.md
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/README.md b/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/README.md
deleted file mode 100644
index 99f3080..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/README.md
+++ /dev/null
@@ -1,55 +0,0 @@
-
-# "Cookbook" Examples
-
-This directory holds simple "cookbook" examples, which show how to define
-commonly-used data analysis patterns that you would likely incorporate into a
-larger Dataflow pipeline. They include:
-
- <ul>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoes.java">BigQueryTornadoes</a>
-  &mdash; An example that reads the public samples of weather data from Google
-  BigQuery, counts the number of tornadoes that occur in each month, and
-  writes the results to BigQuery. Demonstrates reading/writing BigQuery,
-  counting a <code>PCollection</code>, and user-defined <code>PTransforms</code>.</li>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamples.java">CombinePerKeyExamples</a>
-  &mdash; An example that reads the public &quot;Shakespeare&quot; data, and for
-  each word in the dataset that exceeds a given length, generates a string
-  containing the list of play names in which that word appears.
-  Demonstrates the <code>Combine.perKey</code>
-  transform, which lets you combine the values in a key-grouped
-  <code>PCollection</code>.
-  </li>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DatastoreWordCount.java">DatastoreWordCount</a>
-  &mdash; An example that shows you how to read from Google Cloud Datastore.</li>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DeDupExample.java">DeDupExample</a>
-  &mdash; An example that uses Shakespeare's plays as plain text files, and
-  removes duplicate lines across all the files. Demonstrates the
-  <code>RemoveDuplicates</code>, <code>TextIO.Read</code>,
-  and <code>TextIO.Write</code> transforms, and how to wire transforms together.
-  </li>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/FilterExamples.java">FilterExamples</a>
-  &mdash; An example that shows different approaches to filtering, including
-  selection and projection. It also shows how to dynamically set parameters
-  by defining and using new pipeline options, and use how to use a value derived
-  by a pipeline. Demonstrates the <code>Mean</code> transform,
-  <code>Options</code> configuration, and using pipeline-derived data as a side
-  input.
-  </li>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/JoinExamples.java">JoinExamples</a>
-  &mdash; An example that shows how to join two collections. It uses a
-  sample of the <a href="http://goo.gl/OB6oin">GDELT &quot;world event&quot;
-  data</a>, joining the event <code>action</code> country code against a table
-  that maps country codes to country names. Demonstrates the <code>Join</code>
-  operation, and using multiple input sources.
-  </li>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamples.java">MaxPerKeyExamples</a>
-  &mdash; An example that reads the public samples of weather data from BigQuery,
-  and finds the maximum temperature (<code>mean_temp</code>) for each month.
-  Demonstrates the <code>Max</code> statistical combination transform, and how to
-  find the max-per-key group.
-  </li>
-  </ul>
-
-See the [documentation](https://cloud.google.com/dataflow/getting-started) and the [Examples
-README](../../../../../../../../../README.md) for
-information about how to run these examples.

[27/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SourceTestUtils.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SourceTestUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SourceTestUtils.java
deleted file mode 100644
index b8f9b0b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SourceTestUtils.java
+++ /dev/null
@@ -1,642 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.testing;
-
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.hamcrest.Matchers.equalTo;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.io.BoundedSource;
-import com.google.cloud.dataflow.sdk.io.Source;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.values.KV;
-
-import org.junit.Assert;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Objects;
-import java.util.concurrent.Callable;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-
-/**
- * Helper functions and test harnesses for checking correctness of {@link Source}
- * implementations.
- *
- * <p>Contains a few lightweight utilities (e.g. reading items from a source or a reader,
- * such as {@link #readFromSource} and {@link #readFromUnstartedReader}), as well as
- * heavyweight property testing and stress testing harnesses that help getting a large
- * amount of test coverage with few code. Most notable ones are:
- * <ul>
- *   <li>{@link #assertSourcesEqualReferenceSource} helps testing that the data read
- *   by the union of sources produced by {@link BoundedSource#splitIntoBundles}
- *   is the same as data read by the original source.
- *   <li>If your source implements dynamic work rebalancing, use the
- *   {@code assertSplitAtFraction} family of functions - they test behavior of
- *   {@link BoundedSource.BoundedReader#splitAtFraction}, in particular, that
- *   various consistency properties are respected and the total set of data read
- *   by the source is preserved when splits happen.
- *   Use {@link #assertSplitAtFractionBehavior} to test individual cases
- *   of {@code splitAtFraction} and use {@link #assertSplitAtFractionExhaustive}
- *   as a heavy-weight stress test including concurrency. We strongly recommend to
- *   use both.
- * </ul>
- * For example usages, see the unit tests of classes such as
- * {@link com.google.cloud.dataflow.sdk.io.AvroSource} or
- * {@link com.google.cloud.dataflow.sdk.io.XmlSource}.
- *
- * <p>Like {@link DataflowAssert}, requires JUnit and Hamcrest to be present in the classpath.
- */
-public class SourceTestUtils {
-  // A wrapper around a value of type T that compares according to the structural
-  // value provided by a Coder<T>, but prints both the original and structural value,
-  // to help get good error messages from JUnit equality assertion failures and such.
-  private static class ReadableStructuralValue<T> {
-    private T originalValue;
-    private Object structuralValue;
-
-    public ReadableStructuralValue(T originalValue, Object structuralValue) {
-      this.originalValue = originalValue;
-      this.structuralValue = structuralValue;
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hashCode(structuralValue);
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj == null || !(obj instanceof ReadableStructuralValue)) {
-        return false;
-      }
-      return Objects.equals(structuralValue, ((ReadableStructuralValue) obj).structuralValue);
-    }
-
-    @Override
-    public String toString() {
-      return String.format("[%s (structural %s)]", originalValue, structuralValue);
-    }
-  }
-
-  /**
-   * Testing utilities below depend on standard assertions and matchers to compare elements read by
-   * sources. In general the elements may not implement {@code equals}/{@code hashCode} properly,
-   * however every source has a {@link Coder} and every {@code Coder} can
-   * produce a {@link Coder#structuralValue} whose {@code equals}/{@code hashCode} is
-   * consistent with equality of encoded format.
-   * So we use this {@link Coder#structuralValue} to compare elements read by sources.
-   */
-  public static <T> List<ReadableStructuralValue<T>> createStructuralValues(
-      Coder<T> coder, List<T> list)
-      throws Exception {
-    List<ReadableStructuralValue<T>> result = new ArrayList<>();
-    for (T elem : list) {
-      result.add(new ReadableStructuralValue<>(elem, coder.structuralValue(elem)));
-    }
-    return result;
-  }
-
-  /**
-   * Reads all elements from the given {@link BoundedSource}.
-   */
-  public static <T> List<T> readFromSource(BoundedSource<T> source, PipelineOptions options)
-      throws IOException {
-    try (BoundedSource.BoundedReader<T> reader = source.createReader(options)) {
-      return readFromUnstartedReader(reader);
-    }
-  }
-
-  /**
-   * Reads all elements from the given unstarted {@link Source.Reader}.
-   */
-  public static <T> List<T> readFromUnstartedReader(Source.Reader<T> reader) throws IOException {
-    return readRemainingFromReader(reader, false);
-  }
-
-  /**
-   * Reads all elements from the given started {@link Source.Reader}.
-   */
-  public static <T> List<T> readFromStartedReader(Source.Reader<T> reader) throws IOException {
-    return readRemainingFromReader(reader, true);
-  }
-
-  /**
-   * Read elements from a {@link Source.Reader} until n elements are read.
-   */
-  public static <T> List<T> readNItemsFromUnstartedReader(Source.Reader<T> reader, int n)
-      throws IOException {
-    return readNItemsFromReader(reader, n, false);
-  }
-
-  /**
-   * Read elements from a {@link Source.Reader} that has already had {@link Source.Reader#start}
-   * called on it, until n elements are read.
-   */
-  public static <T> List<T> readNItemsFromStartedReader(Source.Reader<T> reader, int n)
-      throws IOException {
-    return readNItemsFromReader(reader, n, true);
-  }
-
-  /**
-   * Read elements from a {@link Source.Reader} until n elements are read.
-   *
-   * <p>There must be at least n elements remaining in the reader, except for
-   * the case when n is {@code Integer.MAX_VALUE}, which means "read all
-   * remaining elements".
-   */
-  private static <T> List<T> readNItemsFromReader(Source.Reader<T> reader, int n, boolean started)
-      throws IOException {
-    List<T> res = new ArrayList<>();
-    for (int i = 0; i < n; i++) {
-      boolean shouldStart = (i == 0 && !started);
-      boolean more = shouldStart ? reader.start() : reader.advance();
-      if (n != Integer.MAX_VALUE) {
-        assertTrue(more);
-      }
-      if (!more) {
-        break;
-      }
-      res.add(reader.getCurrent());
-    }
-    return res;
-  }
-
-  /**
-   * Read all remaining elements from a {@link Source.Reader}.
-   */
-  public static <T> List<T> readRemainingFromReader(Source.Reader<T> reader, boolean started)
-      throws IOException {
-    return readNItemsFromReader(reader, Integer.MAX_VALUE, started);
-  }
-
-  /**
-   * Given a reference {@code Source} and a list of {@code Source}s, assert that the union of
-   * the records read from the list of sources is equal to the records read from the reference
-   * source.
-   */
-  public static <T> void assertSourcesEqualReferenceSource(
-      BoundedSource<T> referenceSource,
-      List<? extends BoundedSource<T>> sources,
-      PipelineOptions options)
-      throws Exception {
-    Coder<T> coder = referenceSource.getDefaultOutputCoder();
-    List<T> referenceRecords = readFromSource(referenceSource, options);
-    List<T> bundleRecords = new ArrayList<>();
-    for (BoundedSource<T> source : sources) {
-      assertThat(
-          "Coder type for source "
-              + source
-              + " is not compatible with Coder type for referenceSource "
-              + referenceSource,
-          source.getDefaultOutputCoder(),
-          equalTo(coder));
-      List<T> elems = readFromSource(source, options);
-      bundleRecords.addAll(elems);
-    }
-    List<ReadableStructuralValue<T>> bundleValues =
-        createStructuralValues(coder, bundleRecords);
-    List<ReadableStructuralValue<T>> referenceValues =
-        createStructuralValues(coder, referenceRecords);
-    assertThat(bundleValues, containsInAnyOrder(referenceValues.toArray()));
-  }
-
-  /**
-   * Assert that a {@code Reader} returns a {@code Source} that, when read from, produces the same
-   * records as the reader.
-   */
-  public static <T> void assertUnstartedReaderReadsSameAsItsSource(
-      BoundedSource.BoundedReader<T> reader, PipelineOptions options) throws Exception {
-    Coder<T> coder = reader.getCurrentSource().getDefaultOutputCoder();
-    List<T> expected = readFromUnstartedReader(reader);
-    List<T> actual = readFromSource(reader.getCurrentSource(), options);
-    List<ReadableStructuralValue<T>> expectedStructural = createStructuralValues(coder, expected);
-    List<ReadableStructuralValue<T>> actualStructural = createStructuralValues(coder, actual);
-    assertThat(actualStructural, containsInAnyOrder(expectedStructural.toArray()));
-  }
-
-  /**
-   * Expected outcome of
-   * {@link com.google.cloud.dataflow.sdk.io.BoundedSource.BoundedReader#splitAtFraction}.
-   */
-  public enum ExpectedSplitOutcome {
-    /**
-     * The operation must succeed and the results must be consistent.
-     */
-    MUST_SUCCEED_AND_BE_CONSISTENT,
-    /**
-     * The operation must fail (return {@code null}).
-     */
-    MUST_FAIL,
-    /**
-     * The operation must either fail, or succeed and the results be consistent.
-     */
-    MUST_BE_CONSISTENT_IF_SUCCEEDS
-  }
-
-  /**
-   * Contains two values: the number of items in the primary source, and the number of items in
-   * the residual source, -1 if split failed.
-   */
-  private static class SplitAtFractionResult {
-    public int numPrimaryItems;
-    public int numResidualItems;
-
-    public SplitAtFractionResult(int numPrimaryItems, int numResidualItems) {
-      this.numPrimaryItems = numPrimaryItems;
-      this.numResidualItems = numResidualItems;
-    }
-  }
-
-  /**
-   * Asserts that the {@code source}'s reader either fails to {@code splitAtFraction(fraction)}
-   * after reading {@code numItemsToReadBeforeSplit} items, or succeeds in a way that is
-   * consistent according to {@link #assertSplitAtFractionSucceedsAndConsistent}.
-   * <p> Returns SplitAtFractionResult.
-   */
-
-  public static <T> SplitAtFractionResult assertSplitAtFractionBehavior(
-      BoundedSource<T> source,
-      int numItemsToReadBeforeSplit,
-      double splitFraction,
-      ExpectedSplitOutcome expectedOutcome,
-      PipelineOptions options)
-      throws Exception {
-    return assertSplitAtFractionBehaviorImpl(
-        source, readFromSource(source, options), numItemsToReadBeforeSplit, splitFraction,
-        expectedOutcome, options);
-  }
-
-  /**
-   * Compares two lists elementwise and throws a detailed assertion failure optimized for
-   * human reading in case they are unequal.
-   */
-  private static <T> void assertListsEqualInOrder(
-      String message, String expectedLabel, List<T> expected, String actualLabel, List<T> actual) {
-    int i = 0;
-    for (; i < expected.size() && i < actual.size(); ++i) {
-      if (!Objects.equals(expected.get(i), actual.get(i))) {
-        Assert.fail(String.format(
-            "%s: %s and %s have %d items in common and then differ. "
-            + "Item in %s (%d more): %s, item in %s (%d more): %s",
-            message, expectedLabel, actualLabel, i,
-            expectedLabel, expected.size() - i - 1, expected.get(i),
-            actualLabel, actual.size() - i - 1, actual.get(i)));
-      }
-    }
-    if (i < expected.size() /* but i == actual.size() */) {
-      Assert.fail(String.format(
-          "%s: %s has %d more items after matching all %d from %s. First 5: %s",
-          message, expectedLabel, expected.size() - actual.size(), actual.size(), actualLabel,
-          expected.subList(actual.size(), Math.min(expected.size(), actual.size() + 5))));
-    } else if (i < actual.size() /* but i == expected.size() */) {
-      Assert.fail(String.format(
-          "%s: %s has %d more items after matching all %d from %s. First 5: %s",
-          message, actualLabel, actual.size() - expected.size(), expected.size(), expectedLabel,
-          actual.subList(expected.size(), Math.min(actual.size(), expected.size() + 5))));
-    } else {
-      // All is well.
-    }
-  }
-
-  private static <T> SourceTestUtils.SplitAtFractionResult assertSplitAtFractionBehaviorImpl(
-      BoundedSource<T> source, List<T> expectedItems, int numItemsToReadBeforeSplit,
-      double splitFraction, ExpectedSplitOutcome expectedOutcome, PipelineOptions options)
-      throws Exception {
-    try (BoundedSource.BoundedReader<T> reader = source.createReader(options)) {
-      BoundedSource<T> originalSource = reader.getCurrentSource();
-      List<T> currentItems = readNItemsFromUnstartedReader(reader, numItemsToReadBeforeSplit);
-      BoundedSource<T> residual = reader.splitAtFraction(splitFraction);
-      if (residual != null) {
-        assertFalse(
-            String.format(
-                "Primary source didn't change after a successful split of %s at %f "
-                + "after reading %d items. "
-                + "Was the source object mutated instead of creating a new one? "
-                + "Source objects MUST be immutable.",
-                source, splitFraction, numItemsToReadBeforeSplit),
-            reader.getCurrentSource() == originalSource);
-        assertFalse(
-            String.format(
-                "Residual source equal to original source after a successful split of %s at %f "
-                + "after reading %d items. "
-                + "Was the source object mutated instead of creating a new one? "
-                + "Source objects MUST be immutable.",
-                source, splitFraction, numItemsToReadBeforeSplit),
-            reader.getCurrentSource() == residual);
-      }
-      // Failure cases are: must succeed but fails; must fail but succeeds.
-      switch (expectedOutcome) {
-        case MUST_SUCCEED_AND_BE_CONSISTENT:
-          assertNotNull(
-              "Failed to split reader of source: "
-                  + source
-                  + " at "
-                  + splitFraction
-                  + " after reading "
-                  + numItemsToReadBeforeSplit
-                  + " items",
-              residual);
-          break;
-        case MUST_FAIL:
-          assertEquals(null, residual);
-          break;
-        case MUST_BE_CONSISTENT_IF_SUCCEEDS:
-          // Nothing.
-          break;
-      }
-      currentItems.addAll(readRemainingFromReader(reader, numItemsToReadBeforeSplit > 0));
-      BoundedSource<T> primary = reader.getCurrentSource();
-      return verifySingleSplitAtFractionResult(
-          source, expectedItems, currentItems, primary, residual,
-          numItemsToReadBeforeSplit, splitFraction, options);
-    }
-  }
-
-  private static <T> SourceTestUtils.SplitAtFractionResult verifySingleSplitAtFractionResult(
-      BoundedSource<T> source, List<T> expectedItems, List<T> currentItems,
-      BoundedSource<T> primary, BoundedSource<T> residual,
-      int numItemsToReadBeforeSplit, double splitFraction, PipelineOptions options)
-      throws Exception {
-    List<T> primaryItems = readFromSource(primary, options);
-    if (residual != null) {
-      List<T> residualItems = readFromSource(residual, options);
-      List<T> totalItems = new ArrayList<>();
-      totalItems.addAll(primaryItems);
-      totalItems.addAll(residualItems);
-      String errorMsgForPrimarySourceComp =
-          String.format(
-              "Continued reading after split yielded different items than primary source: "
-                  + "split at %s after reading %s items, original source: %s, primary source: %s",
-              splitFraction,
-              numItemsToReadBeforeSplit,
-              source,
-              primary);
-      String errorMsgForTotalSourceComp =
-          String.format(
-              "Items in primary and residual sources after split do not add up to items "
-                  + "in the original source. Split at %s after reading %s items; "
-                  + "original source: %s, primary: %s, residual: %s",
-              splitFraction,
-              numItemsToReadBeforeSplit,
-              source,
-              primary,
-              residual);
-      Coder<T> coder = primary.getDefaultOutputCoder();
-      List<ReadableStructuralValue<T>> primaryValues =
-          createStructuralValues(coder, primaryItems);
-      List<ReadableStructuralValue<T>> currentValues =
-          createStructuralValues(coder, currentItems);
-      List<ReadableStructuralValue<T>> expectedValues =
-          createStructuralValues(coder, expectedItems);
-      List<ReadableStructuralValue<T>> totalValues =
-          createStructuralValues(coder, totalItems);
-      assertListsEqualInOrder(
-          errorMsgForPrimarySourceComp, "current", currentValues, "primary", primaryValues);
-      assertListsEqualInOrder(
-          errorMsgForTotalSourceComp, "total", expectedValues, "primary+residual", totalValues);
-      return new SplitAtFractionResult(primaryItems.size(), residualItems.size());
-    }
-    return new SplitAtFractionResult(primaryItems.size(), -1);
-  }
-
-  /**
-   * Verifies some consistency properties of
-   * {@link BoundedSource.BoundedReader#splitAtFraction} on the given source. Equivalent to
-   * the following pseudocode:
-   * <pre>
-   *   Reader reader = source.createReader();
-   *   read N items from reader;
-   *   Source residual = reader.splitAtFraction(splitFraction);
-   *   Source primary = reader.getCurrentSource();
-   *   assert: items in primary == items we read so far
-   *                               + items we'll get by continuing to read from reader;
-   *   assert: items in original source == items in primary + items in residual
-   * </pre>
-   */
-  public static <T> void assertSplitAtFractionSucceedsAndConsistent(
-      BoundedSource<T> source,
-      int numItemsToReadBeforeSplit,
-      double splitFraction,
-      PipelineOptions options)
-      throws Exception {
-    assertSplitAtFractionBehavior(
-        source,
-        numItemsToReadBeforeSplit,
-        splitFraction,
-        ExpectedSplitOutcome.MUST_SUCCEED_AND_BE_CONSISTENT,
-        options);
-  }
-
-  /**
-   * Asserts that the {@code source}'s reader fails to {@code splitAtFraction(fraction)}
-   * after reading {@code numItemsToReadBeforeSplit} items.
-   */
-  public static <T> void assertSplitAtFractionFails(
-      BoundedSource<T> source,
-      int numItemsToReadBeforeSplit,
-      double splitFraction,
-      PipelineOptions options)
-      throws Exception {
-    assertSplitAtFractionBehavior(
-        source, numItemsToReadBeforeSplit, splitFraction, ExpectedSplitOutcome.MUST_FAIL, options);
-  }
-
-  private static class SplitFractionStatistics {
-    List<Double> successfulFractions = new ArrayList<>();
-    List<Double> nonTrivialFractions = new ArrayList<>();
-  }
-
-  /**
-   * Asserts that given a start position,
-   * {@link BoundedSource.BoundedReader#splitAtFraction} at every interesting fraction (halfway
-   * between two fractions that differ by at least one item) can be called successfully and the
-   * results are consistent if a split succeeds.
-   */
-  private static <T> void assertSplitAtFractionBinary(
-      BoundedSource<T> source,
-      List<T> expectedItems,
-      int numItemsToBeReadBeforeSplit,
-      double leftFraction,
-      SplitAtFractionResult leftResult,
-      double rightFraction,
-      SplitAtFractionResult rightResult,
-      PipelineOptions options,
-      SplitFractionStatistics stats)
-      throws Exception {
-    if (rightFraction - leftFraction < 0.001) {
-      // Do not recurse too deeply. Otherwise we will end up in infinite
-      // recursion, e.g., while trying to find the exact minimal fraction s.t.
-      // split succeeds. A precision of 0.001 when looking for such a fraction
-      // ought to be enough for everybody.
-      return;
-    }
-    double middleFraction = (rightFraction + leftFraction) / 2;
-    if (leftResult == null) {
-      leftResult = assertSplitAtFractionBehaviorImpl(
-          source, expectedItems, numItemsToBeReadBeforeSplit, leftFraction,
-          ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
-    }
-    if (rightResult == null) {
-      rightResult = assertSplitAtFractionBehaviorImpl(
-          source, expectedItems, numItemsToBeReadBeforeSplit, rightFraction,
-          ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
-    }
-    SplitAtFractionResult middleResult = assertSplitAtFractionBehaviorImpl(
-        source, expectedItems, numItemsToBeReadBeforeSplit, middleFraction,
-        ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
-    if (middleResult.numResidualItems != -1) {
-      stats.successfulFractions.add(middleFraction);
-    }
-    if (middleResult.numResidualItems > 0) {
-      stats.nonTrivialFractions.add(middleFraction);
-    }
-    // Two split fractions are equivalent if they yield the same number of
-    // items in primary vs. residual source. Left and right are already not
-    // equivalent. Recurse into [left, middle) and [right, middle) respectively
-    // if middle is not equivalent to left or right.
-    if (leftResult.numPrimaryItems != middleResult.numPrimaryItems) {
-      assertSplitAtFractionBinary(
-          source, expectedItems, numItemsToBeReadBeforeSplit,
-          leftFraction, leftResult, middleFraction, middleResult, options, stats);
-    }
-    if (rightResult.numPrimaryItems != middleResult.numPrimaryItems) {
-      assertSplitAtFractionBinary(
-          source, expectedItems, numItemsToBeReadBeforeSplit,
-          middleFraction, middleResult, rightFraction, rightResult, options, stats);
-    }
-  }
-
-  /**
-   * Asserts that for each possible start position,
-   * {@link BoundedSource.BoundedReader#splitAtFraction} at every interesting fraction (halfway
-   * between two fractions that differ by at least one item) can be called successfully and the
-   * results are consistent if a split succeeds. Verifies multithreaded splitting as well.
-   */
-  public static <T> void assertSplitAtFractionExhaustive(
-      BoundedSource<T> source, PipelineOptions options) throws Exception {
-    List<T> expectedItems = readFromSource(source, options);
-    assertFalse("Empty source", expectedItems.isEmpty());
-    assertFalse("Source reads a single item", expectedItems.size() == 1);
-    List<List<Double>> allNonTrivialFractions = new ArrayList<>();
-    {
-      boolean anySuccessfulFractions = false;
-      boolean anyNonTrivialFractions = false;
-      for (int i = 0; i < expectedItems.size(); i++) {
-        SplitFractionStatistics stats = new SplitFractionStatistics();
-        assertSplitAtFractionBinary(source, expectedItems, i,
-            0.0, null, 1.0, null, options, stats);
-        if (!stats.successfulFractions.isEmpty()) {
-          anySuccessfulFractions = true;
-        }
-        if (!stats.nonTrivialFractions.isEmpty()) {
-          anyNonTrivialFractions = true;
-        }
-        allNonTrivialFractions.add(stats.nonTrivialFractions);
-      }
-      assertTrue(
-          "splitAtFraction test completed vacuously: no successful split fractions found",
-          anySuccessfulFractions);
-      assertTrue(
-          "splitAtFraction test completed vacuously: no non-trivial split fractions found",
-          anyNonTrivialFractions);
-    }
-    {
-      // Perform a stress test of "racy" concurrent splitting:
-      // for every position (number of items read), try to split at the minimum nontrivial
-      // split fraction for that position concurrently with reading the record at that position.
-      // To ensure that the test is non-vacuous, make sure that the splitting succeeds
-      // at least once and fails at least once.
-      ExecutorService executor = Executors.newFixedThreadPool(2);
-      for (int i = 0; i < expectedItems.size(); i++) {
-        double minNonTrivialFraction = 2.0;  // Greater than any possible fraction.
-        for (double fraction : allNonTrivialFractions.get(i)) {
-          minNonTrivialFraction = Math.min(minNonTrivialFraction, fraction);
-        }
-        if (minNonTrivialFraction == 2.0) {
-          // This will not happen all the time because otherwise the test above would
-          // detect vacuousness.
-          continue;
-        }
-        boolean haveSuccess = false, haveFailure = false;
-        while (!haveSuccess || !haveFailure) {
-          if (assertSplitAtFractionConcurrent(
-              executor, source, expectedItems, i, minNonTrivialFraction, options)) {
-            haveSuccess = true;
-          } else {
-            haveFailure = true;
-          }
-        }
-      }
-    }
-  }
-
-  private static <T> boolean assertSplitAtFractionConcurrent(
-      ExecutorService executor, BoundedSource<T> source, List<T> expectedItems,
-      final int numItemsToReadBeforeSplitting, final double fraction, PipelineOptions options)
-      throws Exception {
-    @SuppressWarnings("resource")  // Closed in readerThread
-    final BoundedSource.BoundedReader<T> reader = source.createReader(options);
-    final CountDownLatch unblockSplitter = new CountDownLatch(1);
-    Future<List<T>> readerThread =
-        executor.submit(
-            new Callable<List<T>>() {
-              @Override
-              public List<T> call() throws Exception {
-                try {
-                  List<T> items =
-                      readNItemsFromUnstartedReader(reader, numItemsToReadBeforeSplitting);
-                  unblockSplitter.countDown();
-                  items.addAll(readRemainingFromReader(reader, numItemsToReadBeforeSplitting > 0));
-                  return items;
-                } finally {
-                  reader.close();
-                }
-              }
-            });
-    Future<KV<BoundedSource<T>, BoundedSource<T>>> splitterThread = executor.submit(
-        new Callable<KV<BoundedSource<T>, BoundedSource<T>>>() {
-          @Override
-          public KV<BoundedSource<T>, BoundedSource<T>> call() throws Exception {
-            unblockSplitter.await();
-            BoundedSource<T> residual = reader.splitAtFraction(fraction);
-            if (residual == null) {
-              return null;
-            }
-            return KV.of(reader.getCurrentSource(), residual);
-          }
-        });
-    List<T> currentItems = readerThread.get();
-    KV<BoundedSource<T>, BoundedSource<T>> splitSources = splitterThread.get();
-    if (splitSources == null) {
-      return false;
-    }
-    SplitAtFractionResult res = verifySingleSplitAtFractionResult(
-        source, expectedItems, currentItems, splitSources.getKey(), splitSources.getValue(),
-        numItemsToReadBeforeSplitting, fraction, options);
-    return (res.numResidualItems > 0);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineOptions.java
deleted file mode 100644
index 1afb691..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineOptions.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.testing;
-
-import com.google.cloud.dataflow.sdk.options.BlockingDataflowPipelineOptions;
-
-/**
- * A set of options used to configure the {@link TestPipeline}.
- */
-public interface TestDataflowPipelineOptions extends BlockingDataflowPipelineOptions {
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java
deleted file mode 100644
index 9fff070..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestDataflowPipelineRunner.java
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.testing;
-
-import com.google.api.services.dataflow.model.JobMessage;
-import com.google.api.services.dataflow.model.JobMetrics;
-import com.google.api.services.dataflow.model.MetricUpdate;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult.State;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.DataflowJobExecutionException;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineJob;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
-import com.google.cloud.dataflow.sdk.util.MonitoringUtil.JobMessagesHandler;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.common.base.Optional;
-import com.google.common.base.Throwables;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.math.BigDecimal;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-
-/**
- * {@link TestDataflowPipelineRunner} is a pipeline runner that wraps a
- * {@link DataflowPipelineRunner} when running tests against the {@link TestPipeline}.
- *
- * @see TestPipeline
- */
-public class TestDataflowPipelineRunner extends PipelineRunner<DataflowPipelineJob> {
-  private static final String TENTATIVE_COUNTER = "tentative";
-  private static final Logger LOG = LoggerFactory.getLogger(TestDataflowPipelineRunner.class);
-
-  private final TestDataflowPipelineOptions options;
-  private final DataflowPipelineRunner runner;
-  private int expectedNumberOfAssertions = 0;
-
-  TestDataflowPipelineRunner(TestDataflowPipelineOptions options) {
-    this.options = options;
-    this.runner = DataflowPipelineRunner.fromOptions(options);
-  }
-
-  /**
-   * Constructs a runner from the provided options.
-   */
-  public static TestDataflowPipelineRunner fromOptions(
-      PipelineOptions options) {
-    TestDataflowPipelineOptions dataflowOptions = options.as(TestDataflowPipelineOptions.class);
-
-    return new TestDataflowPipelineRunner(dataflowOptions);
-  }
-
-  @Override
-  public DataflowPipelineJob run(Pipeline pipeline) {
-    return run(pipeline, runner);
-  }
-
-  DataflowPipelineJob run(Pipeline pipeline, DataflowPipelineRunner runner) {
-
-    final JobMessagesHandler messageHandler =
-        new MonitoringUtil.PrintHandler(options.getJobMessageOutput());
-    final DataflowPipelineJob job;
-    try {
-      job = runner.run(pipeline);
-    } catch (DataflowJobExecutionException ex) {
-      throw new IllegalStateException("The dataflow failed.");
-    }
-
-    LOG.info("Running Dataflow job {} with {} expected assertions.",
-        job.getJobId(), expectedNumberOfAssertions);
-
-    try {
-      final Optional<Boolean> result;
-      if (options.isStreaming()) {
-        Future<Optional<Boolean>> resultFuture = options.getExecutorService().submit(
-            new Callable<Optional<Boolean>>() {
-          @Override
-          public Optional<Boolean> call() throws Exception {
-            try {
-              for (;;) {
-                Optional<Boolean> result = checkForSuccess(job);
-                if (result.isPresent()) {
-                  return result;
-                }
-                Thread.sleep(10000L);
-              }
-            } finally {
-              LOG.info("Cancelling Dataflow job {}", job.getJobId());
-              job.cancel();
-            }
-          }
-        });
-        State finalState = job.waitToFinish(10L, TimeUnit.MINUTES, new JobMessagesHandler() {
-            @Override
-            public void process(List<JobMessage> messages) {
-              messageHandler.process(messages);
-              for (JobMessage message : messages) {
-                if (message.getMessageImportance() != null
-                    && message.getMessageImportance().equals("JOB_MESSAGE_ERROR")) {
-                  LOG.info("Dataflow job {} threw exception, cancelling. Exception was: {}",
-                      job.getJobId(), message.getMessageText());
-                  try {
-                    job.cancel();
-                  } catch (Exception e) {
-                    throw Throwables.propagate(e);
-                  }
-                }
-              }
-            }
-          });
-        if (finalState == null || finalState == State.RUNNING) {
-          LOG.info("Dataflow job {} took longer than 10 minutes to complete, cancelling.",
-              job.getJobId());
-          job.cancel();
-        }
-        result = resultFuture.get();
-      } else {
-        job.waitToFinish(-1, TimeUnit.SECONDS, messageHandler);
-        result = checkForSuccess(job);
-      }
-      if (!result.isPresent()) {
-        throw new IllegalStateException(
-            "The dataflow did not output a success or failure metric.");
-      } else if (!result.get()) {
-        throw new IllegalStateException("The dataflow failed.");
-      }
-    } catch (Exception e) {
-      Throwables.propagateIfPossible(e);
-      throw Throwables.propagate(e);
-    }
-    return job;
-  }
-
-  @Override
-  public <OutputT extends POutput, InputT extends PInput> OutputT apply(
-      PTransform<InputT, OutputT> transform, InputT input) {
-    if (transform instanceof DataflowAssert.OneSideInputAssert
-        || transform instanceof DataflowAssert.TwoSideInputAssert) {
-      expectedNumberOfAssertions += 1;
-    }
-
-    return runner.apply(transform, input);
-  }
-
-  Optional<Boolean> checkForSuccess(DataflowPipelineJob job)
-      throws IOException {
-    State state = job.getState();
-    if (state == State.FAILED || state == State.CANCELLED) {
-      LOG.info("The pipeline failed");
-      return Optional.of(false);
-    }
-
-    JobMetrics metrics = job.getDataflowClient().projects().jobs()
-        .getMetrics(job.getProjectId(), job.getJobId()).execute();
-
-    if (metrics == null || metrics.getMetrics() == null) {
-      LOG.warn("Metrics not present for Dataflow job {}.", job.getJobId());
-    } else {
-      int successes = 0;
-      int failures = 0;
-      for (MetricUpdate metric : metrics.getMetrics()) {
-        if (metric.getName() == null || metric.getName().getContext() == null
-            || !metric.getName().getContext().containsKey(TENTATIVE_COUNTER)) {
-          // Don't double count using the non-tentative version of the metric.
-          continue;
-        }
-        if (DataflowAssert.SUCCESS_COUNTER.equals(metric.getName().getName())) {
-          successes += ((BigDecimal) metric.getScalar()).intValue();
-        } else if (DataflowAssert.FAILURE_COUNTER.equals(metric.getName().getName())) {
-          failures += ((BigDecimal) metric.getScalar()).intValue();
-        }
-      }
-
-      if (failures > 0) {
-        LOG.info("Found result while running Dataflow job {}. Found {} success, {} failures out of "
-            + "{} expected assertions.", job.getJobId(), successes, failures,
-            expectedNumberOfAssertions);
-        return Optional.of(false);
-      } else if (successes >= expectedNumberOfAssertions) {
-        LOG.info("Found result while running Dataflow job {}. Found {} success, {} failures out of "
-            + "{} expected assertions.", job.getJobId(), successes, failures,
-            expectedNumberOfAssertions);
-        return Optional.of(true);
-      }
-
-      LOG.info("Running Dataflow job {}. Found {} success, {} failures out of {} expected "
-          + "assertions.", job.getJobId(), successes, failures, expectedNumberOfAssertions);
-    }
-
-    return Optional.<Boolean>absent();
-  }
-
-  @Override
-  public String toString() {
-    return "TestDataflowPipelineRunner#" + options.getAppName();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java
deleted file mode 100644
index a05a778..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/TestPipeline.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.testing;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.options.ApplicationNameOptions;
-import com.google.cloud.dataflow.sdk.options.GcpOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions.CheckEnabled;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.util.TestCredential;
-import com.google.common.base.Optional;
-import com.google.common.collect.Iterators;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import javax.annotation.Nullable;
-
-/**
- * A creator of test pipelines that can be used inside of tests that can be
- * configured to run locally or against the live service.
- *
- * <p>It is recommended to tag hand-selected tests for this purpose using the
- * RunnableOnService Category annotation, as each test run against the service
- * will spin up and tear down a single VM.
- *
- * <p>In order to run tests on the dataflow pipeline service, the following
- * conditions must be met:
- * <ul>
- * <li> runIntegrationTestOnService System property must be set to true.
- * <li> System property "projectName" must be set to your Cloud project.
- * <li> System property "temp_gcs_directory" must be set to a valid GCS bucket.
- * <li> Jars containing the SDK and test classes must be added to the test classpath.
- * </ul>
- *
- * <p>Use {@link DataflowAssert} for tests, as it integrates with this test
- * harness in both direct and remote execution modes.  For example:
- *
- * <pre>{@code
- * Pipeline p = TestPipeline.create();
- * PCollection<Integer> output = ...
- *
- * DataflowAssert.that(output)
- *     .containsInAnyOrder(1, 2, 3, 4);
- * p.run();
- * }</pre>
- *
- */
-public class TestPipeline extends Pipeline {
-  private static final String PROPERTY_DATAFLOW_OPTIONS = "dataflowOptions";
-  private static final ObjectMapper MAPPER = new ObjectMapper();
-
-  /**
-   * Creates and returns a new test pipeline.
-   *
-   * <p>Use {@link DataflowAssert} to add tests, then call
-   * {@link Pipeline#run} to execute the pipeline and check the tests.
-   */
-  public static TestPipeline create() {
-    return fromOptions(testingPipelineOptions());
-  }
-
-  public static TestPipeline fromOptions(PipelineOptions options) {
-    return new TestPipeline(PipelineRunner.fromOptions(options), options);
-  }
-
-  /**
-   * Returns whether a {@link TestPipeline} supports dynamic work rebalancing, and thus tests
-   * of dynamic work rebalancing are expected to pass.
-   */
-  public boolean supportsDynamicWorkRebalancing() {
-    return getRunner() instanceof DataflowPipelineRunner;
-  }
-
-  private TestPipeline(PipelineRunner<? extends PipelineResult> runner, PipelineOptions options) {
-    super(runner, options);
-  }
-
-  /**
-   * Runs this {@link TestPipeline}, unwrapping any {@code AssertionError}
-   * that is raised during testing.
-   */
-  @Override
-  public PipelineResult run() {
-    try {
-      return super.run();
-    } catch (RuntimeException exc) {
-      Throwable cause = exc.getCause();
-      if (cause instanceof AssertionError) {
-        throw (AssertionError) cause;
-      } else {
-        throw exc;
-      }
-    }
-  }
-
-  @Override
-  public String toString() {
-    return "TestPipeline#" + getOptions().as(ApplicationNameOptions.class).getAppName();
-  }
-
-  /**
-   * Creates {@link PipelineOptions} for testing.
-   */
-  public static PipelineOptions testingPipelineOptions() {
-    try {
-      @Nullable String systemDataflowOptions = System.getProperty(PROPERTY_DATAFLOW_OPTIONS);
-      PipelineOptions options =
-          systemDataflowOptions == null
-              ? PipelineOptionsFactory.create()
-              : PipelineOptionsFactory.fromArgs(
-                      MAPPER.readValue(
-                          System.getProperty(PROPERTY_DATAFLOW_OPTIONS), String[].class))
-                  .as(PipelineOptions.class);
-
-      options.as(ApplicationNameOptions.class).setAppName(getAppName());
-      if (isIntegrationTest()) {
-        // TODO: adjust everyone's integration test frameworks to set the runner class via the
-        // pipeline options via PROPERTY_DATAFLOW_OPTIONS
-        options.setRunner(TestDataflowPipelineRunner.class);
-      } else {
-        options.as(GcpOptions.class).setGcpCredential(new TestCredential());
-      }
-      options.setStableUniqueNames(CheckEnabled.ERROR);
-      return options;
-    } catch (IOException e) {
-      throw new RuntimeException("Unable to instantiate test options from system property "
-          + PROPERTY_DATAFLOW_OPTIONS + ":" + System.getProperty(PROPERTY_DATAFLOW_OPTIONS), e);
-    }
-  }
-
-  /**
-   * Returns whether a {@link TestPipeline} should be treated as an integration test.
-   */
-  private static boolean isIntegrationTest() {
-    return Boolean.parseBoolean(System.getProperty("runIntegrationTestOnService"));
-  }
-
-  /** Returns the class + method name of the test, or a default name. */
-  private static String getAppName() {
-    Optional<StackTraceElement> stackTraceElement = findCallersStackTrace();
-    if (stackTraceElement.isPresent()) {
-      String methodName = stackTraceElement.get().getMethodName();
-      String className = stackTraceElement.get().getClassName();
-      if (className.contains(".")) {
-        className = className.substring(className.lastIndexOf(".") + 1);
-      }
-      return className + "-" + methodName;
-    }
-    return "UnitTest";
-  }
-
-  /** Returns the {@link StackTraceElement} of the calling class. */
-  private static Optional<StackTraceElement> findCallersStackTrace() {
-    Iterator<StackTraceElement> elements =
-        Iterators.forArray(Thread.currentThread().getStackTrace());
-    // First find the TestPipeline class in the stack trace.
-    while (elements.hasNext()) {
-      StackTraceElement next = elements.next();
-      if (TestPipeline.class.getName().equals(next.getClassName())) {
-        break;
-      }
-    }
-    // Then find the first instance after that is not the TestPipeline
-    while (elements.hasNext()) {
-      StackTraceElement next = elements.next();
-      if (!TestPipeline.class.getName().equals(next.getClassName())) {
-        return Optional.of(next);
-      }
-    }
-    return Optional.absent();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java
deleted file mode 100644
index dc0baf5..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/WindowFnTestUtils.java
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.testing;
-
-import static org.hamcrest.Matchers.greaterThan;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-
-import org.joda.time.Instant;
-import org.joda.time.ReadableInstant;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import javax.annotation.Nullable;
-
-/**
- * A utility class for testing {@link WindowFn}s.
- */
-public class WindowFnTestUtils {
-
-  /**
-   * Creates a Set of elements to be used as expected output in
-   * {@link #runWindowFn}.
-   */
-  public static Set<String> set(long... timestamps) {
-    Set<String> result = new HashSet<>();
-    for (long timestamp : timestamps) {
-      result.add(timestampValue(timestamp));
-    }
-    return result;
-  }
-
-  /**
-   * Runs the {@link WindowFn} over the provided input, returning a map
-   * of windows to the timestamps in those windows.
-   */
-  public static <T, W extends BoundedWindow> Map<W, Set<String>> runWindowFn(
-      WindowFn<T, W> windowFn,
-      List<Long> timestamps) throws Exception {
-
-    final TestWindowSet<W, String> windowSet = new TestWindowSet<W, String>();
-    for (final Long timestamp : timestamps) {
-      for (W window : windowFn.assignWindows(
-          new TestAssignContext<T, W>(new Instant(timestamp), windowFn))) {
-        windowSet.put(window, timestampValue(timestamp));
-      }
-      windowFn.mergeWindows(new TestMergeContext<T, W>(windowSet, windowFn));
-    }
-    Map<W, Set<String>> actual = new HashMap<>();
-    for (W window : windowSet.windows()) {
-      actual.put(window, windowSet.get(window));
-    }
-    return actual;
-  }
-
-  public static <T, W extends BoundedWindow> Collection<W> assignedWindows(
-      WindowFn<T, W> windowFn, long timestamp) throws Exception {
-    return windowFn.assignWindows(new TestAssignContext<T, W>(new Instant(timestamp), windowFn));
-  }
-
-  private static String timestampValue(long timestamp) {
-    return "T" + new Instant(timestamp);
-  }
-
-  /**
-   * Test implementation of AssignContext.
-   */
-  private static class TestAssignContext<T, W extends BoundedWindow>
-      extends WindowFn<T, W>.AssignContext {
-    private Instant timestamp;
-
-    public TestAssignContext(Instant timestamp, WindowFn<T, W> windowFn) {
-      windowFn.super();
-      this.timestamp = timestamp;
-    }
-
-    @Override
-    public T element() {
-      return null;
-    }
-
-    @Override
-    public Instant timestamp() {
-      return timestamp;
-    }
-
-    @Override
-    public Collection<? extends BoundedWindow> windows() {
-      return null;
-    }
-  }
-
-  /**
-   * Test implementation of MergeContext.
-   */
-  private static class TestMergeContext<T, W extends BoundedWindow>
-    extends WindowFn<T, W>.MergeContext {
-    private TestWindowSet<W, ?> windowSet;
-
-    public TestMergeContext(
-        TestWindowSet<W, ?> windowSet, WindowFn<T, W> windowFn) {
-      windowFn.super();
-      this.windowSet = windowSet;
-    }
-
-    @Override
-    public Collection<W> windows() {
-      return windowSet.windows();
-    }
-
-    @Override
-    public void merge(Collection<W> toBeMerged, W mergeResult) {
-      windowSet.merge(toBeMerged, mergeResult);
-    }
-  }
-
-  /**
-   * A WindowSet useful for testing WindowFns that simply
-   * collects the placed elements into multisets.
-   */
-  private static class TestWindowSet<W extends BoundedWindow, V> {
-
-    private Map<W, Set<V>> elements = new HashMap<>();
-
-    public void put(W window, V value) {
-      Set<V> all = elements.get(window);
-      if (all == null) {
-        all = new HashSet<>();
-        elements.put(window, all);
-      }
-      all.add(value);
-    }
-
-    public void merge(Collection<W> otherWindows, W window) {
-      if (otherWindows.isEmpty()) {
-        return;
-      }
-      Set<V> merged = new HashSet<>();
-      if (elements.containsKey(window) && !otherWindows.contains(window)) {
-        merged.addAll(elements.get(window));
-      }
-      for (W w : otherWindows) {
-        if (!elements.containsKey(w)) {
-          throw new IllegalArgumentException("Tried to merge a non-existent window:" + w);
-        }
-        merged.addAll(elements.get(w));
-        elements.remove(w);
-      }
-      elements.put(window, merged);
-    }
-
-    public Collection<W> windows() {
-      return elements.keySet();
-    }
-
-    // For testing.
-
-    public Set<V> get(W window) {
-      return elements.get(window);
-    }
-  }
-
-  /**
-   * Assigns the given {@code timestamp} to windows using the specified {@code windowFn}, and
-   * verifies that result of {@code windowFn.getOutputTimestamp} for each window is within the
-   * proper bound.
-   */
-  public static <T, W extends BoundedWindow> void validateNonInterferingOutputTimes(
-      WindowFn<T, W> windowFn, long timestamp) throws Exception {
-    Collection<W> windows = WindowFnTestUtils.<T, W>assignedWindows(windowFn, timestamp);
-
-    Instant instant = new Instant(timestamp);
-    for (W window : windows) {
-      Instant outputTimestamp = windowFn.getOutputTimeFn().assignOutputTime(instant, window);
-      assertFalse("getOutputTime must be greater than or equal to input timestamp",
-          outputTimestamp.isBefore(instant));
-      assertFalse("getOutputTime must be less than or equal to the max timestamp",
-          outputTimestamp.isAfter(window.maxTimestamp()));
-    }
-  }
-
-  /**
-   * Assigns the given {@code timestamp} to windows using the specified {@code windowFn}, and
-   * verifies that result of {@link WindowFn#getOutputTime windowFn.getOutputTime} for later windows
-   * (as defined by {@code maxTimestamp}) won't prevent the watermark from passing the end of
-   * earlier windows.
-   *
-   * <p>This verifies that overlapping windows don't interfere at all. Depending on the
-   * {@code windowFn} this may be stricter than desired.
-   */
-  public static <T, W extends BoundedWindow> void validateGetOutputTimestamp(
-      WindowFn<T, W> windowFn, long timestamp) throws Exception {
-    Collection<W> windows = WindowFnTestUtils.<T, W>assignedWindows(windowFn, timestamp);
-    List<W> sortedWindows = new ArrayList<>(windows);
-    Collections.sort(sortedWindows, new Comparator<BoundedWindow>() {
-      @Override
-      public int compare(BoundedWindow o1, BoundedWindow o2) {
-        return o1.maxTimestamp().compareTo(o2.maxTimestamp());
-      }
-    });
-
-    Instant instant = new Instant(timestamp);
-    Instant endOfPrevious = null;
-    for (W window : sortedWindows) {
-      Instant outputTimestamp = windowFn.getOutputTimeFn().assignOutputTime(instant, window);
-      if (endOfPrevious == null) {
-        // If this is the first window, the output timestamp can be anything, as long as it is in
-        // the valid range.
-        assertFalse("getOutputTime must be greater than or equal to input timestamp",
-            outputTimestamp.isBefore(instant));
-        assertFalse("getOutputTime must be less than or equal to the max timestamp",
-            outputTimestamp.isAfter(window.maxTimestamp()));
-      } else {
-        // If this is a later window, the output timestamp must be after the end of the previous
-        // window
-        assertTrue("getOutputTime must be greater than the end of the previous window",
-            outputTimestamp.isAfter(endOfPrevious));
-        assertFalse("getOutputTime must be less than or equal to the max timestamp",
-            outputTimestamp.isAfter(window.maxTimestamp()));
-      }
-      endOfPrevious = window.maxTimestamp();
-    }
-  }
-
-  /**
-   * Verifies that later-ending merged windows from any of the timestamps hold up output of
-   * earlier-ending windows, using the provided {@link WindowFn} and {@link OutputTimeFn}.
-   *
-   * <p>Given a list of lists of timestamps, where each list is expected to merge into a single
-   * window with end times in ascending order, assigns and merges windows for each list (as though
-   * each were a separate key/user session). Then maps each timestamp in the list according to
-   * {@link OutputTimeFn#assignOutputTime outputTimeFn.assignOutputTime()} and
-   * {@link OutputTimeFn#combine outputTimeFn.combine()}.
-   *
-   * <p>Verifies that overlapping windows do not hold each other up via the watermark.
-   */
-  public static <T, W extends IntervalWindow>
-  void validateGetOutputTimestamps(
-      WindowFn<T, W> windowFn,
-      OutputTimeFn<? super W> outputTimeFn,
-      List<List<Long>> timestampsPerWindow) throws Exception {
-
-    // Assign windows to each timestamp, then merge them, storing the merged windows in
-    // a list in corresponding order to timestampsPerWindow
-    final List<W> windows = new ArrayList<>();
-    for (List<Long> timestampsForWindow : timestampsPerWindow) {
-      final Set<W> windowsToMerge = new HashSet<>();
-
-      for (long timestamp : timestampsForWindow) {
-        windowsToMerge.addAll(
-            WindowFnTestUtils.<T, W>assignedWindows(windowFn, timestamp));
-      }
-
-      windowFn.mergeWindows(windowFn.new MergeContext() {
-        @Override
-        public Collection<W> windows() {
-          return windowsToMerge;
-        }
-
-        @Override
-        public void merge(Collection<W> toBeMerged, W mergeResult) throws Exception {
-          windows.add(mergeResult);
-        }
-      });
-    }
-
-    // Map every list of input timestamps to an output timestamp
-    final List<Instant> combinedOutputTimestamps = new ArrayList<>();
-    for (int i = 0; i < timestampsPerWindow.size(); ++i) {
-      List<Long> timestampsForWindow = timestampsPerWindow.get(i);
-      W window = windows.get(i);
-
-      List<Instant> outputInstants = new ArrayList<>();
-      for (long inputTimestamp : timestampsForWindow) {
-        outputInstants.add(outputTimeFn.assignOutputTime(new Instant(inputTimestamp), window));
-      }
-
-      combinedOutputTimestamps.add(OutputTimeFns.combineOutputTimes(outputTimeFn, outputInstants));
-    }
-
-    // Consider windows in increasing order of max timestamp; ensure the output timestamp is after
-    // the max timestamp of the previous
-    @Nullable W earlierEndingWindow = null;
-    for (int i = 0; i < windows.size(); ++i) {
-      W window = windows.get(i);
-      ReadableInstant outputTimestamp = combinedOutputTimestamps.get(i);
-
-      if (earlierEndingWindow != null) {
-        assertThat(outputTimestamp,
-            greaterThan((ReadableInstant) earlierEndingWindow.maxTimestamp()));
-      }
-
-      earlierEndingWindow = window;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java
deleted file mode 100644
index d6f075d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/package-info.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines utilities for unit testing Dataflow pipelines. The tests for the {@code PTransform}s and
- * examples included in the Dataflow SDK provide examples of using these utilities.
- */
-package com.google.cloud.dataflow.sdk.testing;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java
deleted file mode 100644
index 7e56dda..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Aggregator.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-
-/**
- * An {@code Aggregator<InputT>} enables monitoring of values of type {@code InputT},
- * to be combined across all bundles.
- *
- * <p>Aggregators are created by calling {@link DoFn#createAggregator},
- * typically from the {@link DoFn} constructor. Elements can be added to the
- * {@code Aggregator} by calling {@link Aggregator#addValue}.
- *
- * <p>Aggregators are visible in the monitoring UI, when the pipeline is run
- * using DataflowPipelineRunner or BlockingDataflowPipelineRunner, along with
- * their current value. Aggregators may not become visible until the system
- * begins executing the ParDo transform that created them and/or their initial
- * value is changed.
- *
- * <p>Example:
- * <pre> {@code
- * class MyDoFn extends DoFn<String, String> {
- *   private Aggregator<Integer, Integer> myAggregator;
- *
- *   public MyDoFn() {
- *     myAggregator = createAggregator("myAggregator", new Sum.SumIntegerFn());
- *   }
- *
- *   @Override
- *   public void processElement(ProcessContext c) {
- *     myAggregator.addValue(1);
- *   }
- * }
- * } </pre>
- *
- * @param <InputT> the type of input values
- * @param <OutputT> the type of output values
- */
-public interface Aggregator<InputT, OutputT> {
-
-  /**
-   * Adds a new value into the Aggregator.
-   */
-  void addValue(InputT value);
-
-  /**
-   * Returns the name of the Aggregator.
-   */
-  String getName();
-
-  /**
-   * Returns the {@link CombineFn}, which combines input elements in the
-   * aggregator.
-   */
-  CombineFn<InputT, ?, OutputT> getCombineFn();
-
-  // TODO: Consider the following additional API conveniences:
-  // - In addition to createAggregator(), consider adding getAggregator() to
-  //   avoid the need to store the aggregator locally in a DoFn, i.e., create
-  //   if not already present.
-  // - Add a shortcut for the most common aggregator:
-  //   c.createAggregator("name", new Sum.SumIntegerFn()).
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AggregatorRetriever.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AggregatorRetriever.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AggregatorRetriever.java
deleted file mode 100644
index 4bbea85..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AggregatorRetriever.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import java.util.Collection;
-
-/**
- * An internal class for extracting {@link Aggregator Aggregators} from {@link DoFn DoFns}.
- */
-public final class AggregatorRetriever {
-  private AggregatorRetriever() {
-    // do not instantiate
-  }
-
-  /**
-   * Returns the {@link Aggregator Aggregators} created by the provided {@link DoFn}.
-   */
-  public static Collection<Aggregator<?, ?>> getAggregators(DoFn<?, ?> fn) {
-    return fn.getAggregators();
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AppliedPTransform.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AppliedPTransform.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AppliedPTransform.java
deleted file mode 100644
index 7b3d87d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/AppliedPTransform.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Objects;
-
-/**
- * Represents the application of a {@link PTransform} to a specific input to produce
- * a specific output.
- *
- * <p>For internal use.
- *
- * @param <InputT> transform input type
- * @param <OutputT> transform output type
- * @param <TransformT> transform type
- */
-public class AppliedPTransform
-    <InputT extends PInput, OutputT extends POutput,
-     TransformT extends PTransform<? super InputT, OutputT>> {
-
-  private final String fullName;
-  private final InputT input;
-  private final OutputT output;
-  private final TransformT transform;
-
-  private AppliedPTransform(String fullName, InputT input, OutputT output, TransformT transform) {
-    this.input = input;
-    this.output = output;
-    this.transform = transform;
-    this.fullName = fullName;
-  }
-
-  public static <InputT extends PInput, OutputT extends POutput,
-                 TransformT extends PTransform<? super InputT, OutputT>>
-  AppliedPTransform<InputT, OutputT, TransformT> of(
-      String fullName, InputT input, OutputT output, TransformT transform) {
-    return new AppliedPTransform<InputT, OutputT, TransformT>(fullName, input, output, transform);
-  }
-
-  public String getFullName() {
-    return fullName;
-  }
-
-  public InputT getInput() {
-    return input;
-  }
-
-  public OutputT getOutput() {
-    return output;
-  }
-
-  public TransformT getTransform() {
-    return transform;
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hashCode(getFullName(), getInput(), getOutput(), getTransform());
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other instanceof AppliedPTransform) {
-      AppliedPTransform<?, ?, ?> that = (AppliedPTransform<?, ?, ?>) other;
-      return Objects.equal(this.getFullName(), that.getFullName())
-          && Objects.equal(this.getInput(), that.getInput())
-          && Objects.equal(this.getOutput(), that.getOutput())
-          && Objects.equal(this.getTransform(), that.getTransform());
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(getClass())
-        .add("fullName", getFullName())
-        .add("input", getInput())
-        .add("output", getOutput())
-        .add("transform", getTransform())
-        .toString();
-  }
-}

[34/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java
deleted file mode 100644
index b9a0293..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerHooks.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.api.services.dataflow.model.Environment;
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-
-/**
- * An instance of this class can be passed to the
- * {@link DataflowPipelineRunner} to add user defined hooks to be
- * invoked at various times during pipeline execution.
- */
-@Experimental
-public class DataflowPipelineRunnerHooks {
-  /**
-   * Allows the user to modify the environment of their job before their job is submitted
-   * to the service for execution.
-   *
-   * @param environment The environment of the job. Users can make changes to this instance in
-   *     order to change the environment with which their job executes on the service.
-   */
-  public void modifyEnvironmentBeforeSubmission(Environment environment) {}
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java
deleted file mode 100644
index 155c454..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineTranslator.java
+++ /dev/null
@@ -1,1104 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray;
-import static com.google.cloud.dataflow.sdk.util.SerializableUtils.serializeToByteArray;
-import static com.google.cloud.dataflow.sdk.util.StringUtils.byteArrayToJsonString;
-import static com.google.cloud.dataflow.sdk.util.StringUtils.jsonStringToByteArray;
-import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean;
-import static com.google.cloud.dataflow.sdk.util.Structs.addDictionary;
-import static com.google.cloud.dataflow.sdk.util.Structs.addList;
-import static com.google.cloud.dataflow.sdk.util.Structs.addLong;
-import static com.google.cloud.dataflow.sdk.util.Structs.addObject;
-import static com.google.cloud.dataflow.sdk.util.Structs.addString;
-import static com.google.cloud.dataflow.sdk.util.Structs.getString;
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.api.services.dataflow.model.AutoscalingSettings;
-import com.google.api.services.dataflow.model.DataflowPackage;
-import com.google.api.services.dataflow.model.Disk;
-import com.google.api.services.dataflow.model.Environment;
-import com.google.api.services.dataflow.model.Job;
-import com.google.api.services.dataflow.model.Step;
-import com.google.api.services.dataflow.model.WorkerPool;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.io.Read;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.StreamingOptions;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.GroupByKeyAndSortValuesOnly;
-import com.google.cloud.dataflow.sdk.runners.dataflow.BigQueryIOTranslator;
-import com.google.cloud.dataflow.sdk.runners.dataflow.PubsubIOTranslator;
-import com.google.cloud.dataflow.sdk.runners.dataflow.ReadTranslator;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.Combine;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.Flatten;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.View;
-import com.google.cloud.dataflow.sdk.transforms.display.DisplayData;
-import com.google.cloud.dataflow.sdk.transforms.windowing.DefaultTrigger;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.util.AppliedCombineFn;
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.DoFnInfo;
-import com.google.cloud.dataflow.sdk.util.OutputReference;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TypedPValue;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Strings;
-import com.google.common.collect.Lists;
-
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * {@link DataflowPipelineTranslator} knows how to translate {@link Pipeline} objects
- * into Cloud Dataflow Service API {@link Job}s.
- */
-@SuppressWarnings({"rawtypes", "unchecked"})
-public class DataflowPipelineTranslator {
-  // Must be kept in sync with their internal counterparts.
-  private static final Logger LOG = LoggerFactory.getLogger(DataflowPipelineTranslator.class);
-  private static final ObjectMapper MAPPER = new ObjectMapper();
-
-  /**
-   * A map from {@link PTransform} subclass to the corresponding
-   * {@link TransformTranslator} to use to translate that transform.
-   *
-   * <p>A static map that contains system-wide defaults.
-   */
-  private static Map<Class, TransformTranslator> transformTranslators =
-      new HashMap<>();
-
-  /** Provided configuration options. */
-  private final DataflowPipelineOptions options;
-
-  /**
-   * Constructs a translator from the provided options.
-   *
-   * @param options Properties that configure the translator.
-   *
-   * @return The newly created translator.
-   */
-  public static DataflowPipelineTranslator fromOptions(
-      DataflowPipelineOptions options) {
-    return new DataflowPipelineTranslator(options);
-  }
-
-  private DataflowPipelineTranslator(DataflowPipelineOptions options) {
-    this.options = options;
-  }
-
-  /**
-   * Translates a {@link Pipeline} into a {@code JobSpecification}.
-   */
-  public JobSpecification translate(
-      Pipeline pipeline,
-      DataflowPipelineRunner runner,
-      List<DataflowPackage> packages) {
-
-    Translator translator = new Translator(pipeline, runner);
-    Job result = translator.translate(packages);
-    return new JobSpecification(result, Collections.unmodifiableMap(translator.stepNames));
-  }
-
-  /**
-   * The result of a job translation.
-   *
-   * <p>Used to pass the result {@link Job} and any state that was used to construct the job that
-   * may be of use to other classes (e.g. the {@link PTransform} to StepName mapping).
-   */
-  public static class JobSpecification {
-    private final Job job;
-    private final Map<AppliedPTransform<?, ?, ?>, String> stepNames;
-
-    public JobSpecification(Job job, Map<AppliedPTransform<?, ?, ?>, String> stepNames) {
-      this.job = job;
-      this.stepNames = stepNames;
-    }
-
-    public Job getJob() {
-      return job;
-    }
-
-    /**
-     * Returns the mapping of {@link AppliedPTransform AppliedPTransforms} to the internal step
-     * name for that {@code AppliedPTransform}.
-     */
-    public Map<AppliedPTransform<?, ?, ?>, String> getStepNames() {
-      return stepNames;
-    }
-  }
-
-  /**
-   * Renders a {@link Job} as a string.
-   */
-  public static String jobToString(Job job) {
-    try {
-      return MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(job);
-    } catch (JsonProcessingException exc) {
-      throw new IllegalStateException("Failed to render Job as String.", exc);
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Records that instances of the specified PTransform class
-   * should be translated by default by the corresponding
-   * {@link TransformTranslator}.
-   */
-  public static <TransformT extends PTransform> void registerTransformTranslator(
-      Class<TransformT> transformClass,
-      TransformTranslator<? extends TransformT> transformTranslator) {
-    if (transformTranslators.put(transformClass, transformTranslator) != null) {
-      throw new IllegalArgumentException(
-          "defining multiple translators for " + transformClass);
-    }
-  }
-
-  /**
-   * Returns the {@link TransformTranslator} to use for instances of the
-   * specified PTransform class, or null if none registered.
-   */
-  public <TransformT extends PTransform>
-      TransformTranslator<TransformT> getTransformTranslator(Class<TransformT> transformClass) {
-    return transformTranslators.get(transformClass);
-  }
-
-  /**
-   * A {@link TransformTranslator} knows how to translate
-   * a particular subclass of {@link PTransform} for the
-   * Cloud Dataflow service. It does so by
-   * mutating the {@link TranslationContext}.
-   */
-  public interface TransformTranslator<TransformT extends PTransform> {
-    public void translate(TransformT transform,
-                          TranslationContext context);
-  }
-
-  /**
-   * The interface provided to registered callbacks for interacting
-   * with the {@link DataflowPipelineRunner}, including reading and writing the
-   * values of {@link PCollection}s and side inputs ({@link PCollectionView}s).
-   */
-  public interface TranslationContext {
-    /**
-     * Returns the configured pipeline options.
-     */
-    DataflowPipelineOptions getPipelineOptions();
-
-    /**
-     * Returns the input of the transform currently being translated.
-     */
-    <InputT extends PInput> InputT getInput(PTransform<InputT, ?> transform);
-
-    /**
-     * Returns the output of the transform currently being translated.
-     */
-    <OutputT extends POutput> OutputT getOutput(PTransform<?, OutputT> transform);
-
-    /**
-     * Returns the full name of the transform currently being translated.
-     */
-    String getFullName(PTransform<?, ?> transform);
-
-    /**
-     * Adds a step to the Dataflow workflow for the given transform, with
-     * the given Dataflow step type.
-     * This step becomes "current" for the purpose of {@link #addInput} and
-     * {@link #addOutput}.
-     */
-    public void addStep(PTransform<?, ?> transform, String type);
-
-    /**
-     * Adds a pre-defined step to the Dataflow workflow. The given PTransform should be
-     * consistent with the Step, in terms of input, output and coder types.
-     *
-     * <p>This is a low-level operation, when using this method it is up to
-     * the caller to ensure that names do not collide.
-     */
-    public void addStep(PTransform<?, ? extends PValue> transform, Step step);
-
-    /**
-     * Sets the encoding for the current Dataflow step.
-     */
-    public void addEncodingInput(Coder<?> value);
-
-    /**
-     * Adds an input with the given name and value to the current
-     * Dataflow step.
-     */
-    public void addInput(String name, Boolean value);
-
-    /**
-     * Adds an input with the given name and value to the current
-     * Dataflow step.
-     */
-    public void addInput(String name, String value);
-
-    /**
-     * Adds an input with the given name and value to the current
-     * Dataflow step.
-     */
-    public void addInput(String name, Long value);
-
-    /**
-     * Adds an input with the given name to the previously added Dataflow
-     * step, coming from the specified input PValue.
-     */
-    public void addInput(String name, PInput value);
-
-    /**
-     * Adds an input that is a dictionary of strings to objects.
-     */
-    public void addInput(String name, Map<String, Object> elements);
-
-    /**
-     * Adds an input that is a list of objects.
-     */
-    public void addInput(String name, List<? extends Map<String, Object>> elements);
-
-    /**
-     * Adds an output with the given name to the previously added
-     * Dataflow step, producing the specified output {@code PValue},
-     * including its {@code Coder} if a {@code TypedPValue}.  If the
-     * {@code PValue} is a {@code PCollection}, wraps its coder inside
-     * a {@code WindowedValueCoder}.
-     */
-    public void addOutput(String name, PValue value);
-
-    /**
-     * Adds an output with the given name to the previously added
-     * Dataflow step, producing the specified output {@code PValue},
-     * including its {@code Coder} if a {@code TypedPValue}.  If the
-     * {@code PValue} is a {@code PCollection}, wraps its coder inside
-     * a {@code ValueOnlyCoder}.
-     */
-    public void addValueOnlyOutput(String name, PValue value);
-
-    /**
-     * Adds an output with the given name to the previously added
-     * CollectionToSingleton Dataflow step, consuming the specified
-     * input {@code PValue} and producing the specified output
-     * {@code PValue}.  This step requires special treatment for its
-     * output encoding.
-     */
-    public void addCollectionToSingletonOutput(String name,
-                                               PValue inputValue,
-                                               PValue outputValue);
-
-    /**
-     * Encode a PValue reference as an output reference.
-     */
-    public OutputReference asOutputReference(PValue value);
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Translates a Pipeline into the Dataflow representation.
-   */
-  class Translator implements PipelineVisitor, TranslationContext {
-    /** The Pipeline to translate. */
-    private final Pipeline pipeline;
-
-    /** The runner which will execute the pipeline. */
-    private final DataflowPipelineRunner runner;
-
-    /** The Cloud Dataflow Job representation. */
-    private final Job job = new Job();
-
-    /**
-     * Translator is stateful, as addProperty calls refer to the current step.
-     */
-    private Step currentStep;
-
-    /**
-     * A Map from AppliedPTransform to their unique Dataflow step names.
-     */
-    private final Map<AppliedPTransform<?, ?, ?>, String> stepNames = new HashMap<>();
-
-    /**
-     * A Map from PValues to their output names used by their producer
-     * Dataflow steps.
-     */
-    private final Map<POutput, String> outputNames = new HashMap<>();
-
-    /**
-     * A Map from PValues to the Coders used for them.
-     */
-    private final Map<POutput, Coder<?>> outputCoders = new HashMap<>();
-
-    /**
-     * The transform currently being applied.
-     */
-    private AppliedPTransform<?, ?, ?> currentTransform;
-
-    /**
-     * Constructs a Translator that will translate the specified
-     * Pipeline into Dataflow objects.
-     */
-    public Translator(Pipeline pipeline, DataflowPipelineRunner runner) {
-      this.pipeline = pipeline;
-      this.runner = runner;
-    }
-
-    /**
-     * Translates this Translator's pipeline onto its writer.
-     * @return a Job definition filled in with the type of job, the environment,
-     * and the job steps.
-     */
-    public Job translate(List<DataflowPackage> packages) {
-      job.setName(options.getJobName().toLowerCase());
-
-      Environment environment = new Environment();
-      job.setEnvironment(environment);
-
-      try {
-        environment.setSdkPipelineOptions(
-            MAPPER.readValue(MAPPER.writeValueAsBytes(options), Map.class));
-      } catch (IOException e) {
-        throw new IllegalArgumentException(
-            "PipelineOptions specified failed to serialize to JSON.", e);
-      }
-
-      WorkerPool workerPool = new WorkerPool();
-
-      if (options.getTeardownPolicy() != null) {
-        workerPool.setTeardownPolicy(options.getTeardownPolicy().getTeardownPolicyName());
-      }
-
-      if (options.isStreaming()) {
-        job.setType("JOB_TYPE_STREAMING");
-      } else {
-        job.setType("JOB_TYPE_BATCH");
-        workerPool.setDiskType(options.getWorkerDiskType());
-      }
-
-      if (options.getWorkerMachineType() != null) {
-        workerPool.setMachineType(options.getWorkerMachineType());
-      }
-
-      workerPool.setPackages(packages);
-      workerPool.setNumWorkers(options.getNumWorkers());
-
-      if (options.isStreaming()) {
-        // Use separate data disk for streaming.
-        Disk disk = new Disk();
-        disk.setDiskType(options.getWorkerDiskType());
-        workerPool.setDataDisks(Collections.singletonList(disk));
-      }
-      if (!Strings.isNullOrEmpty(options.getZone())) {
-        workerPool.setZone(options.getZone());
-      }
-      if (!Strings.isNullOrEmpty(options.getNetwork())) {
-        workerPool.setNetwork(options.getNetwork());
-      }
-      if (!Strings.isNullOrEmpty(options.getSubnetwork())) {
-        workerPool.setSubnetwork(options.getSubnetwork());
-      }
-      if (options.getDiskSizeGb() > 0) {
-        workerPool.setDiskSizeGb(options.getDiskSizeGb());
-      }
-      AutoscalingSettings settings = new AutoscalingSettings();
-      if (options.getAutoscalingAlgorithm() != null) {
-        settings.setAlgorithm(options.getAutoscalingAlgorithm().getAlgorithm());
-      }
-      settings.setMaxNumWorkers(options.getMaxNumWorkers());
-      workerPool.setAutoscalingSettings(settings);
-
-      List<WorkerPool> workerPools = new LinkedList<>();
-
-      workerPools.add(workerPool);
-      environment.setWorkerPools(workerPools);
-
-      pipeline.traverseTopologically(this);
-      return job;
-    }
-
-    @Override
-    public DataflowPipelineOptions getPipelineOptions() {
-      return options;
-    }
-
-    @Override
-    public <InputT extends PInput> InputT getInput(PTransform<InputT, ?> transform) {
-      return (InputT) getCurrentTransform(transform).getInput();
-    }
-
-    @Override
-    public <OutputT extends POutput> OutputT getOutput(PTransform<?, OutputT> transform) {
-      return (OutputT) getCurrentTransform(transform).getOutput();
-    }
-
-    @Override
-    public String getFullName(PTransform<?, ?> transform) {
-      return getCurrentTransform(transform).getFullName();
-    }
-
-    private AppliedPTransform<?, ?, ?> getCurrentTransform(PTransform<?, ?> transform) {
-      checkArgument(
-          currentTransform != null && currentTransform.getTransform() == transform,
-          "can only be called with current transform");
-      return currentTransform;
-    }
-
-    @Override
-    public void enterCompositeTransform(TransformTreeNode node) {
-    }
-
-    @Override
-    public void leaveCompositeTransform(TransformTreeNode node) {
-    }
-
-    @Override
-    public void visitTransform(TransformTreeNode node) {
-      PTransform<?, ?> transform = node.getTransform();
-      TransformTranslator translator =
-          getTransformTranslator(transform.getClass());
-      if (translator == null) {
-        throw new IllegalStateException(
-            "no translator registered for " + transform);
-      }
-      LOG.debug("Translating {}", transform);
-      currentTransform = AppliedPTransform.of(
-          node.getFullName(), node.getInput(), node.getOutput(), (PTransform) transform);
-      translator.translate(transform, this);
-      currentTransform = null;
-    }
-
-    @Override
-    public void visitValue(PValue value, TransformTreeNode producer) {
-      LOG.debug("Checking translation of {}", value);
-      if (value.getProducingTransformInternal() == null) {
-        throw new RuntimeException(
-            "internal error: expecting a PValue "
-            + "to have a producingTransform");
-      }
-      if (!producer.isCompositeNode()) {
-        // Primitive transforms are the only ones assigned step names.
-        asOutputReference(value);
-      }
-    }
-
-    @Override
-    public void addStep(PTransform<?, ?> transform, String type) {
-      String stepName = genStepName();
-      if (stepNames.put(getCurrentTransform(transform), stepName) != null) {
-        throw new IllegalArgumentException(
-            transform + " already has a name specified");
-      }
-      // Start the next "steps" list item.
-      List<Step> steps = job.getSteps();
-      if (steps == null) {
-        steps = new LinkedList<>();
-        job.setSteps(steps);
-      }
-
-      currentStep = new Step();
-      currentStep.setName(stepName);
-      currentStep.setKind(type);
-      steps.add(currentStep);
-      addInput(PropertyNames.USER_NAME, getFullName(transform));
-      addDisplayData(PropertyNames.DISPLAY_DATA, DisplayData.from(transform));
-    }
-
-    @Override
-    public void addStep(PTransform<?, ? extends PValue> transform, Step original) {
-      Step step = original.clone();
-      String stepName = step.getName();
-      if (stepNames.put(getCurrentTransform(transform), stepName) != null) {
-        throw new IllegalArgumentException(transform + " already has a name specified");
-      }
-
-      Map<String, Object> properties = step.getProperties();
-      if (properties != null) {
-        @Nullable List<Map<String, Object>> outputInfoList = null;
-        try {
-          // TODO: This should be done via a Structs accessor.
-          @Nullable List<Map<String, Object>> list =
-              (List<Map<String, Object>>) properties.get(PropertyNames.OUTPUT_INFO);
-          outputInfoList = list;
-        } catch (Exception e) {
-          throw new RuntimeException("Inconsistent dataflow pipeline translation", e);
-        }
-        if (outputInfoList != null && outputInfoList.size() > 0) {
-          Map<String, Object> firstOutputPort = outputInfoList.get(0);
-          @Nullable String name;
-          try {
-            name = getString(firstOutputPort, PropertyNames.OUTPUT_NAME);
-          } catch (Exception e) {
-            name = null;
-          }
-          if (name != null) {
-            registerOutputName(getOutput(transform), name);
-          }
-        }
-      }
-
-      List<Step> steps = job.getSteps();
-      if (steps == null) {
-        steps = new LinkedList<>();
-        job.setSteps(steps);
-      }
-      currentStep = step;
-      steps.add(step);
-    }
-
-    @Override
-    public void addEncodingInput(Coder<?> coder) {
-      CloudObject encoding = SerializableUtils.ensureSerializable(coder);
-      addObject(getProperties(), PropertyNames.ENCODING, encoding);
-    }
-
-    @Override
-    public void addInput(String name, Boolean value) {
-      addBoolean(getProperties(), name, value);
-    }
-
-    @Override
-    public void addInput(String name, String value) {
-      addString(getProperties(), name, value);
-    }
-
-    @Override
-    public void addInput(String name, Long value) {
-      addLong(getProperties(), name, value);
-    }
-
-    @Override
-    public void addInput(String name, Map<String, Object> elements) {
-      addDictionary(getProperties(), name, elements);
-    }
-
-    @Override
-    public void addInput(String name, List<? extends Map<String, Object>> elements) {
-      addList(getProperties(), name, elements);
-    }
-
-    @Override
-    public void addInput(String name, PInput value) {
-      if (value instanceof PValue) {
-        addInput(name, asOutputReference((PValue) value));
-      } else {
-        throw new IllegalStateException("Input must be a PValue");
-      }
-    }
-
-    @Override
-    public void addOutput(String name, PValue value) {
-      Coder<?> coder;
-      if (value instanceof TypedPValue) {
-        coder = ((TypedPValue<?>) value).getCoder();
-        if (value instanceof PCollection) {
-          // Wrap the PCollection element Coder inside a WindowedValueCoder.
-          coder = WindowedValue.getFullCoder(
-              coder,
-              ((PCollection<?>) value).getWindowingStrategy().getWindowFn().windowCoder());
-        }
-      } else {
-        // No output coder to encode.
-        coder = null;
-      }
-      addOutput(name, value, coder);
-    }
-
-    @Override
-    public void addValueOnlyOutput(String name, PValue value) {
-      Coder<?> coder;
-      if (value instanceof TypedPValue) {
-        coder = ((TypedPValue<?>) value).getCoder();
-        if (value instanceof PCollection) {
-          // Wrap the PCollection element Coder inside a ValueOnly
-          // WindowedValueCoder.
-          coder = WindowedValue.getValueOnlyCoder(coder);
-        }
-      } else {
-        // No output coder to encode.
-        coder = null;
-      }
-      addOutput(name, value, coder);
-    }
-
-    @Override
-    public void addCollectionToSingletonOutput(String name,
-                                               PValue inputValue,
-                                               PValue outputValue) {
-      Coder<?> inputValueCoder =
-          Preconditions.checkNotNull(outputCoders.get(inputValue));
-      // The inputValueCoder for the input PCollection should be some
-      // WindowedValueCoder of the input PCollection's element
-      // coder.
-      Preconditions.checkState(
-          inputValueCoder instanceof WindowedValue.WindowedValueCoder);
-      // The outputValueCoder for the output should be an
-      // IterableCoder of the inputValueCoder. This is a property
-      // of the backend "CollectionToSingleton" step.
-      Coder<?> outputValueCoder = IterableCoder.of(inputValueCoder);
-      addOutput(name, outputValue, outputValueCoder);
-    }
-
-    /**
-     * Adds an output with the given name to the previously added
-     * Dataflow step, producing the specified output {@code PValue}
-     * with the given {@code Coder} (if not {@code null}).
-     */
-    private void addOutput(String name, PValue value, Coder<?> valueCoder) {
-      registerOutputName(value, name);
-
-      Map<String, Object> properties = getProperties();
-      @Nullable List<Map<String, Object>> outputInfoList = null;
-      try {
-        // TODO: This should be done via a Structs accessor.
-        outputInfoList = (List<Map<String, Object>>) properties.get(PropertyNames.OUTPUT_INFO);
-      } catch (Exception e) {
-        throw new RuntimeException("Inconsistent dataflow pipeline translation", e);
-      }
-      if (outputInfoList == null) {
-        outputInfoList = new ArrayList<>();
-        // TODO: This should be done via a Structs accessor.
-        properties.put(PropertyNames.OUTPUT_INFO, outputInfoList);
-      }
-
-      Map<String, Object> outputInfo = new HashMap<>();
-      addString(outputInfo, PropertyNames.OUTPUT_NAME, name);
-      addString(outputInfo, PropertyNames.USER_NAME, value.getName());
-      if (value instanceof PCollection
-          && runner.doesPCollectionRequireIndexedFormat((PCollection<?>) value)) {
-        addBoolean(outputInfo, PropertyNames.USE_INDEXED_FORMAT, true);
-      }
-      if (valueCoder != null) {
-        // Verify that encoding can be decoded, in order to catch serialization
-        // failures as early as possible.
-        CloudObject encoding = SerializableUtils.ensureSerializable(valueCoder);
-        addObject(outputInfo, PropertyNames.ENCODING, encoding);
-        outputCoders.put(value, valueCoder);
-      }
-
-      outputInfoList.add(outputInfo);
-    }
-
-    private void addDisplayData(String name, DisplayData displayData) {
-      List<Map<String, Object>> serializedItems = Lists.newArrayList();
-      for (DisplayData.Item item : displayData.items()) {
-        serializedItems.add(MAPPER.convertValue(item, Map.class));
-      }
-
-      addList(getProperties(), name, serializedItems);
-    }
-
-    @Override
-    public OutputReference asOutputReference(PValue value) {
-      AppliedPTransform<?, ?, ?> transform =
-          value.getProducingTransformInternal();
-      String stepName = stepNames.get(transform);
-      if (stepName == null) {
-        throw new IllegalArgumentException(transform + " doesn't have a name specified");
-      }
-
-      String outputName = outputNames.get(value);
-      if (outputName == null) {
-        throw new IllegalArgumentException(
-            "output " + value + " doesn't have a name specified");
-      }
-
-      return new OutputReference(stepName, outputName);
-    }
-
-    private Map<String, Object> getProperties() {
-      Map<String, Object> properties = currentStep.getProperties();
-      if (properties == null) {
-        properties = new HashMap<>();
-        currentStep.setProperties(properties);
-      }
-      return properties;
-    }
-
-    /**
-     * Returns a fresh Dataflow step name.
-     */
-    private String genStepName() {
-      return "s" + (stepNames.size() + 1);
-    }
-
-    /**
-     * Records the name of the given output PValue,
-     * within its producing transform.
-     */
-    private void registerOutputName(POutput value, String name) {
-      if (outputNames.put(value, name) != null) {
-        throw new IllegalArgumentException(
-            "output " + value + " already has a name specified");
-      }
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  @Override
-  public String toString() {
-    return "DataflowPipelineTranslator#" + hashCode();
-  }
-
-
-  ///////////////////////////////////////////////////////////////////////////
-
-  static {
-    registerTransformTranslator(
-        View.CreatePCollectionView.class,
-        new TransformTranslator<View.CreatePCollectionView>() {
-          @Override
-          public void translate(
-              View.CreatePCollectionView transform,
-              TranslationContext context) {
-            translateTyped(transform, context);
-          }
-
-          private <ElemT, ViewT> void translateTyped(
-              View.CreatePCollectionView<ElemT, ViewT> transform,
-              TranslationContext context) {
-            context.addStep(transform, "CollectionToSingleton");
-            context.addInput(PropertyNames.PARALLEL_INPUT, context.getInput(transform));
-            context.addCollectionToSingletonOutput(
-                PropertyNames.OUTPUT,
-                context.getInput(transform),
-                context.getOutput(transform));
-          }
-        });
-
-    DataflowPipelineTranslator.registerTransformTranslator(
-        Combine.GroupedValues.class,
-        new DataflowPipelineTranslator.TransformTranslator<Combine.GroupedValues>() {
-          @Override
-          public void translate(
-              Combine.GroupedValues transform,
-              DataflowPipelineTranslator.TranslationContext context) {
-            translateHelper(transform, context);
-          }
-
-          private <K, InputT, OutputT> void translateHelper(
-              final Combine.GroupedValues<K, InputT, OutputT> transform,
-              DataflowPipelineTranslator.TranslationContext context) {
-            context.addStep(transform, "CombineValues");
-            translateInputs(context.getInput(transform), transform.getSideInputs(), context);
-
-            AppliedCombineFn<? super K, ? super InputT, ?, OutputT> fn =
-                transform.getAppliedFn(
-                    context.getInput(transform).getPipeline().getCoderRegistry(),
-                context.getInput(transform).getCoder(),
-                context.getInput(transform).getWindowingStrategy());
-
-            context.addEncodingInput(fn.getAccumulatorCoder());
-            context.addInput(
-                PropertyNames.SERIALIZED_FN,
-                byteArrayToJsonString(serializeToByteArray(fn)));
-            context.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));
-          }
-        });
-
-    registerTransformTranslator(
-        Create.Values.class,
-        new TransformTranslator<Create.Values>() {
-          @Override
-          public void translate(
-              Create.Values transform,
-              TranslationContext context) {
-            createHelper(transform, context);
-          }
-
-          private <T> void createHelper(
-              Create.Values<T> transform,
-              TranslationContext context) {
-            context.addStep(transform, "CreateCollection");
-
-            Coder<T> coder = context.getOutput(transform).getCoder();
-            List<CloudObject> elements = new LinkedList<>();
-            for (T elem : transform.getElements()) {
-              byte[] encodedBytes;
-              try {
-                encodedBytes = encodeToByteArray(coder, elem);
-              } catch (CoderException exn) {
-                // TODO: Put in better element printing:
-                // truncate if too long.
-                throw new IllegalArgumentException(
-                    "Unable to encode element '" + elem + "' of transform '" + transform
-                    + "' using coder '" + coder + "'.",
-                    exn);
-              }
-              String encodedJson = byteArrayToJsonString(encodedBytes);
-              assert Arrays.equals(encodedBytes,
-                                   jsonStringToByteArray(encodedJson));
-              elements.add(CloudObject.forString(encodedJson));
-            }
-            context.addInput(PropertyNames.ELEMENT, elements);
-            context.addValueOnlyOutput(PropertyNames.OUTPUT, context.getOutput(transform));
-          }
-        });
-
-    registerTransformTranslator(
-        Flatten.FlattenPCollectionList.class,
-        new TransformTranslator<Flatten.FlattenPCollectionList>() {
-          @Override
-          public void translate(
-              Flatten.FlattenPCollectionList transform,
-              TranslationContext context) {
-            flattenHelper(transform, context);
-          }
-
-          private <T> void flattenHelper(
-              Flatten.FlattenPCollectionList<T> transform,
-              TranslationContext context) {
-            context.addStep(transform, "Flatten");
-
-            List<OutputReference> inputs = new LinkedList<>();
-            for (PCollection<T> input : context.getInput(transform).getAll()) {
-              inputs.add(context.asOutputReference(input));
-            }
-            context.addInput(PropertyNames.INPUTS, inputs);
-            context.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));
-          }
-        });
-
-    registerTransformTranslator(
-        GroupByKeyAndSortValuesOnly.class,
-        new TransformTranslator<GroupByKeyAndSortValuesOnly>() {
-          @Override
-          public void translate(
-              GroupByKeyAndSortValuesOnly transform,
-              TranslationContext context) {
-            groupByKeyAndSortValuesHelper(transform, context);
-          }
-
-          private <K1, K2, V> void groupByKeyAndSortValuesHelper(
-              GroupByKeyAndSortValuesOnly<K1, K2, V> transform,
-              TranslationContext context) {
-            context.addStep(transform, "GroupByKey");
-            context.addInput(PropertyNames.PARALLEL_INPUT, context.getInput(transform));
-            context.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));
-            context.addInput(PropertyNames.SORT_VALUES, true);
-
-            // TODO: Add support for combiner lifting once the need arises.
-            context.addInput(
-                PropertyNames.DISALLOW_COMBINER_LIFTING, true);
-          }
-        });
-
-    registerTransformTranslator(
-        GroupByKey.class,
-        new TransformTranslator<GroupByKey>() {
-          @Override
-          public void translate(
-              GroupByKey transform,
-              TranslationContext context) {
-            groupByKeyHelper(transform, context);
-          }
-
-          private <K, V> void groupByKeyHelper(
-              GroupByKey<K, V> transform,
-              TranslationContext context) {
-            context.addStep(transform, "GroupByKey");
-            context.addInput(PropertyNames.PARALLEL_INPUT, context.getInput(transform));
-            context.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));
-
-            WindowingStrategy<?, ?> windowingStrategy =
-                context.getInput(transform).getWindowingStrategy();
-            boolean isStreaming =
-                context.getPipelineOptions().as(StreamingOptions.class).isStreaming();
-            boolean disallowCombinerLifting =
-                !windowingStrategy.getWindowFn().isNonMerging()
-                || (isStreaming && !transform.fewKeys())
-                // TODO: Allow combiner lifting on the non-default trigger, as appropriate.
-                || !(windowingStrategy.getTrigger().getSpec() instanceof DefaultTrigger);
-            context.addInput(
-                PropertyNames.DISALLOW_COMBINER_LIFTING, disallowCombinerLifting);
-            context.addInput(
-                PropertyNames.SERIALIZED_FN,
-                byteArrayToJsonString(serializeToByteArray(windowingStrategy)));
-            context.addInput(
-                PropertyNames.IS_MERGING_WINDOW_FN,
-                !windowingStrategy.getWindowFn().isNonMerging());
-          }
-        });
-
-    registerTransformTranslator(
-        ParDo.BoundMulti.class,
-        new TransformTranslator<ParDo.BoundMulti>() {
-          @Override
-          public void translate(
-              ParDo.BoundMulti transform,
-              TranslationContext context) {
-            translateMultiHelper(transform, context);
-          }
-
-          private <InputT, OutputT> void translateMultiHelper(
-              ParDo.BoundMulti<InputT, OutputT> transform,
-              TranslationContext context) {
-            context.addStep(transform, "ParallelDo");
-            translateInputs(context.getInput(transform), transform.getSideInputs(), context);
-            translateFn(transform.getFn(), context.getInput(transform).getWindowingStrategy(),
-                transform.getSideInputs(), context.getInput(transform).getCoder(), context);
-            translateOutputs(context.getOutput(transform), context);
-          }
-        });
-
-    registerTransformTranslator(
-        ParDo.Bound.class,
-        new TransformTranslator<ParDo.Bound>() {
-          @Override
-          public void translate(
-              ParDo.Bound transform,
-              TranslationContext context) {
-            translateSingleHelper(transform, context);
-          }
-
-          private <InputT, OutputT> void translateSingleHelper(
-              ParDo.Bound<InputT, OutputT> transform,
-              TranslationContext context) {
-            context.addStep(transform, "ParallelDo");
-            translateInputs(context.getInput(transform), transform.getSideInputs(), context);
-            translateFn(
-                transform.getFn(),
-                context.getInput(transform).getWindowingStrategy(),
-                transform.getSideInputs(), context.getInput(transform).getCoder(), context);
-            context.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));
-          }
-        });
-
-    registerTransformTranslator(
-        Window.Bound.class,
-        new DataflowPipelineTranslator.TransformTranslator<Window.Bound>() {
-          @Override
-          public void translate(
-              Window.Bound transform, TranslationContext context) {
-            translateHelper(transform, context);
-          }
-
-          private <T> void translateHelper(
-              Window.Bound<T> transform, TranslationContext context) {
-            context.addStep(transform, "Bucket");
-            context.addInput(PropertyNames.PARALLEL_INPUT, context.getInput(transform));
-            context.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));
-
-            WindowingStrategy<?, ?> strategy = context.getOutput(transform).getWindowingStrategy();
-            byte[] serializedBytes = serializeToByteArray(strategy);
-            String serializedJson = byteArrayToJsonString(serializedBytes);
-            assert Arrays.equals(serializedBytes,
-                                 jsonStringToByteArray(serializedJson));
-            context.addInput(PropertyNames.SERIALIZED_FN, serializedJson);
-          }
-        });
-
-    ///////////////////////////////////////////////////////////////////////////
-    // IO Translation.
-
-    registerTransformTranslator(
-        BigQueryIO.Read.Bound.class, new BigQueryIOTranslator.ReadTranslator());
-    registerTransformTranslator(
-        BigQueryIO.Write.Bound.class, new BigQueryIOTranslator.WriteTranslator());
-
-    registerTransformTranslator(
-        PubsubIO.Read.Bound.class, new PubsubIOTranslator.ReadTranslator());
-    registerTransformTranslator(
-        DataflowPipelineRunner.StreamingPubsubIOWrite.class,
-        new PubsubIOTranslator.WriteTranslator());
-
-    registerTransformTranslator(Read.Bounded.class, new ReadTranslator());
-  }
-
-  private static void translateInputs(
-      PCollection<?> input,
-      List<PCollectionView<?>> sideInputs,
-      TranslationContext context) {
-    context.addInput(PropertyNames.PARALLEL_INPUT, input);
-    translateSideInputs(sideInputs, context);
-  }
-
-  // Used for ParDo
-  private static void translateSideInputs(
-      List<PCollectionView<?>> sideInputs,
-      TranslationContext context) {
-    Map<String, Object> nonParInputs = new HashMap<>();
-
-    for (PCollectionView<?> view : sideInputs) {
-      nonParInputs.put(
-          view.getTagInternal().getId(),
-          context.asOutputReference(view));
-    }
-
-    context.addInput(PropertyNames.NON_PARALLEL_INPUTS, nonParInputs);
-  }
-
-  private static void translateFn(
-      DoFn fn,
-      WindowingStrategy windowingStrategy,
-      Iterable<PCollectionView<?>> sideInputs,
-      Coder inputCoder,
-      TranslationContext context) {
-    context.addInput(PropertyNames.USER_FN, fn.getClass().getName());
-    context.addInput(
-        PropertyNames.SERIALIZED_FN,
-        byteArrayToJsonString(serializeToByteArray(
-            new DoFnInfo(fn, windowingStrategy, sideInputs, inputCoder))));
-  }
-
-  private static void translateOutputs(
-      PCollectionTuple outputs,
-      TranslationContext context) {
-    for (Map.Entry<TupleTag<?>, PCollection<?>> entry
-             : outputs.getAll().entrySet()) {
-      TupleTag<?> tag = entry.getKey();
-      PCollection<?> output = entry.getValue();
-      context.addOutput(tag.getId(), output);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowServiceException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowServiceException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowServiceException.java
deleted file mode 100644
index 6e8301b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowServiceException.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import javax.annotation.Nullable;
-
-/**
- * Signals there was an error retrieving information about a job from the Cloud Dataflow Service.
- */
-public class DataflowServiceException extends DataflowJobException {
-  DataflowServiceException(DataflowPipelineJob job, String message) {
-    this(job, message, null);
-  }
-
-  DataflowServiceException(DataflowPipelineJob job, String message, @Nullable Throwable cause) {
-    super(job, message, cause);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipeline.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipeline.java
deleted file mode 100644
index 5217a90..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipeline.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.options.DirectPipelineOptions;
-
-/**
- * A {@link DirectPipeline} is a {@link Pipeline} that returns
- * {@link DirectPipelineRunner.EvaluationResults} when it is
- * {@link com.google.cloud.dataflow.sdk.Pipeline#run()}.
- */
-public class DirectPipeline extends Pipeline {
-
-  /**
-   * Creates and returns a new DirectPipeline instance for tests.
-   */
-  public static DirectPipeline createForTest() {
-    DirectPipelineRunner runner = DirectPipelineRunner.createForTest();
-    return new DirectPipeline(runner, runner.getPipelineOptions());
-  }
-
-  private DirectPipeline(DirectPipelineRunner runner, DirectPipelineOptions options) {
-    super(runner, options);
-  }
-
-  @Override
-  public DirectPipelineRunner.EvaluationResults run() {
-    return (DirectPipelineRunner.EvaluationResults) super.run();
-  }
-
-  @Override
-  public DirectPipelineRunner getRunner() {
-    return (DirectPipelineRunner) super.getRunner();
-  }
-
-  @Override
-  public String toString() {
-    return "DirectPipeline#" + hashCode();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrar.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrar.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrar.java
deleted file mode 100644
index f2dd40c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DirectPipelineRegistrar.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.auto.service.AutoService;
-import com.google.cloud.dataflow.sdk.options.DirectPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar;
-import com.google.common.collect.ImmutableList;
-
-/**
- * Contains the {@link PipelineOptionsRegistrar} and {@link PipelineRunnerRegistrar} for
- * the {@link DirectPipeline}.
- */
-public class DirectPipelineRegistrar {
-  private DirectPipelineRegistrar() { }
-
-  /**
-   * Register the {@link DirectPipelineRunner}.
-   */
-  @AutoService(PipelineRunnerRegistrar.class)
-  public static class Runner implements PipelineRunnerRegistrar {
-    @Override
-    public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
-      return ImmutableList.<Class<? extends PipelineRunner<?>>>of(DirectPipelineRunner.class);
-    }
-  }
-
-  /**
-   * Register the {@link DirectPipelineOptions}.
-   */
-  @AutoService(PipelineOptionsRegistrar.class)
-  public static class Options implements PipelineOptionsRegistrar {
-    @Override
-    public Iterable<Class<? extends PipelineOptions>> getPipelineOptions() {
-      return ImmutableList.<Class<? extends PipelineOptions>>of(DirectPipelineOptions.class);
-    }
-  }
-}

[60/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

Directory reorganization

Move Java examples from "examples/" into "examples/java".


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/2eaa709c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/2eaa709c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/2eaa709c

Branch: refs/heads/master
Commit: 2eaa709c7c2bb7e101a684025c357b25141fcf1f
Parents: 75cfa4a
Author: Davor Bonaci <da...@google.com>
Authored: Wed Mar 23 17:05:40 2016 -0700
Committer: Davor Bonaci <da...@google.com>
Committed: Wed Mar 23 18:33:27 2016 -0700

----------------------------------------------------------------------
 examples/README.md                              |  95 ----
 examples/java/README.md                         |  95 ++++
 examples/java/pom.xml                           | 394 +++++++++++++
 .../dataflow/examples/DebuggingWordCount.java   | 182 ++++++
 .../dataflow/examples/MinimalWordCount.java     | 117 ++++
 .../dataflow/examples/WindowedWordCount.java    | 269 +++++++++
 .../cloud/dataflow/examples/WordCount.java      | 206 +++++++
 .../examples/common/DataflowExampleOptions.java |  34 ++
 .../examples/common/DataflowExampleUtils.java   | 485 ++++++++++++++++
 .../common/ExampleBigQueryTableOptions.java     |  53 ++
 ...xamplePubsubTopicAndSubscriptionOptions.java |  44 ++
 .../common/ExamplePubsubTopicOptions.java       |  44 ++
 .../examples/common/PubsubFileInjector.java     | 153 +++++
 .../examples/complete/AutoComplete.java         | 516 +++++++++++++++++
 .../cloud/dataflow/examples/complete/README.md  |  44 ++
 .../examples/complete/StreamingWordExtract.java | 163 ++++++
 .../cloud/dataflow/examples/complete/TfIdf.java | 431 ++++++++++++++
 .../examples/complete/TopWikipediaSessions.java | 223 ++++++++
 .../examples/complete/TrafficMaxLaneFlow.java   | 425 ++++++++++++++
 .../examples/complete/TrafficRoutes.java        | 459 +++++++++++++++
 .../examples/cookbook/BigQueryTornadoes.java    | 179 ++++++
 .../cookbook/CombinePerKeyExamples.java         | 223 ++++++++
 .../examples/cookbook/DatastoreWordCount.java   | 269 +++++++++
 .../examples/cookbook/DeDupExample.java         | 100 ++++
 .../examples/cookbook/FilterExamples.java       | 266 +++++++++
 .../examples/cookbook/JoinExamples.java         | 185 ++++++
 .../examples/cookbook/MaxPerKeyExamples.java    | 173 ++++++
 .../cloud/dataflow/examples/cookbook/README.md  |  55 ++
 .../examples/cookbook/TriggerExample.java       | 564 +++++++++++++++++++
 .../examples/DebuggingWordCountTest.java        |  45 ++
 .../cloud/dataflow/examples/WordCountTest.java  |  85 +++
 .../examples/complete/AutoCompleteTest.java     | 181 ++++++
 .../dataflow/examples/complete/TfIdfTest.java   |  67 +++
 .../complete/TopWikipediaSessionsTest.java      |  62 ++
 .../cookbook/BigQueryTornadoesTest.java         |  80 +++
 .../cookbook/CombinePerKeyExamplesTest.java     |  90 +++
 .../examples/cookbook/DeDupExampleTest.java     |  83 +++
 .../examples/cookbook/FilterExamplesTest.java   |  85 +++
 .../examples/cookbook/JoinExamplesTest.java     | 114 ++++
 .../cookbook/MaxPerKeyExamplesTest.java         |  85 +++
 .../examples/cookbook/TriggerExampleTest.java   | 139 +++++
 examples/pom.xml                                | 394 -------------
 .../dataflow/examples/DebuggingWordCount.java   | 182 ------
 .../dataflow/examples/MinimalWordCount.java     | 117 ----
 .../dataflow/examples/WindowedWordCount.java    | 269 ---------
 .../cloud/dataflow/examples/WordCount.java      | 206 -------
 .../examples/common/DataflowExampleOptions.java |  34 --
 .../examples/common/DataflowExampleUtils.java   | 485 ----------------
 .../common/ExampleBigQueryTableOptions.java     |  53 --
 ...xamplePubsubTopicAndSubscriptionOptions.java |  44 --
 .../common/ExamplePubsubTopicOptions.java       |  44 --
 .../examples/common/PubsubFileInjector.java     | 153 -----
 .../examples/complete/AutoComplete.java         | 516 -----------------
 .../cloud/dataflow/examples/complete/README.md  |  44 --
 .../examples/complete/StreamingWordExtract.java | 163 ------
 .../cloud/dataflow/examples/complete/TfIdf.java | 431 --------------
 .../examples/complete/TopWikipediaSessions.java | 223 --------
 .../examples/complete/TrafficMaxLaneFlow.java   | 425 --------------
 .../examples/complete/TrafficRoutes.java        | 459 ---------------
 .../examples/cookbook/BigQueryTornadoes.java    | 179 ------
 .../cookbook/CombinePerKeyExamples.java         | 223 --------
 .../examples/cookbook/DatastoreWordCount.java   | 269 ---------
 .../examples/cookbook/DeDupExample.java         | 100 ----
 .../examples/cookbook/FilterExamples.java       | 266 ---------
 .../examples/cookbook/JoinExamples.java         | 185 ------
 .../examples/cookbook/MaxPerKeyExamples.java    | 173 ------
 .../cloud/dataflow/examples/cookbook/README.md  |  55 --
 .../examples/cookbook/TriggerExample.java       | 564 -------------------
 .../examples/DebuggingWordCountTest.java        |  45 --
 .../cloud/dataflow/examples/WordCountTest.java  |  85 ---
 .../examples/complete/AutoCompleteTest.java     | 181 ------
 .../dataflow/examples/complete/TfIdfTest.java   |  67 ---
 .../complete/TopWikipediaSessionsTest.java      |  62 --
 .../cookbook/BigQueryTornadoesTest.java         |  80 ---
 .../cookbook/CombinePerKeyExamplesTest.java     |  90 ---
 .../examples/cookbook/DeDupExampleTest.java     |  83 ---
 .../examples/cookbook/FilterExamplesTest.java   |  85 ---
 .../examples/cookbook/JoinExamplesTest.java     | 114 ----
 .../cookbook/MaxPerKeyExamplesTest.java         |  85 ---
 .../examples/cookbook/TriggerExampleTest.java   | 139 -----
 pom.xml                                         |   2 +-
 travis/test_wordcount.sh                        |   4 +-
 82 files changed, 7470 insertions(+), 7470 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/README.md
----------------------------------------------------------------------
diff --git a/examples/README.md b/examples/README.md
deleted file mode 100644
index cbcd01f..0000000
--- a/examples/README.md
+++ /dev/null
@@ -1,95 +0,0 @@
-# Example Pipelines
-
-The examples included in this module serve to demonstrate the basic
-functionality of Google Cloud Dataflow, and act as starting points for
-the development of more complex pipelines.
-
-## Word Count
-
-A good starting point for new users is our set of
-[word count](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples) examples, which computes word frequencies.  This series of four successively more detailed pipelines is described in detail in the accompanying [walkthrough](https://cloud.google.com/dataflow/examples/wordcount-example).
-
-1. [`MinimalWordCount`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCount.java) is the simplest word count pipeline and introduces basic concepts like [Pipelines](https://cloud.google.com/dataflow/model/pipelines),
-[PCollections](https://cloud.google.com/dataflow/model/pcollection),
-[ParDo](https://cloud.google.com/dataflow/model/par-do),
-and [reading and writing data](https://cloud.google.com/dataflow/model/reading-and-writing-data) from external storage.
-
-1. [`WordCount`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java) introduces Dataflow best practices like [PipelineOptions](https://cloud.google.com/dataflow/pipelines/constructing-your-pipeline#Creating) and custom [PTransforms](https://cloud.google.com/dataflow/model/composite-transforms).
-
-1. [`DebuggingWordCount`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java)
-shows how to view live aggregators in the [Dataflow Monitoring Interface](https://cloud.google.com/dataflow/pipelines/dataflow-monitoring-intf), get the most out of
-[Cloud Logging](https://cloud.google.com/dataflow/pipelines/logging) integration, and start writing
-[good tests](https://cloud.google.com/dataflow/pipelines/testing-your-pipeline).
-
-1. [`WindowedWordCount`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java) shows how to run the same pipeline over either unbounded PCollections in streaming mode or bounded PCollections in batch mode.
-
-## Building and Running
-
-The examples in this repository can be built and executed from the root directory by running:
-
-    mvn compile exec:java -pl examples \
-    -Dexec.mainClass=<MAIN CLASS> \
-    -Dexec.args="<EXAMPLE-SPECIFIC ARGUMENTS>"
-
-For example, you can execute the `WordCount` pipeline on your local machine as follows:
-
-    mvn compile exec:java -pl examples \
-    -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \
-    -Dexec.args="--inputFile=<LOCAL INPUT FILE> --output=<LOCAL OUTPUT FILE>"
-
-Once you have followed the general Cloud Dataflow
-[Getting Started](https://cloud.google.com/dataflow/getting-started) instructions, you can execute
-the same pipeline on fully managed resources in Google Cloud Platform:
-
-    mvn compile exec:java -pl examples \
-    -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \
-    -Dexec.args="--project=<YOUR CLOUD PLATFORM PROJECT ID> \
-    --stagingLocation=<YOUR CLOUD STORAGE LOCATION> \
-    --runner=BlockingDataflowPipelineRunner"
-
-Make sure to use your project id, not the project number or the descriptive name.
-The Cloud Storage location should be entered in the form of
-`gs://bucket/path/to/staging/directory`.
-
-Alternatively, you may choose to bundle all dependencies into a single JAR and
-execute it outside of the Maven environment. For example, you can execute the
-following commands to create the
-bundled JAR of the examples and execute it both locally and in Cloud
-Platform:
-
-    mvn package
-
-    java -cp examples/target/google-cloud-dataflow-java-examples-all-bundled-<VERSION>.jar \
-    com.google.cloud.dataflow.examples.WordCount \
-    --inputFile=<INPUT FILE PATTERN> --output=<OUTPUT FILE>
-
-    java -cp examples/target/google-cloud-dataflow-java-examples-all-bundled-<VERSION>.jar \
-    com.google.cloud.dataflow.examples.WordCount \
-    --project=<YOUR CLOUD PLATFORM PROJECT ID> \
-    --stagingLocation=<YOUR CLOUD STORAGE LOCATION> \
-    --runner=BlockingDataflowPipelineRunner
-
-Other examples can be run similarly by replacing the `WordCount` class path with the example classpath, e.g.
-`com.google.cloud.dataflow.examples.cookbook.BigQueryTornadoes`,
-and adjusting runtime options under the `Dexec.args` parameter, as specified in
-the example itself.
-
-Note that when running Maven on Microsoft Windows platform, backslashes (`\`)
-under the `Dexec.args` parameter should be escaped with another backslash. For
-example, input file pattern of `c:\*.txt` should be entered as `c:\\*.txt`.
-
-## Beyond Word Count
-
-After you've finished running your first few word count pipelines, take a look at the [`cookbook`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook)
-directory for some common and useful patterns like joining, filtering, and combining.
-
-The [`complete`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete)
-directory contains a few realistic end-to-end pipelines.
-
-See the
-[Java 8](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/tree/master/examples/src/main/java8/com/google/cloud/dataflow/examples)
-examples as well. This directory includes a Java 8 version of the
-MinimalWordCount example, as well as series of examples in a simple 'mobile
-gaming' domain. This series introduces some advanced concepts and provides
-additional examples of using Java 8 syntax. Other than usage of Java 8 lambda
-expressions, the concepts that are used apply equally well in Java 7.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/README.md
----------------------------------------------------------------------
diff --git a/examples/java/README.md b/examples/java/README.md
new file mode 100644
index 0000000..cbcd01f
--- /dev/null
+++ b/examples/java/README.md
@@ -0,0 +1,95 @@
+# Example Pipelines
+
+The examples included in this module serve to demonstrate the basic
+functionality of Google Cloud Dataflow, and act as starting points for
+the development of more complex pipelines.
+
+## Word Count
+
+A good starting point for new users is our set of
+[word count](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples) examples, which compute word frequencies.  This series of four successively more detailed pipelines is described in detail in the accompanying [walkthrough](https://cloud.google.com/dataflow/examples/wordcount-example).
+
+1. [`MinimalWordCount`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCount.java) is the simplest word count pipeline and introduces basic concepts like [Pipelines](https://cloud.google.com/dataflow/model/pipelines),
+[PCollections](https://cloud.google.com/dataflow/model/pcollection),
+[ParDo](https://cloud.google.com/dataflow/model/par-do),
+and [reading and writing data](https://cloud.google.com/dataflow/model/reading-and-writing-data) from external storage.
+
+1. [`WordCount`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java) introduces Dataflow best practices like [PipelineOptions](https://cloud.google.com/dataflow/pipelines/constructing-your-pipeline#Creating) and custom [PTransforms](https://cloud.google.com/dataflow/model/composite-transforms).
+
+1. [`DebuggingWordCount`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java)
+shows how to view live aggregators in the [Dataflow Monitoring Interface](https://cloud.google.com/dataflow/pipelines/dataflow-monitoring-intf), get the most out of
+[Cloud Logging](https://cloud.google.com/dataflow/pipelines/logging) integration, and start writing
+[good tests](https://cloud.google.com/dataflow/pipelines/testing-your-pipeline).
+
+1. [`WindowedWordCount`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java) shows how to run the same pipeline over either unbounded PCollections in streaming mode or bounded PCollections in batch mode.
+
+## Building and Running
+
+The examples in this repository can be built and executed from the root directory by running:
+
+    mvn compile exec:java -pl examples \
+    -Dexec.mainClass=<MAIN CLASS> \
+    -Dexec.args="<EXAMPLE-SPECIFIC ARGUMENTS>"
+
+For example, you can execute the `WordCount` pipeline on your local machine as follows:
+
+    mvn compile exec:java -pl examples \
+    -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \
+    -Dexec.args="--inputFile=<LOCAL INPUT FILE> --output=<LOCAL OUTPUT FILE>"
+
+Once you have followed the general Cloud Dataflow
+[Getting Started](https://cloud.google.com/dataflow/getting-started) instructions, you can execute
+the same pipeline on fully managed resources in Google Cloud Platform:
+
+    mvn compile exec:java -pl examples \
+    -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \
+    -Dexec.args="--project=<YOUR CLOUD PLATFORM PROJECT ID> \
+    --stagingLocation=<YOUR CLOUD STORAGE LOCATION> \
+    --runner=BlockingDataflowPipelineRunner"
+
+Make sure to use your project id, not the project number or the descriptive name.
+The Cloud Storage location should be entered in the form of
+`gs://bucket/path/to/staging/directory`.
+
+Alternatively, you may choose to bundle all dependencies into a single JAR and
+execute it outside of the Maven environment. For example, you can execute the
+following commands to create the
+bundled JAR of the examples and execute it both locally and in Cloud
+Platform:
+
+    mvn package
+
+    java -cp examples/target/google-cloud-dataflow-java-examples-all-bundled-<VERSION>.jar \
+    com.google.cloud.dataflow.examples.WordCount \
+    --inputFile=<INPUT FILE PATTERN> --output=<OUTPUT FILE>
+
+    java -cp examples/target/google-cloud-dataflow-java-examples-all-bundled-<VERSION>.jar \
+    com.google.cloud.dataflow.examples.WordCount \
+    --project=<YOUR CLOUD PLATFORM PROJECT ID> \
+    --stagingLocation=<YOUR CLOUD STORAGE LOCATION> \
+    --runner=BlockingDataflowPipelineRunner
+
+Other examples can be run similarly by replacing the `WordCount` class name with the fully qualified class name of the desired example, e.g.
+`com.google.cloud.dataflow.examples.cookbook.BigQueryTornadoes`,
+and adjusting runtime options under the `Dexec.args` parameter, as specified in
+the example itself.
+
+Note that when running Maven on the Microsoft Windows platform, backslashes (`\`)
+under the `Dexec.args` parameter should be escaped with another backslash. For
+example, an input file pattern of `c:\*.txt` should be entered as `c:\\*.txt`.
+
+## Beyond Word Count
+
+After you've finished running your first few word count pipelines, take a look at the [`cookbook`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook)
+directory for some common and useful patterns like joining, filtering, and combining.
+
+The [`complete`](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete)
+directory contains a few realistic end-to-end pipelines.
+
+See the
+[Java 8](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/tree/master/examples/src/main/java8/com/google/cloud/dataflow/examples)
+examples as well. This directory includes a Java 8 version of the
+MinimalWordCount example, as well as a series of examples in a simple 'mobile
+gaming' domain. This series introduces some advanced concepts and provides
+additional examples of using Java 8 syntax. Other than usage of Java 8 lambda
+expressions, the concepts that are used apply equally well in Java 7.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java/pom.xml b/examples/java/pom.xml
new file mode 100644
index 0000000..b762c84
--- /dev/null
+++ b/examples/java/pom.xml
@@ -0,0 +1,394 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.beam</groupId>
+    <artifactId>parent</artifactId>
+    <version>0.1.0-incubating-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>java-examples-all</artifactId>
+  <name>Apache Beam :: Examples :: Java All</name>
+  <description>Apache Beam SDK provides a simple, Java-based
+  interface for processing virtually any size data. This
+  artifact includes all Apache Beam Java SDK examples.</description>
+
+  <packaging>jar</packaging>
+
+  <profiles>
+    <profile>
+      <id>DataflowPipelineTests</id>
+      <properties>
+        <runIntegrationTestOnService>true</runIntegrationTestOnService>
+        <testGroups>com.google.cloud.dataflow.sdk.testing.RunnableOnService</testGroups>
+        <testParallelValue>both</testParallelValue>
+      </properties>
+    </profile>
+  </profiles>
+
+  <build>
+    <plugins>
+      <plugin>
+        <artifactId>maven-compiler-plugin</artifactId>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <version>2.12</version>
+        <dependencies>
+          <dependency>
+            <groupId>com.puppycrawl.tools</groupId>
+            <artifactId>checkstyle</artifactId>
+            <version>6.6</version>
+          </dependency>
+        </dependencies>
+        <configuration>
+          <configLocation>../../checkstyle.xml</configLocation>
+          <consoleOutput>true</consoleOutput>
+          <failOnViolation>true</failOnViolation>
+          <includeTestSourceDirectory>true</includeTestSourceDirectory>
+          <includeResources>false</includeResources>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>check</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!-- Source plugin for generating source and test-source JARs. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-source-plugin</artifactId>
+        <version>2.4</version>
+        <executions>
+          <execution>
+            <id>attach-sources</id>
+            <phase>compile</phase>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>attach-test-sources</id>
+            <phase>test-compile</phase>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+        <configuration>
+          <windowtitle>Apache Beam Examples</windowtitle>
+          <doctitle>Apache Beam Examples</doctitle>
+
+          <subpackages>com.google.cloud.dataflow.examples</subpackages>
+          <additionalparam>-exclude com.google.cloud.dataflow.sdk.runners.worker:com.google.cloud.dataflow.sdk.runners.dataflow:com.google.cloud.dataflow.sdk.util ${dataflow.javadoc_opts}</additionalparam>
+          <use>false</use>
+          <quiet>true</quiet>
+          <bottom><![CDATA[<br>]]></bottom>
+
+          <offlineLinks>
+            <!-- The Dataflow SDK docs -->
+            <offlineLink>
+              <url>https://cloud.google.com/dataflow/java-sdk/JavaDoc/</url>
+              <location>${basedir}/../../sdks/java/javadoc/dataflow-sdk-docs</location>
+            </offlineLink>
+            <!-- Other dependencies -->
+            <offlineLink>
+              <url>https://developers.google.com/api-client-library/java/google-api-java-client/reference/1.20.0/</url>
+              <location>${basedir}/../../sdks/java/javadoc/apiclient-docs</location>
+            </offlineLink>
+            <offlineLink>
+              <url>http://avro.apache.org/docs/1.7.7/api/java/</url>
+              <location>${basedir}/../../sdks/java/javadoc/avro-docs</location>
+            </offlineLink>
+            <offlineLink>
+              <url>https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/</url>
+              <location>${basedir}/../../sdks/java/javadoc/bq-docs</location>
+            </offlineLink>
+            <offlineLink>
+              <url>https://cloud.google.com/datastore/docs/apis/javadoc/</url>
+              <location>${basedir}/../../sdks/java/javadoc/datastore-docs</location>
+            </offlineLink>
+            <offlineLink>
+              <url>http://docs.guava-libraries.googlecode.com/git-history/release18/javadoc/</url>
+              <location>${basedir}/../../sdks/java/javadoc/guava-docs</location>
+            </offlineLink>
+            <offlineLink>
+              <url>http://fasterxml.github.io/jackson-annotations/javadoc/2.7/</url>
+              <location>${basedir}/../../sdks/java/javadoc/jackson-annotations-docs</location>
+            </offlineLink>
+            <offlineLink>
+              <url>http://fasterxml.github.io/jackson-databind/javadoc/2.7/</url>
+              <location>${basedir}/../../sdks/java/javadoc/jackson-databind-docs</location>
+            </offlineLink>
+            <offlineLink>
+              <url>http://www.joda.org/joda-time/apidocs</url>
+              <location>${basedir}/../../sdks/java/javadoc/joda-docs</location>
+            </offlineLink>
+            <offlineLink>
+              <url>https://developers.google.com/api-client-library/java/google-oauth-java-client/reference/1.20.0/</url>
+              <location>${basedir}/../../sdks/java/javadoc/oauth-docs</location>
+            </offlineLink>
+          </offlineLinks>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+            <phase>package</phase>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>2.4.1</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <finalName>${project.artifactId}-bundled-${project.version}</finalName>
+              <artifactSet>
+                <includes>
+                  <include>*:*</include>
+                </includes>
+              </artifactSet>
+              <filters>
+                <filter>
+                  <artifact>*:*</artifact>
+                  <excludes>
+                    <exclude>META-INF/*.SF</exclude>
+                    <exclude>META-INF/*.DSA</exclude>
+                    <exclude>META-INF/*.RSA</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>default-jar</id>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>default-test-jar</id>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!-- Coverage analysis for unit tests. -->
+      <plugin>
+        <groupId>org.jacoco</groupId>
+        <artifactId>jacoco-maven-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>java-sdk-all</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.api-client</groupId>
+      <artifactId>google-api-client</artifactId>
+      <version>${google-clients.version}</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-dataflow</artifactId>
+      <version>${dataflow.version}</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-bigquery</artifactId>
+      <version>${bigquery.version}</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.http-client</groupId>
+      <artifactId>google-http-client</artifactId>
+      <version>${google-clients.version}</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>${avro.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-datastore-protobuf</artifactId>
+      <version>${datastore.version}</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-pubsub</artifactId>
+      <version>${pubsub.version}</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>${guava.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.code.findbugs</groupId>
+      <artifactId>jsr305</artifactId>
+      <version>${jsr305.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>joda-time</groupId>
+      <artifactId>joda-time</artifactId>
+      <version>${joda.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>${slf4j.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-jdk14</artifactId>
+      <version>${slf4j.version}</version>
+      <scope>runtime</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>javax.servlet-api</artifactId>
+      <version>3.1.0</version>
+    </dependency>
+
+    <!-- Hamcrest and JUnit are required dependencies of DataflowAssert,
+         which is used in the main code of DebuggingWordCount example. -->
+
+    <dependency>
+      <groupId>org.hamcrest</groupId>
+      <artifactId>hamcrest-all</artifactId>
+      <version>${hamcrest.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <version>1.10.19</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
new file mode 100644
index 0000000..8823dbc
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples;
+
+import com.google.cloud.dataflow.examples.WordCount.WordCountOptions;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.transforms.Aggregator;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.Sum;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
+
+/**
+ * An example that verifies word counts in Shakespeare and includes Dataflow best practices.
+ *
+ * <p>This class, {@link DebuggingWordCount}, is the third in a series of four successively more
+ * detailed 'word count' examples. You may first want to take a look at {@link MinimalWordCount}
+ * and {@link WordCount}. After you've looked at this example, then see the
+ * {@link WindowedWordCount} pipeline, for introduction of additional concepts.
+ *
+ * <p>Basic concepts, also in the MinimalWordCount and WordCount examples:
+ * Reading text files; counting a PCollection; executing a Pipeline both locally
+ * and using the Dataflow service; defining DoFns.
+ *
+ * <p>New Concepts:
+ * <pre>
+ *   1. Logging to Cloud Logging
+ *   2. Controlling Dataflow worker log levels
+ *   3. Creating a custom aggregator
+ *   4. Testing your Pipeline via DataflowAssert
+ * </pre>
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service and the additional logging discussed
+ * below, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ *   --workerLogLevelOverrides={"com.google.cloud.dataflow.examples":"DEBUG"}
+ * }
+ * </pre>
+ *
+ * <p>Note that when you run via <code>mvn exec</code>, you may need to escape
+ * the quotations as appropriate for your shell. For example, in <code>bash</code>:
+ * <pre>
+ * mvn compile exec:java ... \
+ *   -Dexec.args="... \
+ *     --workerLogLevelOverrides={\\\"com.google.cloud.dataflow.examples\\\":\\\"DEBUG\\\"}"
+ * </pre>
+ *
+ * <p>Concept #2: Dataflow workers which execute user code are configured to log to Cloud
+ * Logging by default at "INFO" log level and higher. One may override log levels for specific
+ * logging namespaces by specifying:
+ * <pre><code>
+ *   --workerLogLevelOverrides={"Name1":"Level1","Name2":"Level2",...}
+ * </code></pre>
+ * For example, by specifying:
+ * <pre><code>
+ *   --workerLogLevelOverrides={"com.google.cloud.dataflow.examples":"DEBUG"}
+ * </code></pre>
+ * when executing this pipeline using the Dataflow service, Cloud Logging would contain only
+ * "DEBUG" or higher level logs for the {@code com.google.cloud.dataflow.examples} package in
+ * addition to the default "INFO" or higher level logs. In addition, the default Dataflow worker
+ * logging configuration can be overridden by specifying
+ * {@code --defaultWorkerLogLevel=<one of TRACE, DEBUG, INFO, WARN, ERROR>}. For example,
+ * by specifying {@code --defaultWorkerLogLevel=DEBUG} when executing this pipeline with
+ * the Dataflow service, Cloud Logging would contain all "DEBUG" or higher level logs. Note
+ * that changing the default worker log level to TRACE or DEBUG will significantly increase
+ * the amount of logs output.
+ *
+ * <p>The input file defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt} and can be
+ * overridden with {@code --inputFile}.
+ */
+public class DebuggingWordCount {
+  /** A DoFn that filters for a specific key based upon a regular expression. */
+  public static class FilterTextFn extends DoFn<KV<String, Long>, KV<String, Long>> {
+    /**
+     * Concept #1: The logger below is named after the fully qualified class name of
+     * FilterTextFn. All log statements emitted by this logger will be referenced by this name
+     * and will be visible in the Cloud Logging UI. Learn more at https://cloud.google.com/logging
+     * about the Cloud Logging UI.
+     */
+    private static final Logger LOG = LoggerFactory.getLogger(FilterTextFn.class);
+
+    private final Pattern filter;  // Compiled once; a key passes only if all of it matches.
+    public FilterTextFn(String pattern) {
+      filter = Pattern.compile(pattern);
+    }
+
+    /**
+     * Concept #3: A custom aggregator can track values in your pipeline as it runs. Those
+     * values will be displayed in the Dataflow Monitoring UI when this pipeline is run using the
+     * Dataflow service. These aggregators below track the number of matched and unmatched words.
+     * Learn more at https://cloud.google.com/dataflow/pipelines/dataflow-monitoring-intf about
+     * the Dataflow Monitoring UI.
+     */
+    private final Aggregator<Long, Long> matchedWords =
+        createAggregator("matchedWords", new Sum.SumLongFn());
+    private final Aggregator<Long, Long> unmatchedWords =
+        createAggregator("unmatchedWords", new Sum.SumLongFn());
+
+    @Override
+    public void processElement(ProcessContext c) {
+      if (filter.matcher(c.element().getKey()).matches()) {
+        // Log at the "DEBUG" level each element that we match. When executing this pipeline
+        // using the Dataflow service, these log lines will appear in the Cloud Logging UI
+        // only if the log level is set to "DEBUG" or lower.
+        LOG.debug("Matched: {}", c.element().getKey());
+        matchedWords.addValue(1L);
+        c.output(c.element());
+      } else {
+        // Log at the "TRACE" level each element that is not matched. Different log levels
+        // can be used to control the verbosity of logging providing an effective mechanism
+        // to filter less important information.
+        LOG.trace("Did not match: {}", c.element().getKey());
+        unmatchedWords.addValue(1L);
+      }
+    }
+  }
+
+  public static void main(String[] args) {
+    WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
+      .as(WordCountOptions.class);
+    Pipeline p = Pipeline.create(options);
+
+    PCollection<KV<String, Long>> filteredWords =
+        p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
+         .apply(new WordCount.CountWords())
+         .apply(ParDo.of(new FilterTextFn("Flourish|stomach")));
+
+    /**
+     * Concept #4: DataflowAssert is a set of convenient PTransforms in the style of
+     * Hamcrest's collection matchers that can be used when writing Pipeline level tests
+     * to validate the contents of PCollections. DataflowAssert is best used in unit tests
+     * with small data sets but is demonstrated here as a teaching tool.
+     *
+     * <p>Below we verify that the set of filtered words matches our expected counts. Note
+     * that DataflowAssert does not provide any output and that successful completion of the
+     * Pipeline implies that the expectations were met. Learn more at
+     * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline on how to test
+     * your Pipeline and see {@link DebuggingWordCountTest} for an example unit test.
+     */
+    List<KV<String, Long>> expectedResults = Arrays.asList(
+        KV.of("Flourish", 3L),
+        KV.of("stomach", 1L));
+    DataflowAssert.that(filteredWords).containsInAnyOrder(expectedResults);
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCount.java
new file mode 100644
index 0000000..4ed0520
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCount.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.MapElements;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.SimpleFunction;
+import com.google.cloud.dataflow.sdk.values.KV;
+
+
+/**
+ * An example that counts words in Shakespeare.
+ *
+ * <p>This class, {@link MinimalWordCount}, is the first in a series of four successively more
+ * detailed 'word count' examples. Here, for simplicity, we don't show any error-checking or
+ * argument processing, and focus on construction of the pipeline, which chains together the
+ * application of core transforms.
+ *
+ * <p>Next, see the {@link WordCount} pipeline, then the {@link DebuggingWordCount}, and finally
+ * the {@link WindowedWordCount} pipeline, for more detailed examples that introduce additional
+ * concepts.
+ *
+ * <p>Concepts:
+ * <pre>
+ *   1. Reading data from text files
+ *   2. Specifying 'inline' transforms
+ *   3. Counting a PCollection
+ *   4. Writing data to Cloud Storage as text files
+ * </pre>
+ *
+ * <p>To execute this pipeline, first edit the code to set your project ID, the staging
+ * location, and the output location. The specified GCS bucket(s) must already exist.
+ *
+ * <p>Then, run the pipeline as described in the README. It will be deployed and run using the
+ * Dataflow service. No args are required to run the pipeline. You can see the results in your
+ * output bucket in the GCS browser.
+ */
+public class MinimalWordCount {
+
+  public static void main(String[] args) {
+    // Create a DataflowPipelineOptions object. This object lets us set various execution
+    // options for our pipeline, such as the associated Cloud Platform project and the location
+    // in Google Cloud Storage to stage files.
+    DataflowPipelineOptions options = PipelineOptionsFactory.create()
+      .as(DataflowPipelineOptions.class);
+    options.setRunner(BlockingDataflowPipelineRunner.class);
+    // CHANGE 1/3: Your project ID is required in order to run your pipeline on Google Cloud.
+    options.setProject("SET_YOUR_PROJECT_ID_HERE");
+    // CHANGE 2/3: Your Google Cloud Storage path is required for staging local files.
+    options.setStagingLocation("gs://SET_YOUR_BUCKET_NAME_HERE/AND_STAGING_DIRECTORY");
+
+    // Create the Pipeline object with the options we defined above.
+    Pipeline p = Pipeline.create(options);
+
+    // Apply the pipeline's transforms.
+
+    // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set
+    // of input text files. TextIO.Read returns a PCollection where each element is one line from
+    // the input text (a set of Shakespeare's texts).
+    p.apply(TextIO.Read.from("gs://dataflow-samples/shakespeare/*"))
+     // Concept #2: Apply a ParDo transform to our PCollection of text lines. This ParDo invokes a
+     // DoFn (defined in-line) on each element that splits the line on runs of characters other
+     // than ASCII letters and apostrophes, emitting each non-empty piece as a word. The ParDo
+     // returns a PCollection<String> of the individual words in Shakespeare's collected texts.
+     .apply(ParDo.named("ExtractWords").of(new DoFn<String, String>() {
+                       @Override
+                       public void processElement(ProcessContext c) {
+                         for (String word : c.element().split("[^a-zA-Z']+")) {
+                           if (!word.isEmpty()) {
+                             c.output(word);
+                           }
+                         }
+                       }
+                     }))
+     // Concept #3: Apply the Count transform to our PCollection of individual words. The Count
+     // transform returns a new PCollection of key/value pairs, where each key represents a unique
+     // word in the text. The associated value is the occurrence count for that word.
+     .apply(Count.<String>perElement())
+     // Apply a MapElements transform that formats each word count as a "word: count" string,
+     // suitable for writing to an output file.
+     .apply("FormatResults", MapElements.via(new SimpleFunction<KV<String, Long>, String>() {
+                       @Override
+                       public String apply(KV<String, Long> input) {
+                         return input.getKey() + ": " + input.getValue();
+                       }
+                     }))
+     // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline.
+     // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of
+     // formatted strings) to a series of text files in Google Cloud Storage.
+     // CHANGE 3/3: A Google Cloud Storage path is required as the destination for the results.
+     .apply(TextIO.Write.to("gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX"));
+
+    // Run the pipeline.
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java
new file mode 100644
index 0000000..2adac55
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
+import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
+import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
+import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicOptions;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.PubsubIO;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * An example that counts words in text, and can run over either unbounded or bounded input
+ * collections.
+ *
+ * <p>This class, {@link WindowedWordCount}, is the last in a series of four successively more
+ * detailed 'word count' examples. First take a look at {@link MinimalWordCount},
+ * {@link WordCount}, and {@link DebuggingWordCount}.
+ *
+ * <p>Basic concepts, also in the MinimalWordCount, WordCount, and DebuggingWordCount examples:
+ * Reading text files; counting a PCollection; writing to GCS; executing a Pipeline both locally
+ * and using the Dataflow service; defining DoFns; creating a custom aggregator;
+ * user-defined PTransforms; defining PipelineOptions.
+ *
+ * <p>New Concepts:
+ * <pre>
+ *   1. Unbounded and bounded pipeline input modes
+ *   2. Adding timestamps to data
+ *   3. PubSub topics as sources
+ *   4. Windowing
+ *   5. Re-using PTransforms over windowed PCollections
+ *   6. Writing to BigQuery
+ * </pre>
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * </pre>
+ *
+ * <p>Optionally specify the input file path via:
+ * {@code --inputFile=gs://INPUT_PATH},
+ * which defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt}.
+ *
+ * <p>Specify an output BigQuery dataset and optionally, a table for the output. If you don't
+ * specify the table, one will be created for you using the job name. If you don't specify the
+ * dataset, a dataset called {@code dataflow-examples} must already exist in your project.
+ * {@code --bigQueryDataset=YOUR-DATASET --bigQueryTable=YOUR-NEW-TABLE-NAME}.
+ *
+ * <p>Decide whether you want your pipeline to run with 'bounded' (such as files in GCS) or
+ * 'unbounded' input (such as a PubSub topic). To run with unbounded input, set
+ * {@code --unbounded=true}. Then, optionally specify the Google Cloud PubSub topic to read from
+ * via {@code --pubsubTopic=projects/PROJECT_ID/topics/YOUR_TOPIC_NAME}. If the topic does not
+ * exist, the pipeline will create one for you. It will delete this topic when it terminates.
+ * The pipeline will automatically launch an auxiliary batch pipeline to populate the given PubSub
+ * topic with the contents of the {@code --inputFile}, in order to make the example easy to run.
+ * If you want to use an independently-populated PubSub topic, indicate this by setting
+ * {@code --inputFile=""}. In that case, the auxiliary pipeline will not be started.
+ *
+ * <p>By default, the pipeline will do fixed windowing, on 1-minute windows.  You can
+ * change this interval by setting the {@code --windowSize} parameter, e.g. {@code --windowSize=10}
+ * for 10-minute windows.
+ */
+public class WindowedWordCount {
+  private static final Logger LOG = LoggerFactory.getLogger(WindowedWordCount.class);
+  static final int WINDOW_SIZE = 1;  // Default window duration in minutes
+
+  /**
+   * Concept #2: A DoFn that sets the data element timestamp. This is a silly method, just for
+   * this example, for the bounded data case.
+   *
+   * <p>Imagine that many ghosts of Shakespeare are all typing madly at the same time to recreate
+   * his masterworks. Each line of the corpus will get a random associated timestamp somewhere in a
+   * 2-hour period.
+   */
+  static class AddTimestampFn extends DoFn<String, String> {
+    private static final long RAND_RANGE = 7200000; // 2 hours in ms
+
+    @Override
+    public void processElement(ProcessContext c) {
+      // Pick a timestamp in the past two hours; the offset stays a long, matching RAND_RANGE.
+      long randomTimestamp = System.currentTimeMillis()
+        - (long) (Math.random() * RAND_RANGE);
+      /**
+       * Concept #2: Set the data element with that timestamp.
+       */
+      c.outputWithTimestamp(c.element(), new Instant(randomTimestamp));
+    }
+  }
+
+  /** A DoFn that converts a Word and Count into a BigQuery table row. */
+  static class FormatAsTableRowFn extends DoFn<KV<String, Long>, TableRow> {
+    @Override
+    public void processElement(ProcessContext c) {
+      TableRow row = new TableRow()
+          .set("word", c.element().getKey())
+          .set("count", c.element().getValue())
+          // include a field for the window timestamp
+          .set("window_timestamp", c.timestamp().toString());
+      c.output(row);
+    }
+  }
+
+  /**
+   * Helper method that defines the BigQuery schema used for the output.
+   */
+  private static TableSchema getSchema() {
+    List<TableFieldSchema> fields = new ArrayList<>();
+    fields.add(new TableFieldSchema().setName("word").setType("STRING"));
+    fields.add(new TableFieldSchema().setName("count").setType("INTEGER"));
+    fields.add(new TableFieldSchema().setName("window_timestamp").setType("TIMESTAMP"));
+    TableSchema schema = new TableSchema().setFields(fields);
+    return schema;
+  }
+
+  /**
+   * Concept #6: We'll stream the results to a BigQuery table. The BigQuery output source is one
+   * that supports both bounded and unbounded data. This is a helper method that creates a
+   * TableReference from input options, to tell the pipeline where to write its BigQuery results.
+   */
+  private static TableReference getTableReference(Options options) {
+    TableReference tableRef = new TableReference();
+    tableRef.setProjectId(options.getProject());
+    tableRef.setDatasetId(options.getBigQueryDataset());
+    tableRef.setTableId(options.getBigQueryTable());
+    return tableRef;
+  }
+
+  /**
+   * Options supported by {@link WindowedWordCount}.
+   *
+   * <p>Inherits standard example configuration options, which allow specification of the BigQuery
+   * table and the PubSub topic, as well as the {@link WordCount.WordCountOptions} support for
+   * specification of the input file.
+   */
+  public static interface Options extends WordCount.WordCountOptions,
+      DataflowExampleOptions, ExamplePubsubTopicOptions, ExampleBigQueryTableOptions {
+    @Description("Fixed window duration, in minutes")
+    @Default.Integer(WINDOW_SIZE)
+    Integer getWindowSize();
+    void setWindowSize(Integer value);
+
+    @Description("Whether to run the pipeline with unbounded input")
+    boolean isUnbounded();
+    void setUnbounded(boolean value);
+  }
+
+  public static void main(String[] args) throws IOException {
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    options.setBigQuerySchema(getSchema());
+    // DataflowExampleUtils creates the necessary input sources to simplify execution of this
+    // Pipeline.
+    DataflowExampleUtils exampleDataflowUtils = new DataflowExampleUtils(options,
+      options.isUnbounded());
+
+    Pipeline pipeline = Pipeline.create(options);
+
+    /**
+     * Concept #1: the Dataflow SDK lets us run the same pipeline with either a bounded or
+     * unbounded input source.
+     */
+    PCollection<String> input;
+    if (options.isUnbounded()) {
+      LOG.info("Reading from PubSub.");
+      /**
+       * Concept #3: Read from the PubSub topic. A topic will be created if it wasn't
+       * specified as an argument. The data elements' timestamps will come from the pubsub
+       * injection.
+       */
+      input = pipeline
+          .apply(PubsubIO.Read.topic(options.getPubsubTopic()));
+    } else {
+      /** Else, this is a bounded pipeline. Read from the GCS file. */
+      input = pipeline
+          .apply(TextIO.Read.from(options.getInputFile()))
+          // Concept #2: Add an element timestamp, using an artificial time just to show windowing.
+          // See AddTimestampFn for more detail on this.
+          .apply(ParDo.of(new AddTimestampFn()));
+    }
+
+    /**
+     * Concept #4: Window into fixed windows. The fixed window size for this example defaults to 1
+     * minute (you can change this with a command-line option). See the documentation for more
+     * information on how fixed windows work, and for information on the other types of windowing
+     * available (e.g., sliding windows).
+     */
+    PCollection<String> windowedWords = input
+      .apply(Window.<String>into(
+        FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))));
+
+    /**
+     * Concept #5: Re-use our existing CountWords transform that does not have knowledge of
+     * windows over a PCollection containing windowed values.
+     */
+    PCollection<KV<String, Long>> wordCounts = windowedWords.apply(new WordCount.CountWords());
+
+    /**
+     * Concept #6: Format the results for a BigQuery table, then write to BigQuery.
+     * The BigQuery output source supports both bounded and unbounded data.
+     */
+    wordCounts.apply(ParDo.of(new FormatAsTableRowFn()))
+        .apply(BigQueryIO.Write
+          .to(getTableReference(options))
+          .withSchema(getSchema())
+          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
+          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));
+
+    PipelineResult result = pipeline.run();
+
+    /**
+     * To mock unbounded input from PubSub, we'll now start an auxiliary 'injector' pipeline that
+     * runs for a limited time, and publishes to the input PubSub topic.
+     *
+     * With an unbounded input source, you will need to explicitly shut down this pipeline when you
+     * are done with it, so that you do not continue to be charged for the instances. You can do
+     * this via a ctrl-C from the command line, or from the developer's console UI for Dataflow
+     * pipelines. The PubSub topic will also be deleted at this time.
+     */
+    exampleDataflowUtils.mockUnboundedSource(options.getInputFile(), result);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java
new file mode 100644
index 0000000..1086106
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.Aggregator;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.MapElements;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.SimpleFunction;
+import com.google.cloud.dataflow.sdk.transforms.Sum;
+import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+
+/**
+ * An example that counts words in Shakespeare and includes Dataflow best practices.
+ *
+ * <p>This class, {@link WordCount}, is the second in a series of four successively more detailed
+ * 'word count' examples. You may first want to take a look at {@link MinimalWordCount}.
+ * After you've looked at this example, then see the {@link DebuggingWordCount}
+ * pipeline, for introduction of additional concepts.
+ *
+ * <p>For a detailed walkthrough of this example, see
+ *   <a href="https://cloud.google.com/dataflow/java-sdk/wordcount-example">
+ *   https://cloud.google.com/dataflow/java-sdk/wordcount-example
+ *   </a>
+ *
+ * <p>Basic concepts, also in the MinimalWordCount example:
+ * Reading text files; counting a PCollection; writing to GCS.
+ *
+ * <p>New Concepts:
+ * <pre>
+ *   1. Executing a Pipeline both locally and using the Dataflow service
+ *   2. Using ParDo with static DoFns defined out-of-line
+ *   3. Building a composite transform
+ *   4. Defining your own pipeline options
+ * </pre>
+ *
+ * <p>Concept #1: you can execute this pipeline either locally or using the Dataflow service.
+ * These are now command-line options and not hard-coded as they were in the MinimalWordCount
+ * example.
+ * To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ * and a local output file or output prefix on GCS:
+ * <pre>{@code
+ *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
+ * }</pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * </pre>
+ * and an output prefix on GCS:
+ * <pre>{@code
+ *   --output=gs://YOUR_OUTPUT_PREFIX
+ * }</pre>
+ *
+ * <p>The input file defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt} and can be
+ * overridden with {@code --inputFile}.
+ */
+public class WordCount {
+
+  /**
+   * Concept #2: You can make your pipeline code less verbose by defining your DoFns statically out-
+   * of-line. This DoFn tokenizes lines of text into individual words; we pass it to a ParDo in the
+   * pipeline.
+   */
+  static class ExtractWordsFn extends DoFn<String, String> {
+    private final Aggregator<Long, Long> emptyLines =
+        createAggregator("emptyLines", new Sum.SumLongFn());
+
+    @Override
+    public void processElement(ProcessContext c) {
+      if (c.element().trim().isEmpty()) {
+        emptyLines.addValue(1L);
+      }
+
+      // Split the line into words.
+      String[] words = c.element().split("[^a-zA-Z']+");
+
+      // Output each word encountered into the output PCollection.
+      for (String word : words) {
+        if (!word.isEmpty()) {
+          c.output(word);
+        }
+      }
+    }
+  }
+
+  /** A SimpleFunction that converts a Word and Count into a printable string. */
+  public static class FormatAsTextFn extends SimpleFunction<KV<String, Long>, String> {
+    @Override
+    public String apply(KV<String, Long> input) {
+      return input.getKey() + ": " + input.getValue();
+    }
+  }
+
+  /**
+   * A PTransform that converts a PCollection containing lines of text into a PCollection of
+   * formatted word counts.
+   *
+   * <p>Concept #3: This is a custom composite transform that bundles two transforms (ParDo and
+   * Count) as a reusable PTransform subclass. Using composite transforms allows for easy reuse,
+   * modular testing, and an improved monitoring experience.
+   */
+  public static class CountWords extends PTransform<PCollection<String>,
+      PCollection<KV<String, Long>>> {
+    @Override
+    public PCollection<KV<String, Long>> apply(PCollection<String> lines) {
+
+      // Convert lines of text into individual words.
+      PCollection<String> words = lines.apply(
+          ParDo.of(new ExtractWordsFn()));
+
+      // Count the number of times each word occurs.
+      PCollection<KV<String, Long>> wordCounts =
+          words.apply(Count.<String>perElement());
+
+      return wordCounts;
+    }
+  }
+
+  /**
+   * Options supported by {@link WordCount}.
+   *
+   * <p>Concept #4: Defining your own configuration options. Here, you can add your own arguments
+   * to be processed by the command-line parser, and specify default values for them. You can then
+   * access the options values in your pipeline code.
+   *
+   * <p>Inherits standard configuration options.
+   */
+  public static interface WordCountOptions extends PipelineOptions {
+    @Description("Path of the file to read from")
+    @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt")
+    String getInputFile();
+    void setInputFile(String value);
+
+    @Description("Path of the file to write to")
+    @Default.InstanceFactory(OutputFactory.class)
+    String getOutput();
+    void setOutput(String value);
+
+    /**
+     * Returns "gs://${YOUR_STAGING_DIRECTORY}/counts.txt" as the default destination.
+     */
+    public static class OutputFactory implements DefaultValueFactory<String> {
+      @Override
+      public String create(PipelineOptions options) {
+        DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
+        if (dataflowOptions.getStagingLocation() != null) {
+          return GcsPath.fromUri(dataflowOptions.getStagingLocation())
+              .resolve("counts.txt").toString();
+        } else {
+          throw new IllegalArgumentException("Must specify --output or --stagingLocation");
+        }
+      }
+    }
+
+  }
+
+  public static void main(String[] args) {
+    WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
+      .as(WordCountOptions.class);
+    Pipeline p = Pipeline.create(options);
+
+    // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the
+    // static FormatAsTextFn() to the MapElements transform.
+    p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
+     .apply(new CountWords())
+     .apply(MapElements.via(new FormatAsTextFn()))
+     .apply(TextIO.Write.named("WriteCounts").to(options.getOutput()));
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleOptions.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleOptions.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleOptions.java
new file mode 100644
index 0000000..606bfb4
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleOptions.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.common;
+
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+
+/**
+ * Options that can be used to configure the Dataflow examples.
+ */
+public interface DataflowExampleOptions extends DataflowPipelineOptions {
+  @Description("Whether to keep jobs running on the Dataflow service after local process exit")
+  @Default.Boolean(false)
+  boolean getKeepJobsRunning();
+  void setKeepJobsRunning(boolean keepJobsRunning);
+
+  @Description("Number of workers to use when executing the injector pipeline")
+  @Default.Integer(1)
+  int getInjectorNumWorkers();
+  void setInjectorNumWorkers(int numWorkers);
+}

[10/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java
deleted file mode 100644
index 8b2d56f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PackageUtil.java
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.util.BackOffUtils;
-import com.google.api.client.util.Sleeper;
-import com.google.api.services.dataflow.model.DataflowPackage;
-import com.google.cloud.hadoop.util.ApiErrorExtractor;
-import com.google.common.hash.Funnels;
-import com.google.common.hash.Hasher;
-import com.google.common.hash.Hashing;
-import com.google.common.io.CountingOutputStream;
-import com.google.common.io.Files;
-
-import com.fasterxml.jackson.core.Base64Variants;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.channels.Channels;
-import java.nio.channels.WritableByteChannel;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Objects;
-
-/** Helper routines for packages. */
-public class PackageUtil {
-  private static final Logger LOG = LoggerFactory.getLogger(PackageUtil.class);
-  /**
-   * A reasonable upper bound on the number of jars required to launch a Dataflow job.
-   */
-  public static final int SANE_CLASSPATH_SIZE = 1000;
-  /**
-   * The initial interval to use between package staging attempts.
-   */
-  private static final long INITIAL_BACKOFF_INTERVAL_MS = 5000L;
-  /**
-   * The maximum number of attempts when staging a file.
-   */
-  private static final int MAX_ATTEMPTS = 5;
-
-  /**
-   * Translates exceptions from API calls.
-   */
-  private static final ApiErrorExtractor ERROR_EXTRACTOR = new ApiErrorExtractor();
-
-  /**
-   * Creates a DataflowPackage containing information about how a classpath element should be
-   * staged, including the staging destination as well as its size and hash.
-   *
-   * @param classpathElement The local path for the classpath element.
-   * @param stagingPath The base location for staged classpath elements.
-   * @param overridePackageName If non-null, use the given value as the package name
-   *                            instead of generating one automatically.
-   * @return The package.
-   */
-  @Deprecated
-  public static DataflowPackage createPackage(File classpathElement,
-      String stagingPath, String overridePackageName) {
-    return createPackageAttributes(classpathElement, stagingPath, overridePackageName)
-        .getDataflowPackage();
-  }
-
-  /**
-   * Compute and cache the attributes of a classpath element that we will need to stage it.
-   *
-   * @param classpathElement the file or directory to be staged.
-   * @param stagingPath The base location for staged classpath elements.
-   * @param overridePackageName If non-null, use the given value as the package name
-   *                            instead of generating one automatically.
-   * @return a {@link PackageAttributes} containing metadata about the object to be staged.
-   */
-  static PackageAttributes createPackageAttributes(File classpathElement,
-      String stagingPath, String overridePackageName) {
-    try {
-      boolean directory = classpathElement.isDirectory();
-
-      // Compute size and hash in one pass over file or directory.
-      Hasher hasher = Hashing.md5().newHasher();
-      OutputStream hashStream = Funnels.asOutputStream(hasher);
-      CountingOutputStream countingOutputStream = new CountingOutputStream(hashStream);
-
-      if (!directory) {
-        // Files are staged as-is.
-        Files.asByteSource(classpathElement).copyTo(countingOutputStream);
-      } else {
-        // Directories are recursively zipped.
-        ZipFiles.zipDirectory(classpathElement, countingOutputStream);
-      }
-
-      long size = countingOutputStream.getCount();
-      String hash = Base64Variants.MODIFIED_FOR_URL.encode(hasher.hash().asBytes());
-
-      // Create the DataflowPackage with staging name and location.
-      String uniqueName = getUniqueContentName(classpathElement, hash);
-      String resourcePath = IOChannelUtils.resolve(stagingPath, uniqueName);
-      DataflowPackage target = new DataflowPackage();
-      target.setName(overridePackageName != null ? overridePackageName : uniqueName);
-      target.setLocation(resourcePath);
-
-      return new PackageAttributes(size, hash, directory, target);
-    } catch (IOException e) {
-      throw new RuntimeException("Package setup failure for " + classpathElement, e);
-    }
-  }
-
-  /**
-   * Transfers the classpath elements to the staging location.
-   *
-   * @param classpathElements The elements to stage.
-   * @param stagingPath The base location to stage the elements to.
-   * @return A list of cloud workflow packages, each representing a classpath element.
-   */
-  public static List<DataflowPackage> stageClasspathElements(
-      Collection<String> classpathElements, String stagingPath) {
-    return stageClasspathElements(classpathElements, stagingPath, Sleeper.DEFAULT);
-  }
-
-  // Visible for testing.
-  static List<DataflowPackage> stageClasspathElements(
-      Collection<String> classpathElements, String stagingPath,
-      Sleeper retrySleeper) {
-    LOG.info("Uploading {} files from PipelineOptions.filesToStage to staging location to "
-        + "prepare for execution.", classpathElements.size());
-
-    if (classpathElements.size() > SANE_CLASSPATH_SIZE) {
-      LOG.warn("Your classpath contains {} elements, which Google Cloud Dataflow automatically "
-          + "copies to all workers. Having this many entries on your classpath may be indicative "
-          + "of an issue in your pipeline. You may want to consider trimming the classpath to "
-          + "necessary dependencies only, using --filesToStage pipeline option to override "
-          + "what files are being staged, or bundling several dependencies into one.",
-          classpathElements.size());
-    }
-
-    ArrayList<DataflowPackage> packages = new ArrayList<>();
-
-    if (stagingPath == null) {
-      throw new IllegalArgumentException(
-          "Can't stage classpath elements on because no staging location has been provided");
-    }
-
-    int numUploaded = 0;
-    int numCached = 0;
-    for (String classpathElement : classpathElements) {
-      String packageName = null;
-      if (classpathElement.contains("=")) {
-        String[] components = classpathElement.split("=", 2);
-        packageName = components[0];
-        classpathElement = components[1];
-      }
-
-      File file = new File(classpathElement);
-      if (!file.exists()) {
-        LOG.warn("Skipping non-existent classpath element {} that was specified.",
-            classpathElement);
-        continue;
-      }
-
-      PackageAttributes attributes = createPackageAttributes(file, stagingPath, packageName);
-
-      DataflowPackage workflowPackage = attributes.getDataflowPackage();
-      packages.add(workflowPackage);
-      String target = workflowPackage.getLocation();
-
-      // TODO: Should we attempt to detect the Mime type rather than
-      // always using MimeTypes.BINARY?
-      try {
-        try {
-          long remoteLength = IOChannelUtils.getSizeBytes(target);
-          if (remoteLength == attributes.getSize()) {
-            LOG.debug("Skipping classpath element already staged: {} at {}",
-                classpathElement, target);
-            numCached++;
-            continue;
-          }
-        } catch (FileNotFoundException expected) {
-          // If the file doesn't exist, it means we need to upload it.
-        }
-
-        // Upload file, retrying on failure.
-        AttemptBoundedExponentialBackOff backoff = new AttemptBoundedExponentialBackOff(
-            MAX_ATTEMPTS,
-            INITIAL_BACKOFF_INTERVAL_MS);
-        while (true) {
-          try {
-            LOG.debug("Uploading classpath element {} to {}", classpathElement, target);
-            try (WritableByteChannel writer = IOChannelUtils.create(target, MimeTypes.BINARY)) {
-              copyContent(classpathElement, writer);
-            }
-            numUploaded++;
-            break;
-          } catch (IOException e) {
-            if (ERROR_EXTRACTOR.accessDenied(e)) {
-              String errorMessage = String.format(
-                  "Uploaded failed due to permissions error, will NOT retry staging "
-                  + "of classpath %s. Please verify credentials are valid and that you have "
-                  + "write access to %s. Stale credentials can be resolved by executing "
-                  + "'gcloud auth login'.", classpathElement, target);
-              LOG.error(errorMessage);
-              throw new IOException(errorMessage, e);
-            } else if (!backoff.atMaxAttempts()) {
-              LOG.warn("Upload attempt failed, sleeping before retrying staging of classpath: {}",
-                  classpathElement, e);
-              BackOffUtils.next(retrySleeper, backoff);
-            } else {
-              // Rethrow last error, to be included as a cause in the catch below.
-              LOG.error("Upload failed, will NOT retry staging of classpath: {}",
-                  classpathElement, e);
-              throw e;
-            }
-          }
-        }
-      } catch (Exception e) {
-        throw new RuntimeException("Could not stage classpath element: " + classpathElement, e);
-      }
-    }
-
-    LOG.info("Uploading PipelineOptions.filesToStage complete: {} files newly uploaded, "
-        + "{} files cached",
-        numUploaded, numCached);
-
-    return packages;
-  }
-
-  /**
-   * Returns a unique name for a file with a given content hash.
-   *
-   * <p>Directory paths are removed. Example:
-   * <pre>
-   * dir="a/b/c/d", contentHash="f000" => d-f000.jar
-   * file="a/b/c/d.txt", contentHash="f000" => d-f000.txt
-   * file="a/b/c/d", contentHash="f000" => d-f000
-   * </pre>
-   */
-  static String getUniqueContentName(File classpathElement, String contentHash) {
-    String fileName = Files.getNameWithoutExtension(classpathElement.getAbsolutePath());
-    String fileExtension = Files.getFileExtension(classpathElement.getAbsolutePath());
-    if (classpathElement.isDirectory()) {
-      return fileName + "-" + contentHash + ".jar";
-    } else if (fileExtension.isEmpty()) {
-      return fileName + "-" + contentHash;
-    }
-    return fileName + "-" + contentHash + "." + fileExtension;
-  }
-
-  /**
-   * Copies the contents of the classpathElement to the output channel.
-   *
-   * <p>If the classpathElement is a directory, a Zip stream is constructed on the fly,
-   * otherwise the file contents are copied as-is.
-   *
-   * <p>The output channel is not closed.
-   */
-  private static void copyContent(String classpathElement, WritableByteChannel outputChannel)
-      throws IOException {
-    final File classpathElementFile = new File(classpathElement);
-    if (classpathElementFile.isDirectory()) {
-      ZipFiles.zipDirectory(classpathElementFile, Channels.newOutputStream(outputChannel));
-    } else {
-      Files.asByteSource(classpathElementFile).copyTo(Channels.newOutputStream(outputChannel));
-    }
-  }
-  /**
-   * Holds the metadata necessary to stage a file or confirm that a staged file has not changed.
-   */
-  static class PackageAttributes {
-    private final boolean directory;
-    private final long size;
-    private final String hash;
-    private DataflowPackage dataflowPackage;
-
-    public PackageAttributes(long size, String hash, boolean directory,
-        DataflowPackage dataflowPackage) {
-      this.size = size;
-      this.hash = Objects.requireNonNull(hash, "hash");
-      this.directory = directory;
-      this.dataflowPackage = Objects.requireNonNull(dataflowPackage, "dataflowPackage");
-    }
-
-    /**
-     * @return the dataflowPackage
-     */
-    public DataflowPackage getDataflowPackage() {
-      return dataflowPackage;
-    }
-
-    /**
-     * @return the directory
-     */
-    public boolean isDirectory() {
-      return directory;
-    }
-
-    /**
-     * @return the size
-     */
-    public long getSize() {
-      return size;
-    }
-
-    /**
-     * @return the hash
-     */
-    public String getHash() {
-      return hash;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PaneInfoTracker.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PaneInfoTracker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PaneInfoTracker.java
deleted file mode 100644
index a7818a3..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PaneInfoTracker.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo.PaneInfoCoder;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo.Timing;
-import com.google.cloud.dataflow.sdk.util.state.ReadableState;
-import com.google.cloud.dataflow.sdk.util.state.StateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateTag;
-import com.google.cloud.dataflow.sdk.util.state.StateTags;
-import com.google.cloud.dataflow.sdk.util.state.ValueState;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-
-import org.joda.time.Instant;
-
-/**
- * Determine the timing and other properties of a new pane for a given computation, key and window.
- * Incorporates any previous pane, whether the pane has been produced because of an
- * on-time {@link AfterWatermark} trigger firing, and the relation between the element's timestamp
- * and the current output watermark.
- */
-public class PaneInfoTracker {
-  private TimerInternals timerInternals;
-
-  public PaneInfoTracker(TimerInternals timerInternals) {
-    this.timerInternals = timerInternals;
-  }
-
-  @VisibleForTesting
-  static final StateTag<Object, ValueState<PaneInfo>> PANE_INFO_TAG =
-      StateTags.makeSystemTagInternal(StateTags.value("pane", PaneInfoCoder.INSTANCE));
-
-  public void clear(StateAccessor<?> state) {
-    state.access(PANE_INFO_TAG).clear();
-  }
-
-  /**
-   * Return a ({@link ReadableState} for) the pane info appropriate for {@code context}. The pane
-   * info includes the timing for the pane, whose calculation is quite subtle.
-   *
-   * @param isFinal should be {@code true} only if the triggering machinery can guarantee
-   * no further firings for the window.
-   */
-  public ReadableState<PaneInfo> getNextPaneInfo(
-      ReduceFn<?, ?, ?, ?>.Context context, final boolean isFinal) {
-    final Object key = context.key();
-    final ReadableState<PaneInfo> previousPaneFuture =
-        context.state().access(PaneInfoTracker.PANE_INFO_TAG);
-    final Instant windowMaxTimestamp = context.window().maxTimestamp();
-
-    return new ReadableState<PaneInfo>() {
-      @Override
-      public ReadableState<PaneInfo> readLater() {
-        previousPaneFuture.readLater();
-        return this;
-      }
-
-      @Override
-      public PaneInfo read() {
-        PaneInfo previousPane = previousPaneFuture.read();
-        return describePane(key, windowMaxTimestamp, previousPane, isFinal);
-      }
-    };
-  }
-
-  public void storeCurrentPaneInfo(ReduceFn<?, ?, ?, ?>.Context context, PaneInfo currentPane) {
-    context.state().access(PANE_INFO_TAG).write(currentPane);
-  }
-
-  private <W> PaneInfo describePane(
-      Object key, Instant windowMaxTimestamp, PaneInfo previousPane, boolean isFinal) {
-    boolean isFirst = previousPane == null;
-    Timing previousTiming = isFirst ? null : previousPane.getTiming();
-    long index = isFirst ? 0 : previousPane.getIndex() + 1;
-    long nonSpeculativeIndex = isFirst ? 0 : previousPane.getNonSpeculativeIndex() + 1;
-    Instant outputWM = timerInternals.currentOutputWatermarkTime();
-    Instant inputWM = timerInternals.currentInputWatermarkTime();
-
-    // True if it is not possible to assign the element representing this pane a timestamp
-    // which will make an ON_TIME pane for any following computation.
-    // I.e., true if the element's latest possible timestamp is before the current output watermark.
-    boolean isLateForOutput = outputWM != null && windowMaxTimestamp.isBefore(outputWM);
-
-    // True if all emitted panes (if any) were EARLY panes.
-    // Once the ON_TIME pane has fired, all following panes must be considered LATE even
-    // if the output watermark is behind the end of the window.
-    boolean onlyEarlyPanesSoFar = previousTiming == null || previousTiming == Timing.EARLY;
-
-    // True if the input watermark hasn't passed the window's max timestamp.
-    boolean isEarlyForInput = inputWM == null || !inputWM.isAfter(windowMaxTimestamp);
-
-    Timing timing;
-    if (isLateForOutput || !onlyEarlyPanesSoFar) {
-      // The output watermark has already passed the end of this window, or we have already
-      // emitted a non-EARLY pane. Irrespective of how this pane was triggered we must
-      // consider this pane LATE.
-      timing = Timing.LATE;
-    } else if (isEarlyForInput) {
-      // This is an EARLY firing.
-      timing = Timing.EARLY;
-      nonSpeculativeIndex = -1;
-    } else {
-      // This is the unique ON_TIME firing for the window.
-      timing = Timing.ON_TIME;
-    }
-
-    WindowTracing.debug(
-        "describePane: {} pane (prev was {}) for key:{}; windowMaxTimestamp:{}; "
-        + "inputWatermark:{}; outputWatermark:{}; isLateForOutput:{}",
-        timing, previousTiming, key, windowMaxTimestamp, inputWM, outputWM, isLateForOutput);
-
-    if (previousPane != null) {
-      // Timing transitions should follow EARLY* ON_TIME? LATE*
-      switch (previousTiming) {
-        case EARLY:
-          Preconditions.checkState(
-              timing == Timing.EARLY || timing == Timing.ON_TIME || timing == Timing.LATE,
-              "EARLY cannot transition to %s", timing);
-          break;
-        case ON_TIME:
-          Preconditions.checkState(
-              timing == Timing.LATE, "ON_TIME cannot transition to %s", timing);
-          break;
-        case LATE:
-          Preconditions.checkState(timing == Timing.LATE, "LATE cannot transtion to %s", timing);
-          break;
-        case UNKNOWN:
-          break;
-      }
-      Preconditions.checkState(!previousPane.isLast(), "Last pane was not last after all.");
-    }
-
-    return PaneInfo.createPane(isFirst, isFinal, timing, index, nonSpeculativeIndex);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PathValidator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PathValidator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PathValidator.java
deleted file mode 100644
index 658de2a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PathValidator.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-/**
- * Interface for controlling validation of paths.
- */
-public interface PathValidator {
-  /**
-   * Validate that a file pattern is conforming.
-   *
-   * @param filepattern The file pattern to verify.
-   * @return The post-validation filepattern.
-   */
-  public String validateInputFilePatternSupported(String filepattern);
-
-  /**
-   * Validate that an output file prefix is conforming.
-   *
-   * @param filePrefix the file prefix to verify.
-   * @return The post-validation filePrefix.
-   */
-  public String validateOutputFilePrefixSupported(String filePrefix);
-
-  /**
-   * Validate that a path is a valid path and that the path
-   * is accessible.
-   *
-   * @param path The path to verify.
-   * @return The post-validation path.
-   */
-  public String verifyPath(String path);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PerKeyCombineFnRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PerKeyCombineFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PerKeyCombineFnRunner.java
deleted file mode 100644
index b5f328f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PerKeyCombineFnRunner.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.CombineFnBase.PerKeyCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-
-import java.io.Serializable;
-import java.util.Collection;
-
-/**
- * An interface that runs a {@link PerKeyCombineFn} with unified APIs.
- *
- * <p>Different keyed combine functions have their own implementations.
- * For example, the implementation can skip allocating {@code Combine.Context},
- * if the keyed combine function doesn't use it.
- */
-public interface PerKeyCombineFnRunner<K, InputT, AccumT, OutputT> extends Serializable {
-  /**
-   * Returns the {@link PerKeyCombineFn} it holds.
-   *
-   * <p>It can be a {@code KeyedCombineFn} or a {@code KeyedCombineFnWithContext}.
-   */
-  public PerKeyCombineFn<K, InputT, AccumT, OutputT> fn();
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to create the accumulator in a {@link DoFn}.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from {@code DoFn.ProcessContext}
-   * if it is required.
-   */
-  public AccumT createAccumulator(K key, DoFn<?, ?>.ProcessContext c);
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to add the input in a {@link DoFn}.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from {@code DoFn.ProcessContext}
-   * if it is required.
-   */
-  public AccumT addInput(K key, AccumT accumulator, InputT input, DoFn<?, ?>.ProcessContext c);
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to merge accumulators in a {@link DoFn}.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from {@code DoFn.ProcessContext}
-   * if it is required.
-   */
-  public AccumT mergeAccumulators(
-      K key, Iterable<AccumT> accumulators, DoFn<?, ?>.ProcessContext c);
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to extract the output in a {@link DoFn}.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from {@code DoFn.ProcessContext}
-   * if it is required.
-   */
-  public OutputT extractOutput(K key, AccumT accumulator, DoFn<?, ?>.ProcessContext c);
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to compact the accumulator in a {@link DoFn}.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from {@code DoFn.ProcessContext}
-   * if it is required.
-   */
-  public AccumT compact(K key, AccumT accumulator, DoFn<?, ?>.ProcessContext c);
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to combine the inputs and extract output
-   * in a {@link DoFn}.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from {@code DoFn.ProcessContext}
-   * if it is required.
-   */
-  public OutputT apply(K key, Iterable<? extends InputT> inputs, DoFn<?, ?>.ProcessContext c);
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to add all inputs in a {@link DoFn}.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from {@code DoFn.ProcessContext}
-   * if it is required.
-   */
-  public AccumT addInputs(K key, Iterable<InputT> inputs, DoFn<?, ?>.ProcessContext c);
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to create the accumulator.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from
-   * {@link PipelineOptions} and {@link SideInputReader} if it is required.
-   */
-  public AccumT createAccumulator(K key, PipelineOptions options,
-      SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows);
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to add the input.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from
-   * {@link PipelineOptions} and {@link SideInputReader} if it is required.
-   */
-  public AccumT addInput(K key, AccumT accumulator, InputT value, PipelineOptions options,
-      SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows);
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to merge accumulators.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from
-   * {@link PipelineOptions} and {@link SideInputReader} if it is required.
-   */
-  public AccumT mergeAccumulators(K key, Iterable<AccumT> accumulators, PipelineOptions options,
-      SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows);
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to extract the output.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from
-   * {@link PipelineOptions} and {@link SideInputReader} if it is required.
-   */
-  public OutputT extractOutput(K key, AccumT accumulator, PipelineOptions options,
-      SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows);
-
-  /**
-   * Forwards the call to a {@link PerKeyCombineFn} to compact the accumulator.
-   *
-   * <p>It constructs a {@code CombineWithContext.Context} from
-   * {@link PipelineOptions} and {@link SideInputReader} if it is required.
-   */
-  public AccumT compact(K key, AccumT accumulator, PipelineOptions options,
-      SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PerKeyCombineFnRunners.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PerKeyCombineFnRunners.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PerKeyCombineFnRunners.java
deleted file mode 100644
index 6606c54..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PerKeyCombineFnRunners.java
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineFnBase.PerKeyCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.KeyedCombineFnWithContext;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.RequiresContextInternal;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.common.collect.Iterables;
-
-import java.util.Collection;
-
-/**
- * Static utility methods that provide {@link PerKeyCombineFnRunner} implementations
- * for different keyed combine functions.
- */
-public class PerKeyCombineFnRunners {
-  /**
-   * Returns a {@link PerKeyCombineFnRunner} from a {@link PerKeyCombineFn}.
-   */
-  public static <K, InputT, AccumT, OutputT> PerKeyCombineFnRunner<K, InputT, AccumT, OutputT>
-  create(PerKeyCombineFn<K, InputT, AccumT, OutputT> perKeyCombineFn) {
-    if (perKeyCombineFn instanceof RequiresContextInternal) {
-      return new KeyedCombineFnWithContextRunner<>(
-          (KeyedCombineFnWithContext<K, InputT, AccumT, OutputT>) perKeyCombineFn);
-    } else {
-      return new KeyedCombineFnRunner<>(
-          (KeyedCombineFn<K, InputT, AccumT, OutputT>) perKeyCombineFn);
-    }
-  }
-
-  /**
-   * An implementation of {@link PerKeyCombineFnRunner} with {@link KeyedCombineFn}.
-   *
-   * It forwards functions calls to the {@link KeyedCombineFn}.
-   */
-  private static class KeyedCombineFnRunner<K, InputT, AccumT, OutputT>
-      implements PerKeyCombineFnRunner<K, InputT, AccumT, OutputT> {
-    private final KeyedCombineFn<K, InputT, AccumT, OutputT> keyedCombineFn;
-
-    private KeyedCombineFnRunner(
-        KeyedCombineFn<K, InputT, AccumT, OutputT> keyedCombineFn) {
-      this.keyedCombineFn = keyedCombineFn;
-    }
-
-    @Override
-    public KeyedCombineFn<K, InputT, AccumT, OutputT> fn() {
-      return keyedCombineFn;
-    }
-
-    @Override
-    public AccumT createAccumulator(K key, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFn.createAccumulator(key);
-    }
-
-    @Override
-    public AccumT addInput(
-        K key, AccumT accumulator, InputT input, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFn.addInput(key, accumulator, input);
-    }
-
-    @Override
-    public AccumT mergeAccumulators(
-        K key, Iterable<AccumT> accumulators, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFn.mergeAccumulators(key, accumulators);
-    }
-
-    @Override
-    public OutputT extractOutput(K key, AccumT accumulator, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFn.extractOutput(key, accumulator);
-    }
-
-    @Override
-    public AccumT compact(K key, AccumT accumulator, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFn.compact(key, accumulator);
-    }
-
-    @Override
-    public OutputT apply(K key, Iterable<? extends InputT> inputs, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFn.apply(key, inputs);
-    }
-
-    @Override
-    public AccumT addInputs(K key, Iterable<InputT> inputs, DoFn<?, ?>.ProcessContext c) {
-      AccumT accum = keyedCombineFn.createAccumulator(key);
-      for (InputT input : inputs) {
-        accum = keyedCombineFn.addInput(key, accum, input);
-      }
-      return accum;
-    }
-
-    @Override
-    public String toString() {
-      return keyedCombineFn.toString();
-    }
-
-    @Override
-    public AccumT createAccumulator(K key, PipelineOptions options,
-        SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows) {
-      return keyedCombineFn.createAccumulator(key);
-    }
-
-    @Override
-    public AccumT addInput(K key, AccumT accumulator, InputT input, PipelineOptions options,
-        SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows) {
-      return keyedCombineFn.addInput(key, accumulator, input);
-    }
-
-    @Override
-    public AccumT mergeAccumulators(K key, Iterable<AccumT> accumulators, PipelineOptions options,
-        SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows) {
-      return keyedCombineFn.mergeAccumulators(key, accumulators);
-    }
-
-    @Override
-    public OutputT extractOutput(K key, AccumT accumulator, PipelineOptions options,
-        SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows) {
-      return keyedCombineFn.extractOutput(key, accumulator);
-    }
-
-    @Override
-    public AccumT compact(K key, AccumT accumulator, PipelineOptions options,
-        SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows) {
-      return keyedCombineFn.compact(key, accumulator);
-    }
-  }
-
-  /**
-   * An implementation of {@link PerKeyCombineFnRunner} with {@link KeyedCombineFnWithContext}.
-   *
-   * It forwards functions calls to the {@link KeyedCombineFnWithContext}.
-   */
-  private static class KeyedCombineFnWithContextRunner<K, InputT, AccumT, OutputT>
-      implements PerKeyCombineFnRunner<K, InputT, AccumT, OutputT> {
-    private final KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> keyedCombineFnWithContext;
-
-    private KeyedCombineFnWithContextRunner(
-        KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> keyedCombineFnWithContext) {
-      this.keyedCombineFnWithContext = keyedCombineFnWithContext;
-    }
-
-    @Override
-    public KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> fn() {
-      return keyedCombineFnWithContext;
-    }
-
-    @Override
-    public AccumT createAccumulator(K key, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFnWithContext.createAccumulator(key,
-          CombineContextFactory.createFromProcessContext(c));
-    }
-
-    @Override
-    public AccumT addInput(
-        K key, AccumT accumulator, InputT value, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFnWithContext.addInput(key, accumulator, value,
-          CombineContextFactory.createFromProcessContext(c));
-    }
-
-    @Override
-    public AccumT mergeAccumulators(
-        K key, Iterable<AccumT> accumulators, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFnWithContext.mergeAccumulators(
-          key, accumulators, CombineContextFactory.createFromProcessContext(c));
-    }
-
-    @Override
-    public OutputT extractOutput(K key, AccumT accumulator, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFnWithContext.extractOutput(key, accumulator,
-          CombineContextFactory.createFromProcessContext(c));
-    }
-
-    @Override
-    public AccumT compact(K key, AccumT accumulator, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFnWithContext.compact(key, accumulator,
-          CombineContextFactory.createFromProcessContext(c));
-    }
-
-    @Override
-    public OutputT apply(K key, Iterable<? extends InputT> inputs, DoFn<?, ?>.ProcessContext c) {
-      return keyedCombineFnWithContext.apply(key, inputs,
-          CombineContextFactory.createFromProcessContext(c));
-    }
-
-    @Override
-    public AccumT addInputs(K key, Iterable<InputT> inputs, DoFn<?, ?>.ProcessContext c) {
-      CombineWithContext.Context combineContext = CombineContextFactory.createFromProcessContext(c);
-      AccumT accum = keyedCombineFnWithContext.createAccumulator(key, combineContext);
-      for (InputT input : inputs) {
-        accum = keyedCombineFnWithContext.addInput(key, accum, input, combineContext);
-      }
-      return accum;
-    }
-
-    @Override
-    public String toString() {
-      return keyedCombineFnWithContext.toString();
-    }
-
-    @Override
-    public AccumT createAccumulator(K key, PipelineOptions options, SideInputReader sideInputReader,
-        Collection<? extends BoundedWindow> windows) {
-      return keyedCombineFnWithContext.createAccumulator(key,
-        CombineContextFactory.createFromComponents(
-          options, sideInputReader, Iterables.getOnlyElement(windows)));
-    }
-
-    @Override
-    public AccumT addInput(K key, AccumT accumulator, InputT input, PipelineOptions options,
-        SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows) {
-      return keyedCombineFnWithContext.addInput(key, accumulator, input,
-        CombineContextFactory.createFromComponents(
-          options, sideInputReader, Iterables.getOnlyElement(windows)));
-    }
-
-    @Override
-    public AccumT mergeAccumulators(K key, Iterable<AccumT> accumulators, PipelineOptions options,
-        SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows) {
-      return keyedCombineFnWithContext.mergeAccumulators(key, accumulators,
-        CombineContextFactory.createFromComponents(
-          options, sideInputReader, Iterables.getOnlyElement(windows)));
-    }
-
-    @Override
-    public OutputT extractOutput(K key, AccumT accumulator, PipelineOptions options,
-        SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows) {
-      return keyedCombineFnWithContext.extractOutput(key, accumulator,
-        CombineContextFactory.createFromComponents(
-          options, sideInputReader, Iterables.getOnlyElement(windows)));
-    }
-
-    @Override
-    public AccumT compact(K key, AccumT accumulator, PipelineOptions options,
-        SideInputReader sideInputReader, Collection<? extends BoundedWindow> windows) {
-      return keyedCombineFnWithContext.compact(key, accumulator,
-        CombineContextFactory.createFromComponents(
-          options, sideInputReader, Iterables.getOnlyElement(windows)));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java
deleted file mode 100644
index 81572ea..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/PropertyNames.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-/**
- * Constant property names used by the SDK in CloudWorkflow specifications.
- */
-public class PropertyNames {
-  public static final String ALLOWED_ENCODINGS = "allowed_encodings";
-  public static final String APPEND_TRAILING_NEWLINES = "append_trailing_newlines";
-  public static final String BIGQUERY_CREATE_DISPOSITION = "create_disposition";
-  public static final String BIGQUERY_DATASET = "dataset";
-  public static final String BIGQUERY_PROJECT = "project";
-  public static final String BIGQUERY_SCHEMA = "schema";
-  public static final String BIGQUERY_TABLE = "table";
-  public static final String BIGQUERY_QUERY = "bigquery_query";
-  public static final String BIGQUERY_FLATTEN_RESULTS = "bigquery_flatten_results";
-  public static final String BIGQUERY_WRITE_DISPOSITION = "write_disposition";
-  public static final String BIGQUERY_EXPORT_FORMAT = "bigquery_export_format";
-  public static final String BIGQUERY_EXPORT_SCHEMA = "bigquery_export_schema";
-  public static final String CO_GBK_RESULT_SCHEMA = "co_gbk_result_schema";
-  public static final String COMBINE_FN = "combine_fn";
-  public static final String COMPONENT_ENCODINGS = "component_encodings";
-  public static final String COMPRESSION_TYPE = "compression_type";
-  public static final String CUSTOM_SOURCE_FORMAT = "custom_source";
-  public static final String CONCAT_SOURCE_SOURCES = "sources";
-  public static final String CONCAT_SOURCE_BASE_SPECS = "base_specs";
-  public static final String SOURCE_STEP_INPUT = "custom_source_step_input";
-  public static final String SOURCE_SPEC = "spec";
-  public static final String SOURCE_METADATA = "metadata";
-  public static final String SOURCE_DOES_NOT_NEED_SPLITTING = "does_not_need_splitting";
-  public static final String SOURCE_PRODUCES_SORTED_KEYS = "produces_sorted_keys";
-  public static final String SOURCE_IS_INFINITE = "is_infinite";
-  public static final String SOURCE_ESTIMATED_SIZE_BYTES = "estimated_size_bytes";
-  public static final String ELEMENT = "element";
-  public static final String ELEMENTS = "elements";
-  public static final String ENCODING = "encoding";
-  public static final String ENCODING_ID = "encoding_id";
-  public static final String END_INDEX = "end_index";
-  public static final String END_OFFSET = "end_offset";
-  public static final String END_SHUFFLE_POSITION = "end_shuffle_position";
-  public static final String ENVIRONMENT_VERSION_JOB_TYPE_KEY = "job_type";
-  public static final String ENVIRONMENT_VERSION_MAJOR_KEY = "major";
-  public static final String FILENAME = "filename";
-  public static final String FILENAME_PREFIX = "filename_prefix";
-  public static final String FILENAME_SUFFIX = "filename_suffix";
-  public static final String FILEPATTERN = "filepattern";
-  public static final String FOOTER = "footer";
-  public static final String FORMAT = "format";
-  public static final String HEADER = "header";
-  public static final String INPUTS = "inputs";
-  public static final String INPUT_CODER = "input_coder";
-  public static final String IS_GENERATED = "is_generated";
-  public static final String IS_MERGING_WINDOW_FN = "is_merging_window_fn";
-  public static final String IS_PAIR_LIKE = "is_pair_like";
-  public static final String IS_STREAM_LIKE = "is_stream_like";
-  public static final String IS_WRAPPER = "is_wrapper";
-  public static final String DISALLOW_COMBINER_LIFTING = "disallow_combiner_lifting";
-  public static final String NON_PARALLEL_INPUTS = "non_parallel_inputs";
-  public static final String NUM_SHARD_CODERS = "num_shard_coders";
-  public static final String NUM_METADATA_SHARD_CODERS = "num_metadata_shard_coders";
-  public static final String NUM_SHARDS = "num_shards";
-  public static final String OBJECT_TYPE_NAME = "@type";
-  public static final String OUTPUT = "output";
-  public static final String OUTPUT_INFO = "output_info";
-  public static final String OUTPUT_NAME = "output_name";
-  public static final String PARALLEL_INPUT = "parallel_input";
-  public static final String PHASE = "phase";
-  public static final String PUBSUB_ID_LABEL = "pubsub_id_label";
-  public static final String PUBSUB_SUBSCRIPTION = "pubsub_subscription";
-  public static final String PUBSUB_TIMESTAMP_LABEL = "pubsub_timestamp_label";
-  public static final String PUBSUB_TOPIC = "pubsub_topic";
-  public static final String SCALAR_FIELD_NAME = "value";
-  public static final String SERIALIZED_FN = "serialized_fn";
-  public static final String SHARD_NAME_TEMPLATE = "shard_template";
-  public static final String SHUFFLE_KIND = "shuffle_kind";
-  public static final String SHUFFLE_READER_CONFIG = "shuffle_reader_config";
-  public static final String SHUFFLE_WRITER_CONFIG = "shuffle_writer_config";
-  public static final String SORT_VALUES = "sort_values";
-  public static final String START_INDEX = "start_index";
-  public static final String START_OFFSET = "start_offset";
-  public static final String START_SHUFFLE_POSITION = "start_shuffle_position";
-  public static final String STRIP_TRAILING_NEWLINES = "strip_trailing_newlines";
-  public static final String TUPLE_TAGS = "tuple_tags";
-  public static final String USE_INDEXED_FORMAT = "use_indexed_format";
-  public static final String USER_FN = "user_fn";
-  public static final String USER_NAME = "user_name";
-  public static final String USES_KEYED_STATE = "uses_keyed_state";
-  public static final String VALIDATE_SINK = "validate_sink";
-  public static final String VALIDATE_SOURCE = "validate_source";
-  public static final String VALUE = "value";
-  public static final String DISPLAY_DATA = "display_data";
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RandomAccessData.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RandomAccessData.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RandomAccessData.java
deleted file mode 100644
index 6c96c8e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RandomAccessData.java
+++ /dev/null
@@ -1,352 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
-import com.google.cloud.dataflow.sdk.coders.ByteArrayCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.common.base.MoreObjects;
-import com.google.common.io.ByteStreams;
-import com.google.common.primitives.UnsignedBytes;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.Comparator;
-
-import javax.annotation.concurrent.NotThreadSafe;
-
-/**
- * An elastic-sized byte array which allows you to manipulate it as a stream, or access
- * it directly. This allows for a quick succession of moving bytes from an {@link InputStream}
- * to this wrapper to be used as an {@link OutputStream} and vice versa. This wrapper
- * also provides random access to bytes stored within. This wrapper allows users to finely
- * control the number of byte copies that occur.
- *
- * Anything stored within the in-memory buffer from offset {@link #size()} is considered temporary
- * unused storage.
- */
-@NotThreadSafe
-public class RandomAccessData {
-  /**
-   * A {@link Coder} which encodes the valid parts of this stream.
-   * This follows the same encoding scheme as {@link ByteArrayCoder}.
-   * This coder is deterministic and consistent with equals.
-   *
-   * This coder does not support encoding positive infinity.
-   */
-  public static class RandomAccessDataCoder extends AtomicCoder<RandomAccessData> {
-    private static final RandomAccessDataCoder INSTANCE = new RandomAccessDataCoder();
-
-    @JsonCreator
-    public static RandomAccessDataCoder of() {
-      return INSTANCE;
-    }
-
-    /**
-     * Writes the valid bytes of {@code value} to {@code outStream}. Unless the context is the
-     * whole stream, the bytes are preceded by their count encoded as a var-int.
-     *
-     * @throws CoderException if {@code value} is {@code null} or is the positive infinity token
-     */
-    @Override
-    public void encode(RandomAccessData value, OutputStream outStream, Coder.Context context)
-        throws CoderException, IOException {
-      // Reject null explicitly, matching getEncodedElementByteSize, instead of failing later
-      // with a bare NullPointerException.
-      if (value == null) {
-        throw new CoderException("cannot encode a null in memory stream");
-      }
-      // Positive infinity is an identity-compared token with no byte representation.
-      if (value == POSITIVE_INFINITY) {
-        throw new CoderException("Positive infinity can not be encoded.");
-      }
-      if (!context.isWholeStream) {
-        VarInt.encode(value.size, outStream);
-      }
-      value.writeTo(outStream, 0, value.size);
-    }
-
-    @Override
-    public RandomAccessData decode(InputStream inStream, Coder.Context context)
-        throws CoderException, IOException {
-      RandomAccessData rval = new RandomAccessData();
-      if (!context.isWholeStream) {
-        int length = VarInt.decodeInt(inStream);
-        rval.readFrom(inStream, 0, length);
-      } else {
-        ByteStreams.copy(inStream, rval.asOutputStream());
-      }
-      return rval;
-    }
-
-    @Override
-    public boolean consistentWithEquals() {
-      return true;
-    }
-
-    @Override
-    public boolean isRegisterByteSizeObserverCheap(
-        RandomAccessData value, Coder.Context context) {
-      return true;
-    }
-
-    @Override
-    protected long getEncodedElementByteSize(RandomAccessData value, Coder.Context context)
-        throws Exception {
-      if (value == null) {
-        throw new CoderException("cannot encode a null in memory stream");
-      }
-      long size = 0;
-      if (!context.isWholeStream) {
-        size += VarInt.getLength(value.size);
-      }
-      return size + value.size;
-    }
-  }
-
-  public static final UnsignedLexicographicalComparator UNSIGNED_LEXICOGRAPHICAL_COMPARATOR =
-      new UnsignedLexicographicalComparator();
-
-  /**
-   * A {@link Comparator} that compares two byte arrays lexicographically. It compares
-   * values as a list of unsigned bytes. The first pair of bytes that differ after any common
-   * prefix determines the order; when one array is a prefix of the other, the shorter array
-   * is treated as the lesser.
-   * For example, [] < [0x01] < [0x01, 0x7F] < [0x01, 0x80] < [0x02] < POSITIVE INFINITY.
-   *
-   * <p>Note that a token type of positive infinity is supported and is greater than
-   * all other {@link RandomAccessData}.
-   */
-  public static final class UnsignedLexicographicalComparator
-      implements Comparator<RandomAccessData> {
-    // Do not instantiate
-    private UnsignedLexicographicalComparator() {
-    }
-
-    @Override
-    public int compare(RandomAccessData o1, RandomAccessData o2) {
-      return compare(o1, o2, 0 /* start from the beginning */);
-    }
-
-    /**
-     * Compare the two sets of bytes starting at the given offset.
-     */
-    public int compare(RandomAccessData o1, RandomAccessData o2, int startOffset) {
-      if (o1 == o2) {
-        return 0;
-      }
-      if (o1 == POSITIVE_INFINITY) {
-        return 1;
-      }
-      if (o2 == POSITIVE_INFINITY) {
-        return -1;
-      }
-
-      int minBytesLen = Math.min(o1.size, o2.size);
-      for (int i = startOffset; i < minBytesLen; i++) {
-        // unsigned comparison
-        int b1 = o1.buffer[i] & 0xFF;
-        int b2 = o2.buffer[i] & 0xFF;
-        if (b1 == b2) {
-          continue;
-        }
-        // Return the stream with the smaller byte as the smaller value.
-        return b1 - b2;
-      }
-      // If one is a prefix of the other, return the shorter one as the smaller one.
-      // If both lengths are equal, then both streams are equal.
-      return o1.size - o2.size;
-    }
-
-    /**
-     * Returns the number of leading bytes that the two provided sets of bytes have in common.
-     */
-    public int commonPrefixLength(RandomAccessData o1, RandomAccessData o2) {
-      int limit = Math.min(o1.size, o2.size);
-      int matched = 0;
-      // Raw bytes are equal exactly when their unsigned values are equal, so no masking is
-      // needed to find the first mismatch.
-      while (matched < limit && o1.buffer[matched] == o2.buffer[matched]) {
-        matched++;
-      }
-      return matched;
-    }
-  }
-
-  /** A token type representing positive infinity. */
-  static final RandomAccessData POSITIVE_INFINITY = new RandomAccessData(0);
-
-  /**
-   * Returns a copy of this RandomAccessData in which the last byte that is not 0xFF has been
-   * incremented by one, giving a value of the same length that is strictly greater than this.
-   * Note that if this is empty or is all 0xFF then a token value of positive infinity is
-   * returned.
-   *
-   * <p>NOTE(review): bytes after the incremented position are left unchanged (they are all
-   * 0xFF), so the result is not necessarily the smallest same-length value greater than this;
-   * for example [0x00, 0xFF] becomes [0x01, 0xFF] rather than [0x01, 0x00]. Confirm this is
-   * intended.
-   *
-   * <p>The {@link UnsignedLexicographicalComparator} supports comparing {@link RandomAccessData}
-   * with support for positive infinity.
-   */
-  public RandomAccessData increment() throws IOException {
-    RandomAccessData copy = copy();
-    // Scan from the last valid byte towards the first, looking for a byte that can be
-    // incremented without overflowing.
-    for (int i = copy.size - 1; i >= 0; --i) {
-      if (copy.buffer[i] != UnsignedBytes.MAX_VALUE) {
-        copy.buffer[i] = UnsignedBytes.checkedCast(UnsignedBytes.toInt(copy.buffer[i]) + 1);
-        return copy;
-      }
-    }
-    // Either there were no bytes or every byte was 0xFF.
-    return POSITIVE_INFINITY;
-  }
-
-  private static final int DEFAULT_INITIAL_BUFFER_SIZE = 128;
-
-  /** Constructs a RandomAccessData with a default buffer size. */
-  public RandomAccessData() {
-    this(DEFAULT_INITIAL_BUFFER_SIZE);
-  }
-
-  /** Constructs a RandomAccessData with the initial buffer. */
-  public RandomAccessData(byte[] initialBuffer) {
-    checkNotNull(initialBuffer);
-    this.buffer = initialBuffer;
-    this.size = initialBuffer.length;
-  }
-
-  /**
-   * Constructs an empty RandomAccessData whose backing array has the given initial capacity.
-   *
-   * @param initialBufferSize the initial capacity in bytes; zero is allowed
-   * @throws IllegalArgumentException if {@code initialBufferSize} is negative
-   */
-  public RandomAccessData(int initialBufferSize) {
-    // The message now matches the check: zero is a valid size, only negatives are rejected.
-    checkArgument(initialBufferSize >= 0,
-        "Expected initial buffer size to be greater than or equal to zero.");
-    this.buffer = new byte[initialBufferSize];
-  }
-
-  private byte[] buffer;
-  private int size;
-
-  /** Returns the backing array. */
-  public byte[] array() {
-    return buffer;
-  }
-
-  /** Returns the number of bytes in the backing array that are valid. */
-  public int size() {
-    return size;
-  }
-
-  /** Resets the end of the stream to the specified position. */
-  public void resetTo(int position) {
-    ensureCapacity(position);
-    size = position;
-  }
-
-  private final OutputStream outputStream = new OutputStream() {
-    @Override
-    public void write(int b) throws IOException {
-      ensureCapacity(size + 1);
-      buffer[size] = (byte) b;
-      size += 1;
-    }
-
-    @Override
-    public void write(byte[] b, int offset, int length) throws IOException {
-      ensureCapacity(size + length);
-      System.arraycopy(b, offset, buffer, size, length);
-      size += length;
-    }
-  };
-
-  /**
-   * Returns an output stream which appends to the backing buffer starting at the current
-   * {@link #size()}. Note that the internal buffer will grow as required to accommodate all
-   * data written.
-   */
-  public OutputStream asOutputStream() {
-    return outputStream;
-  }
-
-  /**
-   * Returns an {@link InputStream} wrapper which supplies the portion of this backing byte buffer
-   * starting at {@code offset} and up to {@code length} bytes. Note that the returned
-   * {@link InputStream} is only a wrapper around the current backing array, so in-place
-   * modifications to that array will be visible by the {@link InputStream}.
-   *
-   * <p>The stream keeps reading from the array that was the backing buffer at the time of this
-   * call. If a later write grows this {@link RandomAccessData} and the backing array is
-   * replaced, data written afterwards is not visible through the returned stream.
-   */
-  public InputStream asInputStream(final int offset, final int length) {
-    return new ByteArrayInputStream(buffer, offset, length);
-  }
-
-  /**
-   * Writes {@code length} bytes starting at {@code offset} from the backing data store to the
-   * specified output stream.
-   */
-  public void writeTo(OutputStream out, int offset, int length) throws IOException {
-    out.write(buffer, offset, length);
-  }
-
-  /**
-   * Reads {@code length} bytes from the specified input stream writing them into the backing
-   * data store starting at {@code offset}.
-   *
-   * <p>Note that the in memory stream will be grown to ensure there is enough capacity.
-   */
-  public void readFrom(InputStream inStream, int offset, int length) throws IOException {
-    ensureCapacity(offset + length);
-    ByteStreams.readFully(inStream, buffer, offset, length);
-    size = offset + length;
-  }
-
-  /** Returns a copy of this RandomAccessData. */
-  public RandomAccessData copy() throws IOException {
-    RandomAccessData copy = new RandomAccessData(size);
-    writeTo(copy.asOutputStream(), 0, size);
-    return copy;
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (other == this) {
-      return true;
-    }
-    if (!(other instanceof RandomAccessData)) {
-      return false;
-    }
-    return UNSIGNED_LEXICOGRAPHICAL_COMPARATOR.compare(this, (RandomAccessData) other) == 0;
-  }
-
-  /** Hashes only the first {@code size} bytes of the backing array. */
-  @Override
-  public int hashCode() {
-    int hash = 1;
-    int index = 0;
-    while (index < size) {
-      hash = hash * 31 + buffer[index];
-      index++;
-    }
-    return hash;
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(this)
-        .add("buffer", Arrays.copyOf(buffer, size))
-        .add("size", size)
-        .toString();
-  }
-
-  /**
-   * Grows the backing array, if needed, so that it can hold at least {@code minCapacity} bytes.
-   */
-  private void ensureCapacity(int minCapacity) {
-    if (buffer.length >= minCapacity) {
-      // Already large enough; nothing to do.
-      return;
-    }
-
-    // Prefer doubling the current capacity. The doubling is done in long arithmetic and
-    // clamped to Integer.MAX_VALUE so that the multiplication cannot overflow. If doubling is
-    // still not enough, use the requested capacity directly.
-    long doubled = Math.min(Integer.MAX_VALUE, buffer.length * 2L);
-    int grownCapacity = Math.max((int) doubled, minCapacity);
-    buffer = Arrays.copyOf(buffer, grownCapacity);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFn.java
deleted file mode 100644
index c5ef2ea..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFn.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.util.state.MergingStateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.ReadableState;
-import com.google.cloud.dataflow.sdk.util.state.StateAccessor;
-
-import org.joda.time.Instant;
-
-import java.io.Serializable;
-
-/**
- * Specification for processing to happen after elements have been grouped by key.
- *
- * @param <K> The type of key being processed.
- * @param <InputT> The type of input values associated with the key.
- * @param <OutputT> The output type that will be produced for each key.
- * @param <W> The type of windows this operates on.
- */
-public abstract class ReduceFn<K, InputT, OutputT, W extends BoundedWindow>
-    implements Serializable {
-
-  /** Information accessible to all the processing methods in this {@code ReduceFn}. */
-  public abstract class Context {
-    /** Return the key that is being processed. */
-    public abstract K key();
-
-    /** The window that is being processed. */
-    public abstract W window();
-
-    /** Access the current {@link WindowingStrategy}. */
-    public abstract WindowingStrategy<?, W> windowingStrategy();
-
-    /** Return the interface for accessing state. */
-    public abstract StateAccessor<K> state();
-
-    /** Return the interface for accessing timers. */
-    public abstract Timers timers();
-  }
-
-  /** Information accessible within {@link #processValue}. */
-  public abstract class ProcessValueContext extends Context {
-    /** Return the actual value being processed. */
-    public abstract InputT value();
-
-    /** Return the timestamp associated with the value. */
-    public abstract Instant timestamp();
-  }
-
-  /** Information accessible within {@link #onMerge}. */
-  public abstract class OnMergeContext extends Context {
-    /** Return the interface for accessing state. */
-    @Override
-    public abstract MergingStateAccessor<K, W> state();
-  }
-
-  /** Information accessible within {@link #onTrigger}. */
-  public abstract class OnTriggerContext extends Context {
-    /** Returns the {@link PaneInfo} for the trigger firing being processed. */
-    public abstract PaneInfo paneInfo();
-
-    /** Output the given value in the current window. */
-    public abstract void output(OutputT value);
-  }
-
-  //////////////////////////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Called for each value of type {@code InputT} associated with the current key.
-   */
-  public abstract void processValue(ProcessValueContext c) throws Exception;
-
-  /**
-   * Called when windows are merged.
-   */
-  public abstract void onMerge(OnMergeContext context) throws Exception;
-
-  /**
-   * Called when triggers fire.
-   *
-   * <p>Implementations of {@link ReduceFn} should call {@link OnTriggerContext#output} to emit
-   * any results that should be included in the pane produced by this trigger firing.
-   */
-  public abstract void onTrigger(OnTriggerContext context) throws Exception;
-
-  /**
-   * Called before {@link #onMerge} is invoked to provide an opportunity to prefetch any needed
-   * state. The default implementation does nothing.
-   *
-   * @param c the state accessor to prefetch from
-   */
-  public void prefetchOnMerge(MergingStateAccessor<K, W> c) throws Exception {}
-
-  /**
-   * Called before {@link #onTrigger} is invoked to provide an opportunity to prefetch any needed
-   * state. The default implementation does nothing.
-   *
-   * @param context the state accessor to prefetch from
-   */
-  public void prefetchOnTrigger(StateAccessor<K> context) {}
-
-  /**
-   * Called to clear any persisted state that the {@link ReduceFn} may be holding. This will be
-   * called when the windowing is closing and will receive no future interactions.
-   */
-  public abstract void clearState(Context context) throws Exception;
-
-  /**
-   * Returns a {@link ReadableState} that reports true if there is no buffered state.
-   */
-  public abstract ReadableState<Boolean> isEmpty(StateAccessor<K> context);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java
deleted file mode 100644
index bdbaf10..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnContextFactory.java
+++ /dev/null
@@ -1,495 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-import com.google.cloud.dataflow.sdk.util.state.MergingStateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.ReadableState;
-import com.google.cloud.dataflow.sdk.util.state.State;
-import com.google.cloud.dataflow.sdk.util.state.StateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateContext;
-import com.google.cloud.dataflow.sdk.util.state.StateContexts;
-import com.google.cloud.dataflow.sdk.util.state.StateInternals;
-import com.google.cloud.dataflow.sdk.util.state.StateNamespace;
-import com.google.cloud.dataflow.sdk.util.state.StateNamespaces;
-import com.google.cloud.dataflow.sdk.util.state.StateNamespaces.WindowNamespace;
-import com.google.cloud.dataflow.sdk.util.state.StateTag;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableMap;
-
-import org.joda.time.Instant;
-
-import java.util.Collection;
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * Factory for creating instances of the various {@link ReduceFn} contexts.
- */
-class ReduceFnContextFactory<K, InputT, OutputT, W extends BoundedWindow> {
-  public interface OnTriggerCallbacks<OutputT> {
-    void output(OutputT toOutput);
-  }
-
-  private final K key;
-  private final ReduceFn<K, InputT, OutputT, W> reduceFn;
-  private final WindowingStrategy<?, W> windowingStrategy;
-  private final StateInternals<K> stateInternals;
-  private final ActiveWindowSet<W> activeWindows;
-  private final TimerInternals timerInternals;
-  private final WindowingInternals<?, ?> windowingInternals;
-  private final PipelineOptions options;
-
-  ReduceFnContextFactory(K key, ReduceFn<K, InputT, OutputT, W> reduceFn,
-      WindowingStrategy<?, W> windowingStrategy, StateInternals<K> stateInternals,
-      ActiveWindowSet<W> activeWindows, TimerInternals timerInternals,
-      WindowingInternals<?, ?> windowingInternals, PipelineOptions options) {
-    this.key = key;
-    this.reduceFn = reduceFn;
-    this.windowingStrategy = windowingStrategy;
-    this.stateInternals = stateInternals;
-    this.activeWindows = activeWindows;
-    this.timerInternals = timerInternals;
-    this.windowingInternals = windowingInternals;
-    this.options = options;
-  }
-
-  /** Where should we look for state associated with a given window? */
-  public static enum StateStyle {
-    /** All state is associated with the window itself. */
-    DIRECT,
-    /** State is associated with the 'state address' windows tracked by the active window set. */
-    RENAMED
-  }
-
-  private StateAccessorImpl<K, W> stateAccessor(W window, StateStyle style) {
-    return new StateAccessorImpl<K, W>(
-        activeWindows, windowingStrategy.getWindowFn().windowCoder(),
-        stateInternals, StateContexts.createFromComponents(options, windowingInternals, window),
-        style);
-  }
-
-  public ReduceFn<K, InputT, OutputT, W>.Context base(W window, StateStyle style) {
-    return new ContextImpl(stateAccessor(window, style));
-  }
-
-  public ReduceFn<K, InputT, OutputT, W>.ProcessValueContext forValue(
-      W window, InputT value, Instant timestamp, StateStyle style) {
-    return new ProcessValueContextImpl(stateAccessor(window, style), value, timestamp);
-  }
-
-  public ReduceFn<K, InputT, OutputT, W>.OnTriggerContext forTrigger(W window,
-      ReadableState<PaneInfo> pane, StateStyle style, OnTriggerCallbacks<OutputT> callbacks) {
-    return new OnTriggerContextImpl(stateAccessor(window, style), pane, callbacks);
-  }
-
-  public ReduceFn<K, InputT, OutputT, W>.OnMergeContext forMerge(
-      Collection<W> activeToBeMerged, W mergeResult, StateStyle style) {
-    return new OnMergeContextImpl(
-        new MergingStateAccessorImpl<K, W>(activeWindows,
-            windowingStrategy.getWindowFn().windowCoder(),
-            stateInternals, style, activeToBeMerged, mergeResult));
-  }
-
-  public ReduceFn<K, InputT, OutputT, W>.OnMergeContext forPremerge(W window) {
-    return new OnPremergeContextImpl(new PremergingStateAccessorImpl<K, W>(
-        activeWindows, windowingStrategy.getWindowFn().windowCoder(), stateInternals, window));
-  }
-
-  private class TimersImpl implements Timers {
-    private final StateNamespace namespace;
-
-    public TimersImpl(StateNamespace namespace) {
-      Preconditions.checkArgument(namespace instanceof WindowNamespace);
-      this.namespace = namespace;
-    }
-
-    @Override
-    public void setTimer(Instant timestamp, TimeDomain timeDomain) {
-      timerInternals.setTimer(TimerData.of(namespace, timestamp, timeDomain));
-    }
-
-    @Override
-    public void deleteTimer(Instant timestamp, TimeDomain timeDomain) {
-      timerInternals.deleteTimer(TimerData.of(namespace, timestamp, timeDomain));
-    }
-
-    @Override
-    public Instant currentProcessingTime() {
-      return timerInternals.currentProcessingTime();
-    }
-
-    @Override
-    @Nullable
-    public Instant currentSynchronizedProcessingTime() {
-      return timerInternals.currentSynchronizedProcessingTime();
-    }
-
-    @Override
-    @Nullable
-    public Instant currentEventTime() {
-      return timerInternals.currentInputWatermarkTime();
-    }
-  }
-
-  // ======================================================================
-  // StateAccessors
-  // ======================================================================
-  static class StateAccessorImpl<K, W extends BoundedWindow> implements StateAccessor<K> {
-
-
-    protected final ActiveWindowSet<W> activeWindows;
-    protected final StateContext<W> context;
-    protected final StateNamespace windowNamespace;
-    protected final Coder<W> windowCoder;
-    protected final StateInternals<K> stateInternals;
-    protected final StateStyle style;
-
-    public StateAccessorImpl(ActiveWindowSet<W> activeWindows, Coder<W> windowCoder,
-        StateInternals<K> stateInternals, StateContext<W> context, StateStyle style) {
-
-      this.activeWindows = activeWindows;
-      this.windowCoder = windowCoder;
-      this.stateInternals = stateInternals;
-      this.context = checkNotNull(context);
-      this.windowNamespace = namespaceFor(context.window());
-      this.style = style;
-    }
-
-    protected StateNamespace namespaceFor(W window) {
-      return StateNamespaces.window(windowCoder, window);
-    }
-
-    protected StateNamespace windowNamespace() {
-      return windowNamespace;
-    }
-
-    W window() {
-      return context.window();
-    }
-
-    StateNamespace namespace() {
-      return windowNamespace();
-    }
-
-    /**
-     * Returns the state for {@code address}, resolved according to this accessor's
-     * {@link StateStyle}: {@code DIRECT} uses the namespace of the context window itself, while
-     * {@code RENAMED} uses the namespace of the window that the active window set reports as
-     * the write state address for the context window.
-     *
-     * @throws IllegalStateException if the style is not one of the known constants
-     */
-    @Override
-    public <StateT extends State> StateT access(StateTag<? super K, StateT> address) {
-      switch (style) {
-        case DIRECT:
-          return stateInternals.state(windowNamespace(), address, context);
-        case RENAMED:
-          return stateInternals.state(
-              namespaceFor(activeWindows.writeStateAddress(context.window())), address, context);
-        default:
-          // Not reachable with the two styles defined today; fail with a descriptive message
-          // if a new style is added without updating this switch.
-          throw new IllegalStateException("Unknown StateStyle: " + style);
-      }
-    }
-  }
-
-  static class MergingStateAccessorImpl<K, W extends BoundedWindow>
-      extends StateAccessorImpl<K, W> implements MergingStateAccessor<K, W> {
-    private final Collection<W> activeToBeMerged;
-
-    public MergingStateAccessorImpl(ActiveWindowSet<W> activeWindows, Coder<W> windowCoder,
-        StateInternals<K> stateInternals, StateStyle style, Collection<W> activeToBeMerged,
-        W mergeResult) {
-      super(activeWindows, windowCoder, stateInternals,
-          StateContexts.windowOnly(mergeResult), style);
-      this.activeToBeMerged = activeToBeMerged;
-    }
-
-    /**
-     * Returns the state for {@code address} in the merge result window. {@code DIRECT} uses the
-     * namespace of the merge result window itself, while {@code RENAMED} uses the namespace of
-     * the window that the active window set reports as the merged write state address for the
-     * windows being merged.
-     *
-     * @throws IllegalStateException if the style is not one of the known constants
-     */
-    @Override
-    public <StateT extends State> StateT access(StateTag<? super K, StateT> address) {
-      switch (style) {
-        case DIRECT:
-          return stateInternals.state(windowNamespace(), address, context);
-        case RENAMED:
-          return stateInternals.state(
-              namespaceFor(activeWindows.mergedWriteStateAddress(
-                  activeToBeMerged, context.window())),
-              address,
-              context);
-        default:
-          // Not reachable with the two styles defined today; fail with a descriptive message
-          // if a new style is added without updating this switch.
-          throw new IllegalStateException("Unknown StateStyle: " + style);
-      }
-    }
-
-    @Override
-    public <StateT extends State> Map<W, StateT> accessInEachMergingWindow(
-        StateTag<? super K, StateT> address) {
-      ImmutableMap.Builder<W, StateT> builder = ImmutableMap.builder();
-      for (W mergingWindow : activeToBeMerged) {
-        StateNamespace namespace = null;
-        switch (style) {
-          case DIRECT:
-            namespace = namespaceFor(mergingWindow);
-            break;
-          case RENAMED:
-            namespace = namespaceFor(activeWindows.writeStateAddress(mergingWindow));
-            break;
-        }
-        Preconditions.checkNotNull(namespace); // cases are exhaustive.
-        builder.put(mergingWindow, stateInternals.state(namespace, address, context));
-      }
-      return builder.build();
-    }
-  }
-
-  /**
-   * A {@link MergingStateAccessor} that always uses {@link StateStyle#RENAMED} and exposes, per
-   * state tag, the state held under each read state address window of the context window.
-   */
-  static class PremergingStateAccessorImpl<K, W extends BoundedWindow>
-      extends StateAccessorImpl<K, W> implements MergingStateAccessor<K, W> {
-    public PremergingStateAccessorImpl(ActiveWindowSet<W> activeWindows, Coder<W> windowCoder,
-        StateInternals<K> stateInternals, W window) {
-      super(activeWindows, windowCoder, stateInternals,
-          StateContexts.windowOnly(window), StateStyle.RENAMED);
-    }
-
-    /** Returns the read state address windows of the context window. */
-    Collection<W> mergingWindows() {
-      return activeWindows.readStateAddresses(context.window());
-    }
-
-    /** Returns the state for {@code address} in each read state address window. */
-    @Override
-    public <StateT extends State> Map<W, StateT> accessInEachMergingWindow(
-        StateTag<? super K, StateT> address) {
-      ImmutableMap.Builder<W, StateT> statesByWindow = ImmutableMap.builder();
-      for (W readWindow : mergingWindows()) {
-        statesByWindow.put(
-            readWindow, stateInternals.state(namespaceFor(readWindow), address, context));
-      }
-      return statesByWindow.build();
-    }
-  }
-
-  // ======================================================================
-  // Contexts
-  // ======================================================================
-
-  private class ContextImpl extends ReduceFn<K, InputT, OutputT, W>.Context {
-    private final StateAccessorImpl<K, W> state;
-    private final TimersImpl timers;
-
-    private ContextImpl(StateAccessorImpl<K, W> state) {
-      reduceFn.super();
-      this.state = state;
-      this.timers = new TimersImpl(state.namespace());
-    }
-
-    @Override
-    public K key() {
-      return key;
-    }
-
-    @Override
-    public W window() {
-      return state.window();
-    }
-
-    @Override
-    public WindowingStrategy<?, W> windowingStrategy() {
-      return windowingStrategy;
-    }
-
-    @Override
-    public StateAccessor<K> state() {
-      return state;
-    }
-
-    @Override
-    public Timers timers() {
-      return timers;
-    }
-  }
-
-  private class ProcessValueContextImpl
-      extends ReduceFn<K, InputT, OutputT, W>.ProcessValueContext {
-    private final InputT value;
-    private final Instant timestamp;
-    private final StateAccessorImpl<K, W> state;
-    private final TimersImpl timers;
-
-    private ProcessValueContextImpl(StateAccessorImpl<K, W> state,
-        InputT value, Instant timestamp) {
-      reduceFn.super();
-      this.state = state;
-      this.value = value;
-      this.timestamp = timestamp;
-      this.timers = new TimersImpl(state.namespace());
-    }
-
-    @Override
-    public K key() {
-      return key;
-    }
-
-    @Override
-    public W window() {
-      return state.window();
-    }
-
-    @Override
-    public WindowingStrategy<?, W> windowingStrategy() {
-      return windowingStrategy;
-    }
-
-    @Override
-    public StateAccessor<K> state() {
-      return state;
-    }
-
-    @Override
-    public InputT value() {
-      return value;
-    }
-
-    @Override
-    public Instant timestamp() {
-      return timestamp;
-    }
-
-    @Override
-    public Timers timers() {
-      return timers;
-    }
-  }
-
-  private class OnTriggerContextImpl extends ReduceFn<K, InputT, OutputT, W>.OnTriggerContext {
-    private final StateAccessorImpl<K, W> state;
-    private final ReadableState<PaneInfo> pane;
-    private final OnTriggerCallbacks<OutputT> callbacks;
-    private final TimersImpl timers;
-
-    private OnTriggerContextImpl(StateAccessorImpl<K, W> state, ReadableState<PaneInfo> pane,
-        OnTriggerCallbacks<OutputT> callbacks) {
-      reduceFn.super();
-      this.state = state;
-      this.pane = pane;
-      this.callbacks = callbacks;
-      this.timers = new TimersImpl(state.namespace());
-    }
-
-    @Override
-    public K key() {
-      return key;
-    }
-
-    @Override
-    public W window() {
-      return state.window();
-    }
-
-    @Override
-    public WindowingStrategy<?, W> windowingStrategy() {
-      return windowingStrategy;
-    }
-
-    @Override
-    public StateAccessor<K> state() {
-      return state;
-    }
-
-    @Override
-    public PaneInfo paneInfo() {
-      return pane.read();
-    }
-
-    @Override
-    public void output(OutputT value) {
-      callbacks.output(value);
-    }
-
-    @Override
-    public Timers timers() {
-      return timers;
-    }
-  }
-
-  private class OnMergeContextImpl extends ReduceFn<K, InputT, OutputT, W>.OnMergeContext {
-    private final MergingStateAccessorImpl<K, W> state;
-    private final TimersImpl timers;
-
-    private OnMergeContextImpl(MergingStateAccessorImpl<K, W> state) {
-      reduceFn.super();
-      this.state = state;
-      this.timers = new TimersImpl(state.namespace());
-    }
-
-    @Override
-    public K key() {
-      return key;
-    }
-
-    @Override
-    public WindowingStrategy<?, W> windowingStrategy() {
-      return windowingStrategy;
-    }
-
-    @Override
-    public MergingStateAccessor<K, W> state() {
-      return state;
-    }
-
-    @Override
-    public W window() {
-      return state.window();
-    }
-
-    @Override
-    public Timers timers() {
-      return timers;
-    }
-  }
-
-  private class OnPremergeContextImpl extends ReduceFn<K, InputT, OutputT, W>.OnMergeContext {
-    private final PremergingStateAccessorImpl<K, W> state;
-    private final TimersImpl timers;
-
-    private OnPremergeContextImpl(PremergingStateAccessorImpl<K, W> state) {
-      reduceFn.super();
-      this.state = state;
-      this.timers = new TimersImpl(state.namespace());
-    }
-
-    @Override
-    public K key() {
-      return key;
-    }
-
-    @Override
-    public WindowingStrategy<?, W> windowingStrategy() {
-      return windowingStrategy;
-    }
-
-    @Override
-    public MergingStateAccessor<K, W> state() {
-      return state;
-    }
-
-    @Override
-    public W window() {
-      return state.window();
-    }
-
-    @Override
-    public Timers timers() {
-      return timers;
-    }
-  }
-}

[66/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/pom.xml
----------------------------------------------------------------------
diff --git a/maven-archetypes/pom.xml b/maven-archetypes/pom.xml
deleted file mode 100644
index 4565253..0000000
--- a/maven-archetypes/pom.xml
+++ /dev/null
@@ -1,41 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-    Licensed to the Apache Software Foundation (ASF) under one or more
-    contributor license agreements.  See the NOTICE file distributed with
-    this work for additional information regarding copyright ownership.
-    The ASF licenses this file to You under the Apache License, Version 2.0
-    (the "License"); you may not use this file except in compliance with
-    the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <groupId>org.apache.beam</groupId>
-    <artifactId>parent</artifactId>
-    <version>0.1.0-incubating-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-
-  <artifactId>maven-archetypes-parent</artifactId>
-  <packaging>pom</packaging>
-
-  <name>Apache Beam :: Maven Archetypes</name>
-
-  <modules>
-    <module>starter</module>
-    <module>examples</module>
-  </modules>
-
-</project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/starter/pom.xml
----------------------------------------------------------------------
diff --git a/maven-archetypes/starter/pom.xml b/maven-archetypes/starter/pom.xml
deleted file mode 100644
index 933e8b1..0000000
--- a/maven-archetypes/starter/pom.xml
+++ /dev/null
@@ -1,57 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-    Licensed to the Apache Software Foundation (ASF) under one or more
-    contributor license agreements.  See the NOTICE file distributed with
-    this work for additional information regarding copyright ownership.
-    The ASF licenses this file to You under the Apache License, Version 2.0
-    (the "License"); you may not use this file except in compliance with
-    the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <groupId>org.apache.beam</groupId>
-    <artifactId>maven-archetypes-parent</artifactId>
-    <version>0.1.0-incubating-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-
-  <groupId>org.apache.beam</groupId>
-  <artifactId>maven-archetypes-starter</artifactId>
-  <name>Apache Beam :: Maven Archetypes :: Starter</name>
-  <description>A Maven archetype to create a simple starter pipeline to
-    get started using the Apache Beam Java SDK. </description>
-
-  <packaging>maven-archetype</packaging>
-
-  <build>
-    <extensions>
-      <extension>
-        <groupId>org.apache.maven.archetype</groupId>
-        <artifactId>archetype-packaging</artifactId>
-        <version>2.4</version>
-      </extension>
-    </extensions>
-
-    <pluginManagement>
-      <plugins>
-        <plugin>
-          <artifactId>maven-archetype-plugin</artifactId>
-          <version>2.4</version>
-        </plugin>
-      </plugins>
-    </pluginManagement>
-  </build>
-</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
----------------------------------------------------------------------
diff --git a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
deleted file mode 100644
index bf75798..0000000
--- a/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ /dev/null
@@ -1,21 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<archetype-descriptor
-    xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
-    name="Google Cloud Dataflow Starter Pipeline Archetype"
-    xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
-    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-  <requiredProperties>
-    <requiredProperty key="targetPlatform">
-      <defaultValue>1.7</defaultValue>
-    </requiredProperty>
-  </requiredProperties>
-
-  <fileSets>
-    <fileSet filtered="true" packaged="true" encoding="UTF-8">
-      <directory>src/main/java</directory>
-      <includes>
-        <include>**/*.java</include>
-      </includes>
-    </fileSet>
-  </fileSets>
-</archetype-descriptor>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
deleted file mode 100644
index 19e7d2d..0000000
--- a/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
+++ /dev/null
@@ -1,43 +0,0 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-
-  <groupId>${groupId}</groupId>
-  <artifactId>${artifactId}</artifactId>
-  <version>${version}</version>
-
-  <build>
-   <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-compiler-plugin</artifactId>
-        <version>3.3</version>
-        <configuration>
-          <source>${targetPlatform}</source>
-          <target>${targetPlatform}</target>
-        </configuration>
-      </plugin>
-    </plugins>
-  </build>
-
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.beam</groupId>
-      <artifactId>java-sdk-all</artifactId>
-      <version>[0-incubating, 1-incubating)</version>
-    </dependency>
-
-    <!-- slf4j API frontend binding with JUL backend -->
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-api</artifactId>
-      <version>1.7.7</version>
-    </dependency>
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-jdk14</artifactId>
-      <version>1.7.7</version>
-    </dependency>
-  </dependencies>
-</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java b/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java
deleted file mode 100644
index ffabbc0..0000000
--- a/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package};
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * A starter example for writing Google Cloud Dataflow programs.
- *
- * <p>The example takes two strings, converts them to their upper-case
- * representation and logs them.
- *
- * <p>To run this starter example locally using DirectPipelineRunner, just
- * execute it without any additional parameters from your favorite development
- * environment.
- *
- * <p>To run this starter example using managed resource in Google Cloud
- * Platform, you should specify the following command-line options:
- *   --project=<YOUR_PROJECT_ID>
- *   --stagingLocation=<STAGING_LOCATION_IN_CLOUD_STORAGE>
- *   --runner=BlockingDataflowPipelineRunner
- */
-public class StarterPipeline {
-  private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class);
-
-  public static void main(String[] args) {
-    Pipeline p = Pipeline.create(
-        PipelineOptionsFactory.fromArgs(args).withValidation().create());
-
-    p.apply(Create.of("Hello", "World"))
-    .apply(ParDo.of(new DoFn<String, String>() {
-      @Override
-      public void processElement(ProcessContext c) {
-        c.output(c.element().toUpperCase());
-      }
-    }))
-    .apply(ParDo.of(new DoFn<String, Void>() {
-      @Override
-      public void processElement(ProcessContext c)  {
-        LOG.info(c.element());
-      }
-    }));
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties
----------------------------------------------------------------------
diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties
deleted file mode 100644
index c59e77a..0000000
--- a/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties
+++ /dev/null
@@ -1,5 +0,0 @@
-package=it.pkg
-version=0.1-SNAPSHOT
-groupId=archetype.it
-artifactId=basic
-targetPlatform=1.7

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt
----------------------------------------------------------------------
diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt b/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt
deleted file mode 100644
index 0b59873..0000000
--- a/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt
+++ /dev/null
@@ -1 +0,0 @@
-verify

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
----------------------------------------------------------------------
diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
deleted file mode 100644
index d29424a..0000000
--- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
+++ /dev/null
@@ -1,43 +0,0 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-
-  <groupId>archetype.it</groupId>
-  <artifactId>basic</artifactId>
-  <version>0.1-SNAPSHOT</version>
-
-  <build>
-   <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-compiler-plugin</artifactId>
-        <version>3.3</version>
-        <configuration>
-          <source>1.7</source>
-          <target>1.7</target>
-        </configuration>
-      </plugin>
-    </plugins>
-  </build>
-
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.beam</groupId>
-      <artifactId>java-sdk-all</artifactId>
-      <version>[0-incubating, 1-incubating)</version>
-    </dependency>
-
-    <!-- slf4j API frontend binding with JUL backend -->
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-api</artifactId>
-      <version>1.7.7</version>
-    </dependency>
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-jdk14</artifactId>
-      <version>1.7.7</version>
-    </dependency>
-  </dependencies>
-</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java b/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java
deleted file mode 100644
index 2e7c4e1..0000000
--- a/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package it.pkg;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * A starter example for writing Google Cloud Dataflow programs.
- *
- * <p>The example takes two strings, converts them to their upper-case
- * representation and logs them.
- *
- * <p>To run this starter example locally using DirectPipelineRunner, just
- * execute it without any additional parameters from your favorite development
- * environment.
- *
- * <p>To run this starter example using managed resource in Google Cloud
- * Platform, you should specify the following command-line options:
- *   --project=<YOUR_PROJECT_ID>
- *   --stagingLocation=<STAGING_LOCATION_IN_CLOUD_STORAGE>
- *   --runner=BlockingDataflowPipelineRunner
- */
-public class StarterPipeline {
-  private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class);
-
-  public static void main(String[] args) {
-    Pipeline p = Pipeline.create(
-        PipelineOptionsFactory.fromArgs(args).withValidation().create());
-
-    p.apply(Create.of("Hello", "World"))
-    .apply(ParDo.of(new DoFn<String, String>() {
-      @Override
-      public void processElement(ProcessContext c) {
-        c.output(c.element().toUpperCase());
-      }
-    }))
-    .apply(ParDo.of(new DoFn<String, Void>() {
-      @Override
-      public void processElement(ProcessContext c)  {
-        LOG.info(c.element());
-      }
-    }));
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 6b2fd93..b79ddf6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -128,7 +128,7 @@
     <module>sdks/java/core</module>
     <module>runners</module>
     <module>examples/java</module>
-    <module>maven-archetypes</module>
+    <module>sdks/java/maven-archetypes</module>
   </modules>
 
   <profiles>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/pom.xml b/sdks/java/maven-archetypes/examples/pom.xml
new file mode 100644
index 0000000..7e74b9d
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/pom.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.beam</groupId>
+    <artifactId>maven-archetypes-parent</artifactId>
+    <version>0.1.0-incubating-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>maven-archetypes-examples</artifactId>
+  <name>Apache Beam :: Maven Archetypes :: Examples</name>
+  <description>A Maven Archetype to create a project containing all the
+    example pipelines from the Apache Beam Java SDK.</description>
+
+  <packaging>maven-archetype</packaging>
+
+  <build>
+    <extensions>
+      <extension>
+        <groupId>org.apache.maven.archetype</groupId>
+        <artifactId>archetype-packaging</artifactId>
+        <version>2.4</version>
+      </extension>
+    </extensions>
+
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <artifactId>maven-archetype-plugin</artifactId>
+          <version>2.4</version>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml b/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
new file mode 100644
index 0000000..7742af4
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<archetype-descriptor
+    xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
+    name="Google Cloud Dataflow Example Pipelines Archetype"
+    xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+  <requiredProperties>
+    <requiredProperty key="targetPlatform">
+      <defaultValue>1.7</defaultValue>
+    </requiredProperty>
+  </requiredProperties>
+
+  <fileSets>
+    <fileSet filtered="true" packaged="true" encoding="UTF-8">
+      <directory>src/main/java</directory>
+      <includes>
+        <include>**/*.java</include>
+      </includes>
+    </fileSet>
+
+    <fileSet filtered="true" packaged="true" encoding="UTF-8">
+      <directory>src/test/java</directory>
+      <includes>
+        <include>**/*.java</include>
+      </includes>
+    </fileSet>
+  </fileSets>
+</archetype-descriptor>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
new file mode 100644
index 0000000..d19d0c6
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
@@ -0,0 +1,204 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  ~ Copyright (C) 2015 Google Inc.
+  ~
+  ~ Licensed under the Apache License, Version 2.0 (the "License"); you may not
+  ~ use this file except in compliance with the License. You may obtain a copy of
+  ~ the License at
+  ~
+  ~ http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+  ~ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+  ~ License for the specific language governing permissions and limitations under
+  ~ the License.
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>${groupId}</groupId>
+  <artifactId>${artifactId}</artifactId>
+  <version>${version}</version>
+
+  <packaging>jar</packaging>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.3</version>
+        <configuration>
+          <source>${targetPlatform}</source>
+          <target>${targetPlatform}</target>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>2.3</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <finalName>${project.artifactId}-bundled-${project.version}</finalName>
+              <artifactSet>
+                <includes>
+                  <include>*:*</include>
+                </includes>
+              </artifactSet>
+              <filters>
+                <filter>
+                  <artifact>*:*</artifact>
+                  <excludes>
+                    <exclude>META-INF/*.SF</exclude>
+                    <exclude>META-INF/*.DSA</exclude>
+                    <exclude>META-INF/*.RSA</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <version>2.18.1</version>
+        <configuration>
+          <parallel>all</parallel>
+          <threadCount>4</threadCount>
+          <redirectTestOutputToFile>true</redirectTestOutputToFile>
+        </configuration>
+        <dependencies>
+          <dependency>
+            <groupId>org.apache.maven.surefire</groupId>
+            <artifactId>surefire-junit47</artifactId>
+            <version>2.18.1</version>
+          </dependency>
+        </dependencies>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <!-- Adds a dependency on the Dataflow SDK, constrained to a version range. -->
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>java-sdk-all</artifactId>
+      <version>[0-incubating, 2-incubating)</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.api-client</groupId>
+      <artifactId>google-api-client</artifactId>
+      <version>1.21.0</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <!-- Dependencies below this line are specific dependencies needed by the examples code. -->
+    <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-bigquery</artifactId>
+      <version>v2-rev248-1.21.0</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.http-client</groupId>
+      <artifactId>google-http-client</artifactId>
+      <version>1.21.0</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-pubsub</artifactId>
+      <version>v1-rev7-1.21.0</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>joda-time</groupId>
+      <artifactId>joda-time</artifactId>
+      <version>2.4</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>18.0</version>
+    </dependency>
+
+     <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>javax.servlet-api</artifactId>
+      <version>3.1.0</version>
+    </dependency>
+
+    <!-- Add slf4j API frontend binding with JUL backend -->
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>1.7.7</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-jdk14</artifactId>
+      <version>1.7.7</version>
+      <!-- When loaded at runtime this will wire up slf4j to the JUL backend -->
+      <scope>runtime</scope>
+    </dependency>
+
+    <!-- Hamcrest and JUnit are required dependencies of DataflowAssert,
+         which is used in the main code of the DebuggingWordCount example. -->
+    <dependency>
+      <groupId>org.hamcrest</groupId>
+      <artifactId>hamcrest-all</artifactId>
+      <version>1.3</version>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.11</version>
+    </dependency>
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
new file mode 100644
index 0000000..3cf2bc0
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package};
+
+import ${package}.WordCount.WordCountOptions;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.transforms.Aggregator;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.Sum;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
+
+/**
+ * An example that verifies word counts in Shakespeare and includes Dataflow best practices.
+ *
+ * <p>This class, {@link DebuggingWordCount}, is the third in a series of four successively more
+ * detailed 'word count' examples. You may first want to take a look at {@link MinimalWordCount}
+ * and {@link WordCount}. After you've looked at this example, then see the
+ * {@link WindowedWordCount} pipeline, for introduction of additional concepts.
+ *
+ * <p>Basic concepts, also in the MinimalWordCount and WordCount examples:
+ * Reading text files; counting a PCollection; executing a Pipeline both locally
+ * and using the Dataflow service; defining DoFns.
+ *
+ * <p>New Concepts:
+ * <pre>
+ *   1. Logging to Cloud Logging
+ *   2. Controlling Dataflow worker log levels
+ *   3. Creating a custom aggregator
+ *   4. Testing your Pipeline via DataflowAssert
+ * </pre>
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service and the additional logging discussed
+ * below, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ *   --workerLogLevelOverrides={"com.google.cloud.dataflow.examples":"DEBUG"}
+ * }
+ * </pre>
+ *
+ * <p>Note that when you run via <code>mvn exec</code>, you may need to escape
+ * the quotations as appropriate for your shell. For example, in <code>bash</code>:
+ * <pre>
+ * mvn compile exec:java ... \
+ *   -Dexec.args="... \
+ *     --workerLogLevelOverrides={\\\"com.google.cloud.dataflow.examples\\\":\\\"DEBUG\\\"}"
+ * </pre>
+ *
+ * <p>Concept #2: Dataflow workers which execute user code are configured to log to Cloud
+ * Logging by default at "INFO" log level and higher. One may override log levels for specific
+ * logging namespaces by specifying:
+ * <pre><code>
+ *   --workerLogLevelOverrides={"Name1":"Level1","Name2":"Level2",...}
+ * </code></pre>
+ * For example, by specifying:
+ * <pre><code>
+ *   --workerLogLevelOverrides={"com.google.cloud.dataflow.examples":"DEBUG"}
+ * </code></pre>
+ * when executing this pipeline using the Dataflow service, Cloud Logging would contain
+ * "DEBUG" or higher level logs for the {@code com.google.cloud.dataflow.examples} package, in
+ * addition to the default "INFO" or higher level logs. In addition, the default Dataflow worker
+ * logging configuration can be overridden by specifying
+ * {@code --defaultWorkerLogLevel=<one of TRACE, DEBUG, INFO, WARN, ERROR>}. For example,
+ * by specifying {@code --defaultWorkerLogLevel=DEBUG} when executing this pipeline with
+ * the Dataflow service, Cloud Logging would contain all "DEBUG" or higher level logs. Note
+ * that changing the default worker log level to TRACE or DEBUG will significantly increase
+ * the amount of logs output.
+ *
+ * <p>The input file defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt} and can be
+ * overridden with {@code --inputFile}.
+ */
+public class DebuggingWordCount {
+  /** A DoFn that keeps only the elements whose key matches a regular expression. */
+  public static class FilterTextFn extends DoFn<KV<String, Long>, KV<String, Long>> {
+    /**
+     * Concept #1: The logger below uses the fully qualified class name of FilterTextFn
+     * as the logger. All log statements emitted by this logger will be referenced by this name
+     * and will be visible in the Cloud Logging UI. Learn more at https://cloud.google.com/logging
+     * about the Cloud Logging UI.
+     */
+    private static final Logger LOG = LoggerFactory.getLogger(FilterTextFn.class);
+
+    private final Pattern filter;
+    public FilterTextFn(String pattern) {
+      filter = Pattern.compile(pattern);
+    }
+
+    /**
+     * Concept #3: A custom aggregator can track values in your pipeline as it runs. Those
+     * values will be displayed in the Dataflow Monitoring UI when this pipeline is run using the
+     * Dataflow service. These aggregators below track the number of matched and unmatched words.
+     * Learn more at https://cloud.google.com/dataflow/pipelines/dataflow-monitoring-intf about
+     * the Dataflow Monitoring UI.
+     */
+    private final Aggregator<Long, Long> matchedWords =
+        createAggregator("matchedWords", new Sum.SumLongFn());
+    private final Aggregator<Long, Long> unmatchedWords =
+        createAggregator("unmatchedWords", new Sum.SumLongFn());
+
+    @Override
+    public void processElement(ProcessContext c) {
+      if (filter.matcher(c.element().getKey()).matches()) {
+        // Log each matched element at "DEBUG". On the Dataflow service these lines appear in
+        // the Cloud Logging UI only if the log level is "DEBUG" or lower. The {} placeholder
+        // lets slf4j skip building the message when the level is disabled.
+        LOG.debug("Matched: {}", c.element().getKey());
+        matchedWords.addValue(1L);
+        c.output(c.element());
+      } else {
+        // Log each unmatched element at "TRACE". Different log levels control the verbosity
+        // of logging, providing an effective mechanism to filter less important information.
+        // Unmatched elements are counted but not output.
+        LOG.trace("Did not match: {}", c.element().getKey());
+        unmatchedWords.addValue(1L);
+      }
+    }
+  }
+
+  public static void main(String[] args) {
+    WordCountOptions wordCountOptions =
+        PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class);
+    Pipeline pipeline = Pipeline.create(wordCountOptions);
+
+    // Read the input lines, count the words, and keep only the words matching the filter.
+    PCollection<KV<String, Long>> wordCounts = pipeline
+        .apply(TextIO.Read.named("ReadLines").from(wordCountOptions.getInputFile()))
+        .apply(new WordCount.CountWords());
+    PCollection<KV<String, Long>> filteredWords =
+        wordCounts.apply(ParDo.of(new FilterTextFn("Flourish|stomach")));
+
+    /*
+     * Concept #4: DataflowAssert is a set of convenient PTransforms, in the style of Hamcrest's
+     * collection matchers, for validating the contents of PCollections in Pipeline-level
+     * tests. It is best used in unit tests with small data sets; it appears here as a
+     * teaching tool.
+     *
+     * Below we verify that the filtered words match our expected counts. DataflowAssert
+     * produces no output: successful completion of the Pipeline implies the expectations
+     * were met. Learn more at https://cloud.google.com/dataflow/pipelines/testing-your-pipeline
+     * and see DebuggingWordCountTest for an example unit test.
+     */
+    List<KV<String, Long>> expectedResults =
+        Arrays.asList(KV.of("Flourish", 3L), KV.of("stomach", 1L));
+    DataflowAssert.that(filteredWords).containsInAnyOrder(expectedResults);
+
+    pipeline.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java
new file mode 100644
index 0000000..035db01
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package};
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.values.KV;
+
+
+/**
+ * An example that counts words in Shakespeare.
+ *
+ * <p>This class, {@link MinimalWordCount}, is the first in a series of four successively more
+ * detailed 'word count' examples. Here, for simplicity, we don't show any error-checking or
+ * argument processing, and focus on construction of the pipeline, which chains together the
+ * application of core transforms.
+ *
+ * <p>Next, see the {@link WordCount} pipeline, then the {@link DebuggingWordCount}, and finally
+ * the {@link WindowedWordCount} pipeline, for more detailed examples that introduce additional
+ * concepts.
+ *
+ * <p>Concepts:
+ * <pre>
+ *   1. Reading data from text files
+ *   2. Specifying 'inline' transforms
+ *   3. Counting a PCollection
+ *   4. Writing data to Cloud Storage as text files
+ * </pre>
+ *
+ * <p>To execute this pipeline, first edit the code to set your project ID, the staging
+ * location, and the output location. The specified GCS bucket(s) must already exist.
+ *
+ * <p>Then, run the pipeline as described in the README. It will be deployed and run using the
+ * Dataflow service. No args are required to run the pipeline. You can see the results in your
+ * output bucket in the GCS browser.
+ */
+public class MinimalWordCount {
+
+  public static void main(String[] args) {
+    // Build a DataflowPipelineOptions object, which holds the execution settings for the
+    // pipeline, such as the associated Cloud Platform project and the Google Cloud Storage
+    // location used to stage files.
+    DataflowPipelineOptions dataflowOptions =
+        PipelineOptionsFactory.create().as(DataflowPipelineOptions.class);
+    dataflowOptions.setRunner(BlockingDataflowPipelineRunner.class);
+    // CHANGE 1/3: Your project ID is required in order to run your pipeline on the Google Cloud.
+    dataflowOptions.setProject("SET_YOUR_PROJECT_ID_HERE");
+    // CHANGE 2/3: Your Google Cloud Storage path is required for staging local files.
+    dataflowOptions.setStagingLocation("gs://SET_YOUR_BUCKET_NAME_HERE/AND_STAGING_DIRECTORY");
+
+    // Create the Pipeline from the options configured above.
+    Pipeline pipeline = Pipeline.create(dataflowOptions);
+
+    // Apply the pipeline's transforms, chained one after another.
+
+    // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set
+    // of input text files. TextIO.Read returns a PCollection where each element is one line from
+    // the input text (a set of Shakespeare's texts).
+    pipeline.apply(TextIO.Read.from("gs://dataflow-samples/shakespeare/*"))
+        // Concept #2: Apply a ParDo transform to the PCollection of text lines. The ParDo runs
+        // an in-line DoFn on each element, splitting the line into individual words, and
+        // returns a PCollection<String> with one element per word in Shakespeare's texts.
+        .apply(ParDo.named("ExtractWords").of(new DoFn<String, String>() {
+          @Override
+          public void processElement(ProcessContext c) {
+            for (String token : c.element().split("[^a-zA-Z']+")) {
+              if (token.isEmpty()) {
+                continue;
+              }
+              c.output(token);
+            }
+          }
+        }))
+        // Concept #3: Apply the Count transform to our PCollection of individual words. The
+        // Count transform returns a new PCollection of key/value pairs, where each key is a
+        // unique word in the text and the associated value is that word's occurrence count.
+        .apply(Count.<String>perElement())
+        // Format each word count as a printable "word: count" string for the output file.
+        .apply(ParDo.named("FormatResults").of(new DoFn<KV<String, Long>, String>() {
+          @Override
+          public void processElement(ProcessContext c) {
+            KV<String, Long> wordCount = c.element();
+            c.output(wordCount.getKey() + ": " + wordCount.getValue());
+          }
+        }))
+        // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline.
+        // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of
+        // formatted strings) to a series of text files in Google Cloud Storage.
+        // CHANGE 3/3: The Google Cloud Storage path is required for outputting the results to.
+        .apply(TextIO.Write.to("gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX"));
+
+    // Run the pipeline.
+    pipeline.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java
new file mode 100644
index 0000000..29921e2
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package};
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import ${package}.common.DataflowExampleUtils;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.PubsubIO;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * An example that counts words in text, and can run over either unbounded or bounded input
+ * collections.
+ *
+ * <p>This class, {@link WindowedWordCount}, is the last in a series of four successively more
+ * detailed 'word count' examples. First take a look at {@link MinimalWordCount},
+ * {@link WordCount}, and {@link DebuggingWordCount}.
+ *
+ * <p>Basic concepts, also in the MinimalWordCount, WordCount, and DebuggingWordCount examples:
+ * Reading text files; counting a PCollection; writing to GCS; executing a Pipeline both locally
+ * and using the Dataflow service; defining DoFns; creating a custom aggregator;
+ * user-defined PTransforms; defining PipelineOptions.
+ *
+ * <p>New Concepts:
+ * <pre>
+ *   1. Unbounded and bounded pipeline input modes
+ *   2. Adding timestamps to data
+ *   3. PubSub topics as sources
+ *   4. Windowing
+ *   5. Re-using PTransforms over windowed PCollections
+ *   6. Writing to BigQuery
+ * </pre>
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * </pre>
+ *
+ * <p>Optionally specify the input file path via:
+ * {@code --inputFile=gs://INPUT_PATH},
+ * which defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt}.
+ *
+ * <p>Specify an output BigQuery dataset and optionally, a table for the output. If you don't
+ * specify the table, one will be created for you using the job name. If you don't specify the
+ * dataset, a dataset called {@code dataflow-examples} must already exist in your project.
+ * {@code --bigQueryDataset=YOUR-DATASET --bigQueryTable=YOUR-NEW-TABLE-NAME}.
+ *
+ * <p>Decide whether you want your pipeline to run with 'bounded' (such as files in GCS) or
+ * 'unbounded' input (such as a PubSub topic). To run with unbounded input, set
+ * {@code --unbounded=true}. Then, optionally specify the Google Cloud PubSub topic to read from
+ * via {@code --pubsubTopic=projects/PROJECT_ID/topics/YOUR_TOPIC_NAME}. If the topic does not
+ * exist, the pipeline will create one for you. It will delete this topic when it terminates.
+ * The pipeline will automatically launch an auxiliary batch pipeline to populate the given PubSub
+ * topic with the contents of the {@code --inputFile}, in order to make the example easy to run.
+ * If you want to use an independently-populated PubSub topic, indicate this by setting
+ * {@code --inputFile=""}. In that case, the auxiliary pipeline will not be started.
+ *
+ * <p>By default, the pipeline will do fixed windowing, on 1-minute windows.  You can
+ * change this interval by setting the {@code --windowSize} parameter, e.g. {@code --windowSize=10}
+ * for 10-minute windows.
+ */
+public class WindowedWordCount {
+  private static final Logger LOG = LoggerFactory.getLogger(WindowedWordCount.class);
+  static final int WINDOW_SIZE = 1;  // Default fixed-window duration, in minutes
+
+  /**
+   * Concept #2: A DoFn that sets the data element timestamp. This is a silly method, just for
+   * this example, for the bounded data case.
+   *
+   * <p>Imagine that many ghosts of Shakespeare are all typing madly at the same time to recreate
+   * his masterworks. Each line of the corpus will get a random associated timestamp somewhere in a
+   * 2-hour period.
+   */
+  static class AddTimestampFn extends DoFn<String, String> {
+    private static final long RAND_RANGE = 7200000; // 2 hours in ms
+
+    @Override
+    public void processElement(ProcessContext c) {
+      // Generate a timestamp that falls somewhere in the past two hours. The random offset
+      // is converted to long rather than int so it has the same type as RAND_RANGE and would
+      // not be clamped at Integer.MAX_VALUE if the range were ever enlarged.
+      long randomTimestamp = System.currentTimeMillis()
+        - (long) (Math.random() * RAND_RANGE);
+      // Concept #2: Set the data element with that timestamp.
+      c.outputWithTimestamp(c.element(), new Instant(randomTimestamp));
+    }
+  }
+
+  /** A DoFn that turns each word and its count into a BigQuery table row. */
+  static class FormatAsTableRowFn extends DoFn<KV<String, Long>, TableRow> {
+    @Override
+    public void processElement(ProcessContext c) {
+      KV<String, Long> wordCount = c.element();
+      // The element's timestamp is written, in string form, to the window timestamp field.
+      c.output(new TableRow()
+          .set("word", wordCount.getKey())
+          .set("count", wordCount.getValue())
+          .set("window_timestamp", c.timestamp().toString()));
+    }
+  }
+
+  /**
+   * Builds the BigQuery schema used for the output table: a STRING word, an INTEGER count,
+   * and a TIMESTAMP window timestamp.
+   */
+  private static TableSchema getSchema() {
+    List<TableFieldSchema> columns = new ArrayList<>();
+    columns.add(new TableFieldSchema().setName("word").setType("STRING"));
+    columns.add(new TableFieldSchema().setName("count").setType("INTEGER"));
+    columns.add(new TableFieldSchema().setName("window_timestamp").setType("TIMESTAMP"));
+    return new TableSchema().setFields(columns);
+  }
+
+  /**
+   * Concept #6: We'll stream the results to a BigQuery table, an output that supports both
+   * bounded and unbounded data. This helper builds the TableReference that tells the pipeline
+   * where to write its BigQuery results, from the project, dataset, and table named in the
+   * given options.
+   */
+  private static TableReference getTableReference(Options options) {
+    return new TableReference()
+        .setProjectId(options.getProject())
+        .setDatasetId(options.getBigQueryDataset())
+        .setTableId(options.getBigQueryTable());
+  }
+
+  /**
+   * Pipeline options for {@link WindowedWordCount}.
+   *
+   * <p>On top of the input-file option from {@link WordCount.WordCountOptions} and the standard
+   * example options (BigQuery table, PubSub topic) from the example utilities, this adds the
+   * window size and the bounded/unbounded switch.
+   */
+  public interface Options
+      extends WordCount.WordCountOptions, DataflowExampleUtils.DataflowExampleUtilsOptions {
+    @Default.Integer(WINDOW_SIZE)
+    @Description("Fixed window duration, in minutes")
+    Integer getWindowSize();
+    void setWindowSize(Integer value);
+
+    @Description("Whether to run the pipeline with unbounded input")
+    boolean isUnbounded();
+    void setUnbounded(boolean value);
+  }
+
+  /** Builds and runs the windowed word-count pipeline over bounded or unbounded input. */
+  public static void main(String[] args) throws IOException {
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    options.setBigQuerySchema(getSchema());
+    // DataflowExampleUtils creates the input sources needed to simplify running this Pipeline.
+    DataflowExampleUtils exampleDataflowUtils = new DataflowExampleUtils(options,
+      options.isUnbounded());
+
+    Pipeline pipeline = Pipeline.create(options);
+
+    /*
+     * Concept #1: the Dataflow SDK lets us run the same pipeline with either a bounded or
+     * unbounded input source. Only the read step below differs between the two modes.
+     */
+    PCollection<String> input;
+    if (options.isUnbounded()) {
+      LOG.info("Reading from PubSub.");
+      /*
+       * Concept #3: Read from the PubSub topic. A topic will be created if it wasn't
+       * specified as an argument. The data elements' timestamps will come from the pubsub
+       * injection.
+       */
+      input = pipeline
+          .apply(PubsubIO.Read.topic(options.getPubsubTopic()));
+    } else {
+      // Otherwise this is a bounded pipeline: read from the input file (GCS by default).
+      input = pipeline
+          .apply(TextIO.Read.from(options.getInputFile()))
+          // Concept #2: Add an element timestamp, using an artificial time just to show windowing.
+          // See AddTimestampFn for more detail on this.
+          .apply(ParDo.of(new AddTimestampFn()));
+    }
+
+    /*
+     * Concept #4: Window into fixed windows. The fixed window size for this example defaults to 1
+     * minute (you can change this with the --windowSize command-line option). See the
+     * documentation for more information on how fixed windows work, and for information on the
+     * other types of windowing available (e.g., sliding windows).
+     */
+    PCollection<String> windowedWords = input
+      .apply(Window.<String>into(
+        FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))));
+
+    /*
+     * Concept #5: Re-use our existing CountWords transform, which has no knowledge of
+     * windows, over a PCollection containing windowed values.
+     */
+    PCollection<KV<String, Long>> wordCounts = windowedWords.apply(new WordCount.CountWords());
+
+    /*
+     * Concept #6: Format the results for a BigQuery table, then write to BigQuery.
+     * The BigQuery output source supports both bounded and unbounded data.
+     */
+    wordCounts.apply(ParDo.of(new FormatAsTableRowFn()))
+        .apply(BigQueryIO.Write.to(getTableReference(options)).withSchema(getSchema()));
+
+    PipelineResult result = pipeline.run();
+
+    /*
+     * To mock unbounded input from PubSub, we'll now start an auxiliary 'injector' pipeline that
+     * runs for a limited time, and publishes to the input PubSub topic.
+     *
+     * With an unbounded input source, you will need to explicitly shut down this pipeline when you
+     * are done with it, so that you do not continue to be charged for the instances. You can do
+     * this via a ctrl-C from the command line, or from the developer's console UI for Dataflow
+     * pipelines. The PubSub topic will also be deleted at this time.
+     */
+    exampleDataflowUtils.mockUnboundedSource(options.getInputFile(), result);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java
new file mode 100644
index 0000000..150b60d
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java
@@ -0,0 +1,204 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package};
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.Aggregator;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.Sum;
+import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+
+/**
+ * An example that counts words in Shakespeare and includes Dataflow best practices.
+ *
+ * <p>This class, {@link WordCount}, is the second in a series of four successively more detailed
+ * 'word count' examples. You may first want to take a look at {@link MinimalWordCount}.
+ * After you've looked at this example, then see the {@link DebuggingWordCount}
+ * pipeline, for introduction of additional concepts.
+ *
+ * <p>For a detailed walkthrough of this example, see
+ *   <a href="https://cloud.google.com/dataflow/java-sdk/wordcount-example">
+ *   https://cloud.google.com/dataflow/java-sdk/wordcount-example
+ *   </a>
+ *
+ * <p>Basic concepts, also in the MinimalWordCount example:
+ * Reading text files; counting a PCollection; writing to GCS.
+ *
+ * <p>New Concepts:
+ * <pre>
+ *   1. Executing a Pipeline both locally and using the Dataflow service
+ *   2. Using ParDo with static DoFns defined out-of-line
+ *   3. Building a composite transform
+ *   4. Defining your own pipeline options
+ * </pre>
+ *
+ * <p>Concept #1: you can execute this pipeline either locally or using the Dataflow service.
+ * These are now command-line options and not hard-coded as they were in the MinimalWordCount
+ * example.
+ * To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ * and a local output file or output prefix on GCS:
+ * <pre>{@code
+ *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
+ * }</pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * </pre>
+ * and an output prefix on GCS:
+ * <pre>{@code
+ *   --output=gs://YOUR_OUTPUT_PREFIX
+ * }</pre>
+ *
+ * <p>The input file defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt} and can be
+ * overridden with {@code --inputFile}.
+ */
+public class WordCount {
+
+  /**
+   * Concept #2: You can make your pipeline code less verbose by defining your DoFns statically
+   * out-of-line. This DoFn tokenizes lines of text into individual words; we pass it to a ParDo
+   * in the pipeline.
+   */
+  static class ExtractWordsFn extends DoFn<String, String> {
+    private final Aggregator<Long, Long> emptyLines =
+        createAggregator("emptyLines", new Sum.SumLongFn());
+
+    @Override
+    public void processElement(ProcessContext c) {
+      if (c.element().trim().isEmpty()) {
+        emptyLines.addValue(1L);
+      }
+
+      // Split the line into words.
+      String[] words = c.element().split("[^a-zA-Z']+");
+
+      // Output each word encountered into the output PCollection.
+      for (String word : words) {
+        if (!word.isEmpty()) {
+          c.output(word);
+        }
+      }
+    }
+  }
+
+  /** A DoFn that converts a Word and Count into a printable string. */
+  public static class FormatAsTextFn extends DoFn<KV<String, Long>, String> {
+    @Override
+    public void processElement(ProcessContext c) {
+      c.output(c.element().getKey() + ": " + c.element().getValue());
+    }
+  }
+
+  /**
+   * A PTransform that converts a PCollection containing lines of text into a PCollection of
+   * formatted word counts.
+   *
+   * <p>Concept #3: This is a custom composite transform that bundles two transforms (ParDo and
+   * Count) as a reusable PTransform subclass. Using composite transforms allows for easy reuse,
+   * modular testing, and an improved monitoring experience.
+   */
+  public static class CountWords extends PTransform<PCollection<String>,
+      PCollection<KV<String, Long>>> {
+    @Override
+    public PCollection<KV<String, Long>> apply(PCollection<String> lines) {
+
+      // Convert lines of text into individual words.
+      PCollection<String> words = lines.apply(
+          ParDo.of(new ExtractWordsFn()));
+
+      // Count the number of times each word occurs.
+      PCollection<KV<String, Long>> wordCounts =
+          words.apply(Count.<String>perElement());
+
+      return wordCounts;
+    }
+  }
+
+  /**
+   * Options supported by {@link WordCount}.
+   *
+   * <p>Concept #4: Defining your own configuration options. Here, you can add your own arguments
+   * to be processed by the command-line parser, and specify default values for them. You can then
+   * access the options values in your pipeline code.
+   *
+   * <p>Inherits standard configuration options.
+   */
+  public static interface WordCountOptions extends PipelineOptions {
+    @Description("Path of the file to read from")
+    @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt")
+    String getInputFile();
+    void setInputFile(String value);
+
+    @Description("Path of the file to write to")
+    @Default.InstanceFactory(OutputFactory.class)
+    String getOutput();
+    void setOutput(String value);
+
+    /**
+     * Returns "gs://${YOUR_STAGING_DIRECTORY}/counts.txt" as the default destination.
+     */
+    public static class OutputFactory implements DefaultValueFactory<String> {
+      @Override
+      public String create(PipelineOptions options) {
+        DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
+        if (dataflowOptions.getStagingLocation() != null) {
+          return GcsPath.fromUri(dataflowOptions.getStagingLocation())
+              .resolve("counts.txt").toString();
+        } else {
+          throw new IllegalArgumentException("Must specify --output or --stagingLocation");
+        }
+      }
+    }
+
+  }
+
+  public static void main(String[] args) {
+    WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
+      .as(WordCountOptions.class);
+    Pipeline p = Pipeline.create(options);
+
+    // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the
+    // static FormatAsTextFn() to the ParDo transform.
+    p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
+     .apply(new CountWords())
+     .apply(ParDo.of(new FormatAsTextFn()))
+     .apply(TextIO.Write.named("WriteCounts").to(options.getOutput()));
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleOptions.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleOptions.java
new file mode 100644
index 0000000..e182f4c
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleOptions.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package}.common;
+
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+
+/**
+ * Options that can be used to configure the Dataflow examples.
+ */
+public interface DataflowExampleOptions extends DataflowPipelineOptions {
+  @Description("Whether to keep jobs running on the Dataflow service after local process exit")
+  @Default.Boolean(false)
+  boolean getKeepJobsRunning();
+  void setKeepJobsRunning(boolean keepJobsRunning);
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleUtils.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleUtils.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleUtils.java
new file mode 100644
index 0000000..9861769
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleUtils.java
@@ -0,0 +1,398 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package}.common;
+
+import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
+import com.google.api.services.bigquery.Bigquery;
+import com.google.api.services.bigquery.Bigquery.Datasets;
+import com.google.api.services.bigquery.Bigquery.Tables;
+import com.google.api.services.bigquery.model.Dataset;
+import com.google.api.services.bigquery.model.DatasetReference;
+import com.google.api.services.bigquery.model.Table;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.api.services.dataflow.Dataflow;
+import com.google.api.services.pubsub.Pubsub;
+import com.google.api.services.pubsub.model.Topic;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.BigQueryOptions;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineJob;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
+import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
+import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
+import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
+import com.google.cloud.dataflow.sdk.util.Transport;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import javax.servlet.http.HttpServletResponse;
+
+/**
+ * The utility class that sets up and tears down external resources, starts the Google Cloud Pub/Sub
+ * injector, and cancels the streaming and the injector pipelines once the program terminates.
+ *
+ * <p>It is used to run Dataflow examples, such as TrafficMaxLaneFlow and TrafficRoutes.
+ */
+public class DataflowExampleUtils {
+
+  private final DataflowPipelineOptions options;
+  private Bigquery bigQueryClient = null;
+  private Pubsub pubsubClient = null;
+  private Dataflow dataflowClient = null;
+  private Set<DataflowPipelineJob> jobsToCancel = Sets.newHashSet();
+  private List<String> pendingMessages = Lists.newArrayList();
+
+  /**
+   * Define an interface that supports the PubSub and BigQuery example options.
+   */
+  public static interface DataflowExampleUtilsOptions
+        extends DataflowExampleOptions, ExamplePubsubTopicOptions, ExampleBigQueryTableOptions {
+  }
+
+  public DataflowExampleUtils(DataflowPipelineOptions options) {
+    this.options = options;
+  }
+
+  /**
+   * Do resources and runner options setup.
+   */
+  public DataflowExampleUtils(DataflowPipelineOptions options, boolean isUnbounded)
+      throws IOException {
+    this.options = options;
+    setupResourcesAndRunner(isUnbounded);
+  }
+
+  /**
+   * Sets up external resources that are required by the example,
+   * such as Pub/Sub topics and BigQuery tables.
+   *
+   * @throws IOException if there is a problem setting up the resources
+   */
+  public void setup() throws IOException {
+    setupPubsubTopic();
+    setupBigQueryTable();
+  }
+
+  /**
+   * Set up external resources, and configure the runner appropriately.
+   */
+  public void setupResourcesAndRunner(boolean isUnbounded) throws IOException {
+    if (isUnbounded) {
+      options.setStreaming(true);
+    }
+    setup();
+    setupRunner();
+  }
+
+  /**
+   * Sets up the Google Cloud Pub/Sub topic.
+   *
+   * <p>If the topic doesn't exist, a new topic with the given name will be created.
+   *
+   * @throws IOException if there is a problem setting up the Pub/Sub topic
+   */
+  public void setupPubsubTopic() throws IOException {
+    String topic = options.as(ExamplePubsubTopicOptions.class).getPubsubTopic();
+    // Nothing to set up when no topic is configured (null or empty).
+    if (topic != null && !topic.isEmpty()) {
+      pendingMessages.add("*******************Set Up Pubsub Topic*********************");
+      setupPubsubTopic(topic);
+      pendingMessages.add("The Pub/Sub topic has been set up for this example: " + topic);
+    }
+  }
+
+  /**
+   * Sets up the BigQuery table with the given schema.
+   *
+   * <p>If the table already exists, the schema has to match the given one. Otherwise, the example
+   * will throw a RuntimeException. If the table doesn't exist, a new table with the given schema
+   * will be created.
+   *
+   * @throws IOException if there is a problem setting up the BigQuery table
+   */
+  public void setupBigQueryTable() throws IOException {
+    ExampleBigQueryTableOptions bigQueryTableOptions =
+        options.as(ExampleBigQueryTableOptions.class);
+    if (bigQueryTableOptions.getBigQueryDataset() != null
+        && bigQueryTableOptions.getBigQueryTable() != null
+        && bigQueryTableOptions.getBigQuerySchema() != null) {
+      pendingMessages.add("******************Set Up Big Query Table*******************");
+      setupBigQueryTable(bigQueryTableOptions.getProject(),
+                         bigQueryTableOptions.getBigQueryDataset(),
+                         bigQueryTableOptions.getBigQueryTable(),
+                         bigQueryTableOptions.getBigQuerySchema());
+      pendingMessages.add("The BigQuery table has been set up for this example: "
+          + bigQueryTableOptions.getProject()
+          + ":" + bigQueryTableOptions.getBigQueryDataset()
+          + "." + bigQueryTableOptions.getBigQueryTable());
+    }
+  }
+
+  /**
+   * Tears down external resources that can be deleted upon the example's completion.
+   */
+  private void tearDown() {
+    pendingMessages.add("*************************Tear Down*************************");
+    // Guard against an unset topic: this runs inside the shutdown hook, where an exception
+    // would prevent the jobs from being canceled.
+    String topic = options.as(ExamplePubsubTopicOptions.class).getPubsubTopic();
+    if (topic != null && !topic.isEmpty()) {
+      try {
+        deletePubsubTopic(topic);
+        pendingMessages.add("The Pub/Sub topic has been deleted: " + topic);
+      } catch (IOException e) {
+        pendingMessages.add("Failed to delete the Pub/Sub topic : " + topic);
+      }
+    }
+
+    ExampleBigQueryTableOptions bigQueryTableOptions =
+        options.as(ExampleBigQueryTableOptions.class);
+    if (bigQueryTableOptions.getBigQueryDataset() != null
+        && bigQueryTableOptions.getBigQueryTable() != null
+        && bigQueryTableOptions.getBigQuerySchema() != null) {
+      pendingMessages.add("The BigQuery table might contain the example's output, "
+          + "and it is not deleted automatically: "
+          + bigQueryTableOptions.getProject()
+          + ":" + bigQueryTableOptions.getBigQueryDataset()
+          + "." + bigQueryTableOptions.getBigQueryTable());
+      pendingMessages.add("Please go to the Developers Console to delete it manually."
+          + " Otherwise, you may be charged for its usage.");
+    }
+  }
+
+  private void setupBigQueryTable(String projectId, String datasetId, String tableId,
+      TableSchema schema) throws IOException {
+    if (bigQueryClient == null) {
+      bigQueryClient = Transport.newBigQueryClient(options.as(BigQueryOptions.class)).build();
+    }
+
+    Datasets datasetService = bigQueryClient.datasets();
+    if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) {
+      Dataset newDataset = new Dataset().setDatasetReference(
+          new DatasetReference().setProjectId(projectId).setDatasetId(datasetId));
+      datasetService.insert(projectId, newDataset).execute();
+    }
+
+    Tables tableService = bigQueryClient.tables();
+    Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId));
+    if (table == null) {
+      Table newTable = new Table().setSchema(schema).setTableReference(
+          new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId));
+      tableService.insert(projectId, datasetId, newTable).execute();
+    } else if (!table.getSchema().equals(schema)) {
+      throw new RuntimeException(
+          "Table exists and schemas do not match, expecting: " + schema.toPrettyString()
+          + ", actual: " + table.getSchema().toPrettyString());
+    }
+  }
+
+  private void setupPubsubTopic(String topic) throws IOException {
+    if (pubsubClient == null) {
+      pubsubClient = Transport.newPubsubClient(options).build();
+    }
+    if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) == null) {
+      pubsubClient.projects().topics().create(topic, new Topic().setName(topic)).execute();
+    }
+  }
+
+  /**
+   * Deletes the Google Cloud Pub/Sub topic.
+   *
+   * @throws IOException if there is a problem deleting the Pub/Sub topic
+   */
+  private void deletePubsubTopic(String topic) throws IOException {
+    if (pubsubClient == null) {
+      pubsubClient = Transport.newPubsubClient(options).build();
+    }
+    if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) != null) {
+      pubsubClient.projects().topics().delete(topic).execute();
+    }
+  }
+
+  /**
+   * If this is an unbounded (streaming) pipeline, and both inputFile and pubsub topic are defined,
+   * start an 'injector' pipeline that publishes the contents of the file to the given topic, first
+   * creating the topic if necessary.
+   */
+  public void startInjectorIfNeeded(String inputFile) {
+    ExamplePubsubTopicOptions pubsubTopicOptions = options.as(ExamplePubsubTopicOptions.class);
+    if (pubsubTopicOptions.isStreaming()
+        && inputFile != null && !inputFile.isEmpty()
+        && pubsubTopicOptions.getPubsubTopic() != null
+        && !pubsubTopicOptions.getPubsubTopic().isEmpty()) {
+      runInjectorPipeline(inputFile, pubsubTopicOptions.getPubsubTopic());
+    }
+  }
+
+  /**
+   * Do some runner setup: check that the DirectPipelineRunner is not used in conjunction with
+   * streaming, and if streaming is specified, force the use of the DataflowPipelineRunner. Does
+   * nothing when streaming is not enabled.
+   */
+  public void setupRunner() {
+    if (options.isStreaming()) {
+      if (options.getRunner() == DirectPipelineRunner.class) {
+        throw new IllegalArgumentException(
+          "Processing of unbounded input sources is not supported with the DirectPipelineRunner.");
+      }
+      // In order to cancel the pipelines automatically,
+      // {@literal DataflowPipelineRunner} is forced to be used.
+      options.setRunner(DataflowPipelineRunner.class);
+    }
+  }
+
+  /**
+   * Runs the batch injector for the streaming pipeline.
+   *
+   * <p>The injector pipeline will read from the given text file, and inject data
+   * into the Google Cloud Pub/Sub topic.
+   */
+  public void runInjectorPipeline(String inputFile, String topic) {
+    DataflowPipelineOptions copiedOptions = options.cloneAs(DataflowPipelineOptions.class);
+    copiedOptions.setStreaming(false);
+    copiedOptions.setNumWorkers(
+        options.as(ExamplePubsubTopicOptions.class).getInjectorNumWorkers());
+    copiedOptions.setJobName(options.getJobName() + "-injector");
+    Pipeline injectorPipeline = Pipeline.create(copiedOptions);
+    injectorPipeline.apply(TextIO.Read.from(inputFile))
+                    .apply(IntraBundleParallelization
+                        .of(PubsubFileInjector.publish(topic))
+                        .withMaxParallelism(20));
+    DataflowPipelineJob injectorJob = (DataflowPipelineJob) injectorPipeline.run();
+    jobsToCancel.add(injectorJob);
+  }
+
+  /**
+   * Runs the provided injector pipeline for the streaming pipeline.
+   */
+  public void runInjectorPipeline(Pipeline injectorPipeline) {
+    DataflowPipelineJob injectorJob = (DataflowPipelineJob) injectorPipeline.run();
+    jobsToCancel.add(injectorJob);
+  }
+
+  /**
+   * Start the auxiliary injector pipeline, then wait for this pipeline to finish.
+   */
+  public void mockUnboundedSource(String inputFile, PipelineResult result) {
+    startInjectorIfNeeded(inputFile);
+    waitToFinish(result);
+  }
+
+  /**
+   * If {@literal DataflowPipelineRunner} or {@literal BlockingDataflowPipelineRunner} is used,
+   * waits for the pipeline to finish and cancels it (and the injector) before the program exits.
+   */
+  public void waitToFinish(PipelineResult result) {
+    if (result instanceof DataflowPipelineJob) {
+      final DataflowPipelineJob job = (DataflowPipelineJob) result;
+      jobsToCancel.add(job);
+      if (!options.as(DataflowExampleOptions.class).getKeepJobsRunning()) {
+        addShutdownHook(jobsToCancel);
+      }
+      try {
+        job.waitToFinish(-1, TimeUnit.SECONDS, new MonitoringUtil.PrintHandler(System.out));
+      } catch (Exception e) {
+        throw new RuntimeException("Failed to wait for job to finish: " + job.getJobId(), e);
+      }
+    } else {
+      // Do nothing if the given PipelineResult doesn't support waitToFinish(),
+      // such as EvaluationResults returned by DirectPipelineRunner.
+    }
+  }
+
+  private void addShutdownHook(final Collection<DataflowPipelineJob> jobs) {
+    if (dataflowClient == null) {
+      dataflowClient = options.getDataflowClient();
+    }
+
+    Runtime.getRuntime().addShutdownHook(new Thread() {
+      @Override
+      public void run() {
+        tearDown();
+        printPendingMessages();
+        for (DataflowPipelineJob job : jobs) {
+          System.out.println("Canceling example pipeline: " + job.getJobId());
+          try {
+            job.cancel();
+          } catch (IOException e) {
+            System.out.println("Failed to cancel the job,"
+                + " please go to the Developers Console to cancel it manually");
+            System.out.println(
+                MonitoringUtil.getJobMonitoringPageURL(job.getProjectId(), job.getJobId()));
+          }
+        }
+
+        for (DataflowPipelineJob job : jobs) {
+          boolean cancellationVerified = false;
+          for (int retryAttempts = 6; retryAttempts > 0; retryAttempts--) {
+            if (job.getState().isTerminal()) {
+              cancellationVerified = true;
+              System.out.println("Canceled example pipeline: " + job.getJobId());
+              break;
+            } else {
+              System.out.println(
+                  "The example pipeline is still running. Verifying the cancellation.");
+            }
+            try {
+              Thread.sleep(10000);
+            } catch (InterruptedException e) {
+              // Ignore
+            }
+          }
+          if (!cancellationVerified) {
+            System.out.println("Failed to verify the cancellation for job: " + job.getJobId());
+            System.out.println("Please go to the Developers Console to verify manually:");
+            System.out.println(
+                MonitoringUtil.getJobMonitoringPageURL(job.getProjectId(), job.getJobId()));
+          }
+        }
+      }
+    });
+  }
+
+  private void printPendingMessages() {
+    System.out.println();
+    System.out.println("***********************************************************");
+    System.out.println("***********************************************************");
+    for (String message : pendingMessages) {
+      System.out.println(message);
+    }
+    System.out.println("***********************************************************");
+    System.out.println("***********************************************************");
+  }
+
+  private static <T> T executeNullIfNotFound(
+      AbstractGoogleClientRequest<T> request) throws IOException {
+    try {
+      return request.execute();
+    } catch (GoogleJsonResponseException e) {
+      if (e.getStatusCode() == HttpServletResponse.SC_NOT_FOUND) {
+        return null;
+      } else {
+        throw e;
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java
new file mode 100644
index 0000000..bef5bfd
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package}.common;
+
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+
+/**
+ * Options that can be used to configure BigQuery tables in Dataflow examples.
+ * The project defaults to the project being used to run the example.
+ */
+public interface ExampleBigQueryTableOptions extends DataflowPipelineOptions {
+  @Description("BigQuery dataset name")
+  @Default.String("dataflow_examples")
+  String getBigQueryDataset();
+  void setBigQueryDataset(String dataset);
+
+  @Description("BigQuery table name")
+  @Default.InstanceFactory(BigQueryTableFactory.class)
+  String getBigQueryTable();
+  void setBigQueryTable(String table);
+
+  @Description("BigQuery table schema")
+  TableSchema getBigQuerySchema();
+  void setBigQuerySchema(TableSchema schema);
+
+  /**
+   * Returns the job name as the default BigQuery table name.
+   */
+  static class BigQueryTableFactory implements DefaultValueFactory<String> {
+    @Override
+    public String create(PipelineOptions options) {
+      return options.as(DataflowPipelineOptions.class).getJobName()
+          .replace('-', '_');
+    }
+  }
+}

[28/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java
deleted file mode 100644
index 6c9643c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/DataflowAssert.java
+++ /dev/null
@@ -1,825 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.testing;
-
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.not;
-import static org.junit.Assert.assertThat;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.coders.MapCoder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.options.StreamingOptions;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.transforms.View;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PBegin;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.common.base.Optional;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.NoSuchElementException;
-
-/**
- * An assertion on the contents of a {@link PCollection}
- * incorporated into the pipeline.  Such an assertion
- * can be checked no matter what kind of {@link PipelineRunner} is
- * used.
- *
- * <p>Note that the {@code DataflowAssert} call must precede the call
- * to {@link Pipeline#run}.
- *
- * <p>Examples of use:
- * <pre>{@code
- * Pipeline p = TestPipeline.create();
- * ...
- * PCollection<String> output =
- *      input
- *      .apply(ParDo.of(new TestDoFn()));
- * DataflowAssert.that(output)
- *     .containsInAnyOrder("out1", "out2", "out3");
- * ...
- * PCollection<Integer> ints = ...
- * PCollection<Integer> sum =
- *     ints
- *     .apply(Combine.globally(new SumInts()));
- * DataflowAssert.that(sum)
- *     .is(42);
- * ...
- * p.run();
- * }</pre>
- *
- * <p>JUnit and Hamcrest must be linked in by any code that uses DataflowAssert.
- */
-public class DataflowAssert {
-
-  private static final Logger LOG = LoggerFactory.getLogger(DataflowAssert.class);
-
-  static final String SUCCESS_COUNTER = "DataflowAssertSuccess";
-  static final String FAILURE_COUNTER = "DataflowAssertFailure";
-
-  private static int assertCount = 0;
-
-  // Do not instantiate.
-  private DataflowAssert() {}
-
-  /**
-   * Constructs an {@link IterableAssert} for the elements of the provided
-   * {@link PCollection}.
-   */
-  public static <T> IterableAssert<T> that(PCollection<T> actual) {
-    return new IterableAssert<>(
-        new CreateActual<T, Iterable<T>>(actual, View.<T>asIterable()),
-         actual.getPipeline())
-         .setCoder(actual.getCoder());
-  }
-
-  /**
-   * Constructs an {@link IterableAssert} for the value of the provided
-   * {@link PCollection} which must contain a single {@code Iterable<T>}
-   * value.
-   */
-  public static <T> IterableAssert<T>
-      thatSingletonIterable(PCollection<? extends Iterable<T>> actual) {
-
-    List<? extends Coder<?>> maybeElementCoder = actual.getCoder().getCoderArguments();
-    Coder<T> tCoder;
-    try {
-      @SuppressWarnings("unchecked")
-      Coder<T> tCoderTmp = (Coder<T>) Iterables.getOnlyElement(maybeElementCoder);
-      tCoder = tCoderTmp;
-    } catch (NoSuchElementException | IllegalArgumentException exc) {
-      throw new IllegalArgumentException(
-        "DataflowAssert.<T>thatSingletonIterable requires a PCollection<Iterable<T>>"
-        + " with a Coder<Iterable<T>> where getCoderArguments() yields a"
-        + " single Coder<T> to apply to the elements.");
-    }
-
-    @SuppressWarnings("unchecked") // Safe covariant cast
-    PCollection<Iterable<T>> actualIterables = (PCollection<Iterable<T>>) actual;
-
-    return new IterableAssert<>(
-        new CreateActual<Iterable<T>, Iterable<T>>(
-            actualIterables, View.<Iterable<T>>asSingleton()),
-        actual.getPipeline())
-        .setCoder(tCoder);
-  }
-
-  /**
-   * Constructs an {@link IterableAssert} for the value of the provided
-   * {@code PCollectionView<Iterable<T>>}.
-   */
-  public static <T> IterableAssert<T> thatIterable(PCollectionView<Iterable<T>> actual) {
-    return new IterableAssert<>(new PreExisting<Iterable<T>>(actual), actual.getPipeline());
-  }
-
-  /**
-   * Constructs a {@link SingletonAssert} for the value of the provided
-   * {@code PCollection<T>}, which must be a singleton.
-   */
-  public static <T> SingletonAssert<T> thatSingleton(PCollection<T> actual) {
-    return new SingletonAssert<>(
-        new CreateActual<T, T>(actual, View.<T>asSingleton()), actual.getPipeline())
-        .setCoder(actual.getCoder());
-  }
-
-  /**
-   * Constructs a {@link SingletonAssert} for the value of the provided {@link PCollection}.
-   *
-   * <p>Note that the actual value must be coded by a {@link KvCoder},
-   * not just any {@code Coder<KV<K, V>>}.
-   */
-  public static <K, V> SingletonAssert<Map<K, Iterable<V>>>
-      thatMultimap(PCollection<KV<K, V>> actual) {
-    @SuppressWarnings("unchecked")
-    KvCoder<K, V> kvCoder = (KvCoder<K, V>) actual.getCoder();
-
-    return new SingletonAssert<>(
-        new CreateActual<>(actual, View.<K, V>asMultimap()), actual.getPipeline())
-        .setCoder(MapCoder.of(kvCoder.getKeyCoder(), IterableCoder.of(kvCoder.getValueCoder())));
-  }
-
-  /**
-   * Constructs a {@link SingletonAssert} for the value of the provided {@link PCollection},
-   * which must have at most one value per key.
-   *
-   * <p>Note that the actual value must be coded by a {@link KvCoder},
-   * not just any {@code Coder<KV<K, V>>}.
-   */
-  public static <K, V> SingletonAssert<Map<K, V>> thatMap(PCollection<KV<K, V>> actual) {
-    @SuppressWarnings("unchecked")
-    KvCoder<K, V> kvCoder = (KvCoder<K, V>) actual.getCoder();
-
-    return new SingletonAssert<>(
-        new CreateActual<>(actual, View.<K, V>asMap()), actual.getPipeline())
-        .setCoder(MapCoder.of(kvCoder.getKeyCoder(), kvCoder.getValueCoder()));
-  }
-
-  ////////////////////////////////////////////////////////////
-
-  /**
-   * An assertion about the contents of a {@link PCollectionView} yielding an {@code Iterable<T>}.
-   */
-  public static class IterableAssert<T> implements Serializable {
-    private final Pipeline pipeline;
-    private final PTransform<PBegin, PCollectionView<Iterable<T>>> createActual;
-    private Optional<Coder<T>> coder;
-
-    protected IterableAssert(
-        PTransform<PBegin, PCollectionView<Iterable<T>>> createActual, Pipeline pipeline) {
-      this.createActual = createActual;
-      this.pipeline = pipeline;
-      this.coder = Optional.absent();
-    }
-
-    /**
-     * Sets the coder to use for elements of type {@code T}, as needed for internal purposes.
-     *
-     * <p>Returns this {@code IterableAssert}.
-     */
-    public IterableAssert<T> setCoder(Coder<T> coderOrNull) {
-      this.coder = Optional.fromNullable(coderOrNull);
-      return this;
-    }
-
-    /**
-     * Gets the coder, or throws an {@link IllegalStateException} if it has not been set yet.
-     */
-    public Coder<T> getCoder() {
-      if (coder.isPresent()) {
-        return coder.get();
-      } else {
-        throw new IllegalStateException(
-            "Attempting to access the coder of an IterableAssert"
-                + " that has not been set yet.");
-      }
-    }
-
-    /**
-     * Applies a {@link SerializableFunction} to check the elements of the {@code Iterable}.
-     *
-     * <p>Returns this {@code IterableAssert}.
-     */
-    public IterableAssert<T> satisfies(SerializableFunction<Iterable<T>, Void> checkerFn) {
-      pipeline.apply(
-          "DataflowAssert$" + (assertCount++),
-          new OneSideInputAssert<Iterable<T>>(createActual, checkerFn));
-      return this;
-    }
-
-    /**
-     * Applies an {@link AssertRelation} to the {@code Iterable} and the expected elements.
-     *
-     * <p>Returns this {@code IterableAssert}.
-     */
-    public IterableAssert<T> satisfies(
-        AssertRelation<Iterable<T>, Iterable<T>> relation,
-        final Iterable<T> expectedElements) {
-      pipeline.apply(
-          "DataflowAssert$" + (assertCount++),
-          new TwoSideInputAssert<Iterable<T>, Iterable<T>>(createActual,
-              new CreateExpected<T, Iterable<T>>(expectedElements, coder, View.<T>asIterable()),
-              relation));
-
-      return this;
-    }
-
-    /**
-     * Applies a {@link SerializableMatcher} to check the elements of the {@code Iterable}.
-     *
-     * <p>Returns this {@code IterableAssert}.
-     */
-    IterableAssert<T> satisfies(final SerializableMatcher<Iterable<? extends T>> matcher) {
-      // Safe covariant cast. Could be elided by changing a lot of this file to use
-      // more flexible bounds.
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      SerializableFunction<Iterable<T>, Void> checkerFn =
-        (SerializableFunction) new MatcherCheckerFn<>(matcher);
-      pipeline.apply(
-          "DataflowAssert$" + (assertCount++),
-          new OneSideInputAssert<Iterable<T>>(
-              createActual,
-              checkerFn));
-      return this;
-    }
-
-    private static class MatcherCheckerFn<T> implements SerializableFunction<T, Void> {
-      private SerializableMatcher<T> matcher;
-
-      public MatcherCheckerFn(SerializableMatcher<T> matcher) {
-        this.matcher = matcher;
-      }
-
-      @Override
-      public Void apply(T actual) {
-        assertThat(actual, matcher);
-        return null;
-      }
-    }
-
-    /**
-     * Checks that the {@code Iterable} is empty.
-     *
-     * <p>Returns this {@code IterableAssert}.
-     */
-    public IterableAssert<T> empty() {
-      return satisfies(new AssertContainsInAnyOrderRelation<T>(), Collections.<T>emptyList());
-    }
-
-    /**
-     * @throws UnsupportedOperationException always
-     * @deprecated {@link Object#equals(Object)} is not supported on DataflowAssert objects.
-     *    If you meant to test object equality, use a variant of {@link #containsInAnyOrder}
-     *    instead.
-     */
-    @Deprecated
-    @Override
-    public boolean equals(Object o) {
-      throw new UnsupportedOperationException(
-          "If you meant to test object equality, use .containsInAnyOrder instead.");
-    }
-
-    /**
-     * @throws UnsupportedOperationException always.
-     * @deprecated {@link Object#hashCode()} is not supported on DataflowAssert objects.
-     */
-    @Deprecated
-    @Override
-    public int hashCode() {
-      throw new UnsupportedOperationException(
-          String.format("%s.hashCode() is not supported.", IterableAssert.class.getSimpleName()));
-    }
-
-    /**
-     * Checks that the {@code Iterable} contains the expected elements, in any
-     * order.
-     *
-     * <p>Returns this {@code IterableAssert}.
-     */
-    public IterableAssert<T> containsInAnyOrder(Iterable<T> expectedElements) {
-      return satisfies(new AssertContainsInAnyOrderRelation<T>(), expectedElements);
-    }
-
-    /**
-     * Checks that the {@code Iterable} contains the expected elements, in any
-     * order.
-     *
-     * <p>Returns this {@code IterableAssert}.
-     */
-    @SafeVarargs
-    public final IterableAssert<T> containsInAnyOrder(T... expectedElements) {
-      return satisfies(
-        new AssertContainsInAnyOrderRelation<T>(),
-        Arrays.asList(expectedElements));
-    }
-
-    /**
-     * Checks that the {@code Iterable} contains elements that match the provided matchers,
-     * in any order.
-     *
-     * <p>Returns this {@code IterableAssert}.
-     */
-    @SafeVarargs
-    final IterableAssert<T> containsInAnyOrder(
-        SerializableMatcher<? super T>... elementMatchers) {
-      return satisfies(SerializableMatchers.<T>containsInAnyOrder(elementMatchers));
-    }
-  }
-
-  /**
-   * An assertion about the single value of type {@code T}
-   * associated with a {@link PCollectionView}.
-   */
-  public static class SingletonAssert<T> implements Serializable {
-    private final Pipeline pipeline;
-    private final CreateActual<?, T> createActual;
-    private Optional<Coder<T>> coder;
-
-    protected SingletonAssert(
-        CreateActual<?, T> createActual, Pipeline pipeline) {
-      this.pipeline = pipeline;
-      this.createActual = createActual;
-      this.coder = Optional.absent();
-    }
-
-    /**
-     * Always throws an {@link UnsupportedOperationException}: users are probably looking for
-     * {@link #isEqualTo}.
-     */
-    @Deprecated
-    @Override
-    public boolean equals(Object o) {
-      throw new UnsupportedOperationException(
-          String.format(
-              "tests for Java equality of the %s object, not the PCollection in question. "
-                  + "Call a test method, such as isEqualTo.",
-              getClass().getSimpleName()));
-    }
-
-    /**
-     * @throws UnsupportedOperationException always.
-     * @deprecated {@link Object#hashCode()} is not supported on DataflowAssert objects.
-     */
-    @Deprecated
-    @Override
-    public int hashCode() {
-      throw new UnsupportedOperationException(
-          String.format("%s.hashCode() is not supported.", SingletonAssert.class.getSimpleName()));
-    }
-
-    /**
-     * Sets the coder to use for elements of type {@code T}, as needed
-     * for internal purposes.
-     *
-     * <p>Returns this {@code SingletonAssert}.
-     */
-    public SingletonAssert<T> setCoder(Coder<T> coderOrNull) {
-      this.coder = Optional.fromNullable(coderOrNull);
-      return this;
-    }
-
-    /**
-     * Gets the coder, or throws an {@link IllegalStateException} if it has not been set yet.
-     */
-    public Coder<T> getCoder() {
-      if (coder.isPresent()) {
-        return coder.get();
-      } else {
-        throw new IllegalStateException(
-            "Attempting to access the coder of an IterableAssert that has not been set yet.");
-      }
-    }
-
-    /**
-     * Applies a {@link SerializableFunction} to check the value of this
-     * {@code SingletonAssert}'s view.
-     *
-     * <p>Returns this {@code SingletonAssert}.
-     */
-    public SingletonAssert<T> satisfies(SerializableFunction<T, Void> checkerFn) {
-      pipeline.apply(
-          "DataflowAssert$" + (assertCount++),
-          new OneSideInputAssert<T>(createActual, checkerFn));
-      return this;
-    }
-
-    /**
-     * Applies an {@link AssertRelation} to check the provided relation against the
-     * value of this assert and the provided expected value.
-     *
-     * <p>Returns this {@code SingletonAssert}.
-     */
-    public SingletonAssert<T> satisfies(
-        AssertRelation<T, T> relation,
-        final T expectedValue) {
-      pipeline.apply(
-          "DataflowAssert$" + (assertCount++),
-          new TwoSideInputAssert<T, T>(createActual,
-              new CreateExpected<T, T>(Arrays.asList(expectedValue), coder, View.<T>asSingleton()),
-              relation));
-
-      return this;
-    }
-
-    /**
-     * Checks that the value of this {@code SingletonAssert}'s view is equal
-     * to the expected value.
-     *
-     * <p>Returns this {@code SingletonAssert}.
-     */
-    public SingletonAssert<T> isEqualTo(T expectedValue) {
-      return satisfies(new AssertIsEqualToRelation<T>(), expectedValue);
-    }
-
-    /**
-     * Checks that the value of this {@code SingletonAssert}'s view is not equal
-     * to the expected value.
-     *
-     * <p>Returns this {@code SingletonAssert}.
-     */
-    public SingletonAssert<T> notEqualTo(T expectedValue) {
-      return satisfies(new AssertNotEqualToRelation<T>(), expectedValue);
-    }
-
-    /**
-     * Checks that the value of this {@code SingletonAssert}'s view is equal to
-     * the expected value.
-     *
-     * @deprecated replaced by {@link #isEqualTo}
-     */
-    @Deprecated
-    public SingletonAssert<T> is(T expectedValue) {
-      return isEqualTo(expectedValue);
-    }
-
-  }
-
-  ////////////////////////////////////////////////////////////////////////
-
-  private static class CreateActual<T, ActualT>
-      extends PTransform<PBegin, PCollectionView<ActualT>> {
-
-    private final transient PCollection<T> actual;
-    private final transient PTransform<PCollection<T>, PCollectionView<ActualT>> actualView;
-
-    private CreateActual(PCollection<T> actual,
-        PTransform<PCollection<T>, PCollectionView<ActualT>> actualView) {
-      this.actual = actual;
-      this.actualView = actualView;
-    }
-
-    @Override
-    public PCollectionView<ActualT> apply(PBegin input) {
-      final Coder<T> coder = actual.getCoder();
-      return actual
-          .apply(Window.<T>into(new GlobalWindows()))
-          .apply(ParDo.of(new DoFn<T, T>() {
-            @Override
-            public void processElement(ProcessContext context) throws CoderException {
-              context.output(CoderUtils.clone(coder, context.element()));
-            }
-          }))
-          .apply(actualView);
-    }
-  }
-
-  private static class CreateExpected<T, ExpectedT>
-      extends PTransform<PBegin, PCollectionView<ExpectedT>> {
-
-    private final Iterable<T> elements;
-    private final Optional<Coder<T>> coder;
-    private final transient PTransform<PCollection<T>, PCollectionView<ExpectedT>> view;
-
-    private CreateExpected(Iterable<T> elements, Optional<Coder<T>> coder,
-        PTransform<PCollection<T>, PCollectionView<ExpectedT>> view) {
-      this.elements = elements;
-      this.coder = coder;
-      this.view = view;
-    }
-
-    @Override
-    public PCollectionView<ExpectedT> apply(PBegin input) {
-      Create.Values<T> createTransform = Create.<T>of(elements);
-      if (coder.isPresent()) {
-        createTransform = createTransform.withCoder(coder.get());
-      }
-      return input.apply(createTransform).apply(view);
-    }
-  }
-
-  private static class PreExisting<T> extends PTransform<PBegin, PCollectionView<T>> {
-
-    private final PCollectionView<T> view;
-
-    private PreExisting(PCollectionView<T> view) {
-      this.view = view;
-    }
-
-    @Override
-    public PCollectionView<T> apply(PBegin input) {
-      return view;
-    }
-  }
-
-  /**
-   * An assertion checker that takes a single
-   * {@link PCollectionView PCollectionView&lt;ActualT&gt;}
-   * and an assertion over {@code ActualT}, and checks it within a dataflow
-   * pipeline.
-   *
-   * <p>Note that the entire assertion must be serializable. If
-   * you need to make assertions involving multiple inputs
-   * that are each not serializable, use TwoSideInputAssert.
-   *
-   * <p>This is generally useful for assertion functions that
-   * are serializable but whose underlying data may not have a coder.
-   */
-  static class OneSideInputAssert<ActualT>
-      extends PTransform<PBegin, PDone> implements Serializable {
-    private final transient PTransform<PBegin, PCollectionView<ActualT>> createActual;
-    private final SerializableFunction<ActualT, Void> checkerFn;
-
-    public OneSideInputAssert(
-        PTransform<PBegin, PCollectionView<ActualT>> createActual,
-        SerializableFunction<ActualT, Void> checkerFn) {
-      this.createActual = createActual;
-      this.checkerFn = checkerFn;
-    }
-
-    @Override
-    public PDone apply(PBegin input) {
-      final PCollectionView<ActualT> actual = input.apply("CreateActual", createActual);
-
-      input
-          .apply(Create.<Void>of((Void) null).withCoder(VoidCoder.of()))
-          .apply(ParDo.named("RunChecks").withSideInputs(actual)
-              .of(new CheckerDoFn<>(checkerFn, actual)));
-
-      return PDone.in(input.getPipeline());
-    }
-  }
-
-  /**
-   * A {@link DoFn} that runs a checking {@link SerializableFunction} on the contents of
-   * a {@link PCollectionView}, and adjusts counters and thrown exceptions for use in testing.
-   */
-  private static class CheckerDoFn<ActualT> extends DoFn<Void, Void> {
-    private final SerializableFunction<ActualT, Void> checkerFn;
-    private final Aggregator<Integer, Integer> success =
-        createAggregator(SUCCESS_COUNTER, new Sum.SumIntegerFn());
-    private final Aggregator<Integer, Integer> failure =
-        createAggregator(FAILURE_COUNTER, new Sum.SumIntegerFn());
-    private final PCollectionView<ActualT> actual;
-
-    private CheckerDoFn(
-        SerializableFunction<ActualT, Void> checkerFn,
-        PCollectionView<ActualT> actual) {
-      this.checkerFn = checkerFn;
-      this.actual = actual;
-    }
-
-    @Override
-    public void processElement(ProcessContext c) {
-      try {
-        ActualT actualContents = c.sideInput(actual);
-        checkerFn.apply(actualContents);
-        success.addValue(1);
-      } catch (Throwable t) {
-        LOG.error("DataflowAssert failed expectations.", t);
-        failure.addValue(1);
-        // TODO: allow for metrics to propagate on failure when running a streaming pipeline
-        if (!c.getPipelineOptions().as(StreamingOptions.class).isStreaming()) {
-          throw t;
-        }
-      }
-    }
-  }
-
-  /**
-   * An assertion checker that takes a {@link PCollectionView PCollectionView&lt;ActualT&gt;},
-   * a {@link PCollectionView PCollectionView&lt;ExpectedT&gt;}, a relation
-   * over {@code ActualT} and {@code ExpectedT}, and checks that the relation holds
-   * within a dataflow pipeline.
-   *
-   * <p>This is useful when either/both of {@code ActualT} and {@code ExpectedT}
-   * are not serializable, but have coders (provided
-   * by the underlying {@link PCollection}s).
-   */
-  static class TwoSideInputAssert<ActualT, ExpectedT>
-      extends PTransform<PBegin, PDone> implements Serializable {
-
-    private final transient PTransform<PBegin, PCollectionView<ActualT>> createActual;
-    private final transient PTransform<PBegin, PCollectionView<ExpectedT>> createExpected;
-    private final AssertRelation<ActualT, ExpectedT> relation;
-
-    protected TwoSideInputAssert(
-        PTransform<PBegin, PCollectionView<ActualT>> createActual,
-        PTransform<PBegin, PCollectionView<ExpectedT>> createExpected,
-        AssertRelation<ActualT, ExpectedT> relation) {
-      this.createActual = createActual;
-      this.createExpected = createExpected;
-      this.relation = relation;
-    }
-
-    @Override
-    public PDone apply(PBegin input) {
-      final PCollectionView<ActualT> actual = input.apply("CreateActual", createActual);
-      final PCollectionView<ExpectedT> expected = input.apply("CreateExpected", createExpected);
-
-      input
-          .apply(Create.<Void>of((Void) null).withCoder(VoidCoder.of()))
-          .apply(ParDo.named("RunChecks").withSideInputs(actual, expected)
-              .of(new CheckerDoFn<>(relation, actual, expected)));
-
-      return PDone.in(input.getPipeline());
-    }
-
-    private static class CheckerDoFn<ActualT, ExpectedT> extends DoFn<Void, Void> {
-      private final Aggregator<Integer, Integer> success =
-          createAggregator(SUCCESS_COUNTER, new Sum.SumIntegerFn());
-      private final Aggregator<Integer, Integer> failure =
-          createAggregator(FAILURE_COUNTER, new Sum.SumIntegerFn());
-      private final AssertRelation<ActualT, ExpectedT> relation;
-      private final PCollectionView<ActualT> actual;
-      private final PCollectionView<ExpectedT> expected;
-
-      private CheckerDoFn(AssertRelation<ActualT, ExpectedT> relation,
-          PCollectionView<ActualT> actual, PCollectionView<ExpectedT> expected) {
-        this.relation = relation;
-        this.actual = actual;
-        this.expected = expected;
-      }
-
-      @Override
-      public void processElement(ProcessContext c) {
-        try {
-          ActualT actualContents = c.sideInput(actual);
-          ExpectedT expectedContents = c.sideInput(expected);
-          relation.assertFor(expectedContents).apply(actualContents);
-          success.addValue(1);
-        } catch (Throwable t) {
-          LOG.error("DataflowAssert failed expectations.", t);
-          failure.addValue(1);
-          // TODO: allow for metrics to propagate on failure when running a streaming pipeline
-          if (!c.getPipelineOptions().as(StreamingOptions.class).isStreaming()) {
-            throw t;
-          }
-        }
-      }
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@link SerializableFunction} that verifies that an actual value is equal to an
-   * expected value.
-   */
-  private static class AssertIsEqualTo<T> implements SerializableFunction<T, Void> {
-    private T expected;
-
-    public AssertIsEqualTo(T expected) {
-      this.expected = expected;
-    }
-
-    @Override
-    public Void apply(T actual) {
-      assertThat(actual, equalTo(expected));
-      return null;
-    }
-  }
-
-  /**
-   * A {@link SerializableFunction} that verifies that an actual value is not equal to an
-   * expected value.
-   */
-  private static class AssertNotEqualTo<T> implements SerializableFunction<T, Void> {
-    private T expected;
-
-    public AssertNotEqualTo(T expected) {
-      this.expected = expected;
-    }
-
-    @Override
-    public Void apply(T actual) {
-      assertThat(actual, not(equalTo(expected)));
-      return null;
-    }
-  }
-
-  /**
-   * A {@link SerializableFunction} that verifies that an {@code Iterable} contains
-   * expected items in any order.
-   */
-  private static class AssertContainsInAnyOrder<T>
-      implements SerializableFunction<Iterable<T>, Void> {
-    private T[] expected;
-
-    @SafeVarargs
-    public AssertContainsInAnyOrder(T... expected) {
-      this.expected = expected;
-    }
-
-    @SuppressWarnings("unchecked")
-    public AssertContainsInAnyOrder(Collection<T> expected) {
-      this((T[]) expected.toArray());
-    }
-
-    public AssertContainsInAnyOrder(Iterable<T> expected) {
-      this(Lists.<T>newArrayList(expected));
-    }
-
-    @Override
-    public Void apply(Iterable<T> actual) {
-      assertThat(actual, containsInAnyOrder(expected));
-      return null;
-    }
-  }
-
-  ////////////////////////////////////////////////////////////
-
-  /**
-   * A binary predicate between types {@code ActualT} and {@code ExpectedT}.
-   * Implemented as a method {@code assertFor(ExpectedT)} which returns
-   * a {@code SerializableFunction<ActualT, Void>}
-   * that should verify the assertion.
-   */
-  private static interface AssertRelation<ActualT, ExpectedT> extends Serializable {
-    public SerializableFunction<ActualT, Void> assertFor(ExpectedT input);
-  }
-
-  /**
-   * An {@link AssertRelation} implementing the binary predicate that two objects are equal.
-   */
-  private static class AssertIsEqualToRelation<T>
-      implements AssertRelation<T, T> {
-    @Override
-    public SerializableFunction<T, Void> assertFor(T expected) {
-      return new AssertIsEqualTo<T>(expected);
-    }
-  }
-
-  /**
-   * An {@link AssertRelation} implementing the binary predicate that two objects are not equal.
-   */
-  private static class AssertNotEqualToRelation<T>
-      implements AssertRelation<T, T> {
-    @Override
-    public SerializableFunction<T, Void> assertFor(T expected) {
-      return new AssertNotEqualTo<T>(expected);
-    }
-  }
-
-  /**
-   * An {@code AssertRelation} implementing the binary predicate that two collections are equal
-   * modulo reordering.
-   */
-  private static class AssertContainsInAnyOrderRelation<T>
-      implements AssertRelation<Iterable<T>, Iterable<T>> {
-    @Override
-    public SerializableFunction<Iterable<T>, Void> assertFor(Iterable<T> expectedElements) {
-      return new AssertContainsInAnyOrder<T>(expectedElements);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/RunnableOnService.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/RunnableOnService.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/RunnableOnService.java
deleted file mode 100644
index 60ab2e5..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/RunnableOnService.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.testing;
-
-/**
- * Category tag for tests that can be run on the
- * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} if the
- * {@code runIntegrationTestOnService} System property is set to true.
- * Example usage:
- * <pre><code>
- *     {@literal @}Test
- *     {@literal @}Category(RunnableOnService.class)
- *     public void testParDo() {...
- * </code></pre>
- */
-public interface RunnableOnService {}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatcher.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatcher.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatcher.java
deleted file mode 100644
index 10f221e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatcher.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.testing;
-
-import org.hamcrest.Matcher;
-
-import java.io.Serializable;
-
-/**
- * A {@link Matcher} that is also {@link Serializable}.
- *
- * <p>Such matchers can be used with {@link DataflowAssert}, which builds Dataflow pipelines
- * such that these matchers may be serialized and executed remotely.
- *
- * <p>To create a {@code SerializableMatcher}, extend {@link org.hamcrest.BaseMatcher}
- * and also implement this interface.
- *
- * @param <T> The type of value matched.
- */
-interface SerializableMatcher<T> extends Matcher<T>, Serializable {
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatchers.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatchers.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatchers.java
deleted file mode 100644
index da5171e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/SerializableMatchers.java
+++ /dev/null
@@ -1,1180 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.testing;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.ListCoder;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.util.UserCodeException;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.common.base.MoreObjects;
-
-import org.hamcrest.BaseMatcher;
-import org.hamcrest.Description;
-import org.hamcrest.Matcher;
-import org.hamcrest.Matchers;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-
-import javax.annotation.Nullable;
-
-/**
- * Static class for building and using {@link SerializableMatcher} instances.
- *
- * <p>Most matchers are wrappers for hamcrest's {@link Matchers}. Please be familiar with the
- * documentation there. Values retained by a {@link SerializableMatcher} are required to be
- * serializable, either via Java serialization or via a provided {@link Coder}.
- *
- * <p>The following matchers are novel to Dataflow:
- * <ul>
- * <li>{@link #kvWithKey} for matching just the key of a {@link KV}.
- * <li>{@link #kvWithValue} for matching just the value of a {@link KV}.
- * <li>{@link #kv} for matching the key and value of a {@link KV}.
- * </ul>
- *
- * <p>For example, to match a group from
- * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}, which has type
- * {@code KV<K, Iterable<V>>} for some {@code K} and {@code V} and where the order of the iterable
- * is undefined, use a matcher like
- * {@code kv(equalTo("some key"), containsInAnyOrder(1, 2, 3))}.
- */
-class SerializableMatchers implements Serializable {
-
-  // Serializable only because of capture by anonymous inner classes
-  private SerializableMatchers() { } // not instantiable
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#allOf(Iterable)}.
-   */
-  public static <T> SerializableMatcher<T>
-  allOf(Iterable<SerializableMatcher<? super T>> serializableMatchers) {
-
-    @SuppressWarnings({"rawtypes", "unchecked"}) // safe covariant cast
-    final Iterable<Matcher<? super T>> matchers = (Iterable) serializableMatchers;
-
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.allOf(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#allOf(Matcher[])}.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<T> allOf(final SerializableMatcher<T>... matchers) {
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.allOf(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#anyOf(Iterable)}.
-   */
-  public static <T> SerializableMatcher<T>
-  anyOf(Iterable<SerializableMatcher<? super T>> serializableMatchers) {
-
-    @SuppressWarnings({"rawtypes", "unchecked"}) // safe covariant cast
-    final Iterable<Matcher<? super T>> matchers = (Iterable) serializableMatchers;
-
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.anyOf(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#anyOf(Matcher[])}.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<T> anyOf(final SerializableMatcher<T>... matchers) {
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.anyOf(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#anything()}.
-   */
-  public static SerializableMatcher<Object> anything() {
-    return fromSupplier(new SerializableSupplier<Matcher<Object>>() {
-      @Override
-      public Matcher<Object> get() {
-        return Matchers.anything();
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#arrayContaining(Object[])}.
-   */
-  @SafeVarargs
-  public static <T extends Serializable> SerializableMatcher<T[]>
-  arrayContaining(final T... items) {
-    return fromSupplier(new SerializableSupplier<Matcher<T[]>>() {
-      @Override
-      public Matcher<T[]> get() {
-        return Matchers.arrayContaining(items);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#arrayContaining(Object[])}.
-   *
-   * <p>The items of type {@code T} will be serialized using the provided {@link Coder}. They are
-   * explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<T[]> arrayContaining(Coder<T> coder, T... items) {
-
-    final SerializableSupplier<T[]> itemsSupplier =
-        new SerializableArrayViaCoder<>(coder, items);
-
-    return fromSupplier(new SerializableSupplier<Matcher<T[]>>() {
-      @Override
-      public Matcher<T[]> get() {
-        return Matchers.arrayContaining(itemsSupplier.get());
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#arrayContaining(Matcher[])}.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<T[]>
-  arrayContaining(final SerializableMatcher<? super T>... matchers) {
-    return fromSupplier(new SerializableSupplier<Matcher<T[]>>() {
-      @Override
-      public Matcher<T[]> get() {
-        return Matchers.<T>arrayContaining(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#arrayContaining(List)}.
-   */
-  public static <T> SerializableMatcher<T[]>
-  arrayContaining(List<SerializableMatcher<? super T>> serializableMatchers) {
-
-    @SuppressWarnings({"rawtypes", "unchecked"}) // safe covariant cast
-    final List<Matcher<? super T>> matchers = (List) serializableMatchers;
-
-    return fromSupplier(new SerializableSupplier<Matcher<T[]>>() {
-      @Override
-      public Matcher<T[]> get() {
-        return Matchers.arrayContaining(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#arrayContainingInAnyOrder(Object[])}.
-   */
-  @SafeVarargs
-  public static <T extends Serializable> SerializableMatcher<T[]>
-  arrayContainingInAnyOrder(final T... items) {
-
-    return fromSupplier(new SerializableSupplier<Matcher<T[]>>() {
-      @Override
-      public Matcher<T[]> get() {
-        return Matchers.arrayContainingInAnyOrder(items);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#arrayContainingInAnyOrder(Object[])}: the items may appear in any order.
-   *
-   * <p>The items of type {@code T} will be serialized using the provided {@link Coder}. They are
-   * explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<T[]> arrayContainingInAnyOrder(Coder<T> coder, T... items) {
-
-    final SerializableSupplier<T[]> itemsSupplier =
-        new SerializableArrayViaCoder<>(coder, items);
-    // Delegate to the any-order matcher; Matchers.arrayContaining would also enforce item order.
-    return fromSupplier(new SerializableSupplier<Matcher<T[]>>() {
-      @Override
-      public Matcher<T[]> get() {
-        return Matchers.arrayContainingInAnyOrder(itemsSupplier.get());
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#arrayContainingInAnyOrder(Matcher[])}.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<T[]> arrayContainingInAnyOrder(
-      final SerializableMatcher<? super T>... matchers) {
-    return fromSupplier(new SerializableSupplier<Matcher<T[]>>() {
-      @Override
-      public Matcher<T[]> get() {
-        return Matchers.<T>arrayContainingInAnyOrder(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#arrayContainingInAnyOrder(Collection)}.
-   */
-  public static <T> SerializableMatcher<T[]> arrayContainingInAnyOrder(
-      Collection<SerializableMatcher<? super T>> serializableMatchers) {
-
-    @SuppressWarnings({"rawtypes", "unchecked"}) // safe covariant cast
-    final Collection<Matcher<? super T>> matchers = (Collection) serializableMatchers;
-
-    return fromSupplier(new SerializableSupplier<Matcher<T[]>>() {
-      @Override
-      public Matcher<T[]> get() {
-        return Matchers.arrayContainingInAnyOrder(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#arrayWithSize(int)}.
-   */
-  public static <T> SerializableMatcher<T[]> arrayWithSize(final int size) {
-    return fromSupplier(new SerializableSupplier<Matcher<T[]>>() {
-      @Override
-      public Matcher<T[]> get() {
-        return Matchers.arrayWithSize(size);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#arrayWithSize(Matcher)}.
-   */
-  public static <T> SerializableMatcher<T[]> arrayWithSize(
-      final SerializableMatcher<? super Integer> sizeMatcher) {
-    return fromSupplier(new SerializableSupplier<Matcher<T[]>>() {
-      @Override
-      public Matcher<T[]> get() {
-        return Matchers.arrayWithSize(sizeMatcher);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#closeTo(double,double)}.
-   */
-  public static SerializableMatcher<Double> closeTo(final double target, final double error) {
-    return fromSupplier(new SerializableSupplier<Matcher<Double>>() {
-      @Override
-      public Matcher<Double> get() {
-        return Matchers.closeTo(target, error);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#contains(Object[])}.
-   */
-  @SafeVarargs
-  public static <T extends Serializable> SerializableMatcher<Iterable<? extends T>> contains(
-      final T... items) {
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? extends T>>>() {
-      @Override
-      public Matcher<Iterable<? extends T>> get() {
-        return Matchers.contains(items);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#contains(Object[])}: the items must appear in exactly the given order.
-   *
-   * <p>The items of type {@code T} will be serialized using the provided {@link Coder}. They are
-   * explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<Iterable<? extends T>>
-  contains(Coder<T> coder, T... items) {
-
-    final SerializableSupplier<T[]> itemsSupplier =
-        new SerializableArrayViaCoder<>(coder, items);
-    // Delegate to the ordered matcher; Matchers.containsInAnyOrder would ignore item order.
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? extends T>>>() {
-      @Override
-      public Matcher<Iterable<? extends T>> get() {
-        return Matchers.contains(itemsSupplier.get());
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#contains(Matcher[])}.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<Iterable<? extends T>> contains(
-      final SerializableMatcher<? super T>... matchers) {
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? extends T>>>() {
-      @Override
-      public Matcher<Iterable<? extends T>> get() {
-        return Matchers.<T>contains(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#contains(List)}. Only the matchers are retained, so {@code T} is unbounded.
-   */
-  public static <T> SerializableMatcher<Iterable<? extends T>> contains(
-      List<SerializableMatcher<? super T>> serializableMatchers) {
-
-    @SuppressWarnings({"rawtypes", "unchecked"}) // safe covariant cast
-    final List<Matcher<? super T>> matchers = (List) serializableMatchers;
-
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? extends T>>>() {
-      @Override
-      public Matcher<Iterable<? extends T>> get() {
-        return Matchers.contains(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#containsInAnyOrder(Object[])}.
-   */
-  @SafeVarargs
-  public static <T extends Serializable> SerializableMatcher<Iterable<? extends T>>
-  containsInAnyOrder(final T... items) {
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? extends T>>>() {
-      @Override
-      public Matcher<Iterable<? extends T>> get() {
-        return Matchers.containsInAnyOrder(items);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#containsInAnyOrder(Object[])}.
-   *
-   * <p>The items of type {@code T} will be serialized using the provided {@link Coder}.
-   * They are explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<Iterable<? extends T>>
-  containsInAnyOrder(Coder<T> coder, T... items) {
-
-    final SerializableSupplier<T[]> itemsSupplier =
-        new SerializableArrayViaCoder<>(coder, items);
-
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? extends T>>>() {
-      @Override
-      public Matcher<Iterable<? extends T>> get() {
-        return Matchers.containsInAnyOrder(itemsSupplier.get());
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#containsInAnyOrder(Matcher[])}.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<Iterable<? extends T>> containsInAnyOrder(
-      final SerializableMatcher<? super T>... matchers) {
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? extends T>>>() {
-      @Override
-      public Matcher<Iterable<? extends T>> get() {
-        return Matchers.<T>containsInAnyOrder(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#containsInAnyOrder(Collection)}.
-   */
-  public static <T> SerializableMatcher<Iterable<? extends T>> containsInAnyOrder(
-      Collection<SerializableMatcher<? super T>> serializableMatchers) {
-
-    @SuppressWarnings({"rawtypes", "unchecked"}) // safe covariant cast
-    final Collection<Matcher<? super T>> matchers = (Collection) serializableMatchers;
-
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? extends T>>>() {
-      @Override
-      public Matcher<Iterable<? extends T>> get() {
-        return Matchers.containsInAnyOrder(matchers);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#containsString}.
-   */
-  public static SerializableMatcher<String> containsString(final String substring) {
-    return fromSupplier(new SerializableSupplier<Matcher<String>>() {
-      @Override
-      public Matcher<String> get() {
-        return Matchers.containsString(substring);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#empty()}.
-   */
-  public static <T> SerializableMatcher<Collection<? extends T>> empty() {
-    return fromSupplier(new SerializableSupplier<Matcher<Collection<? extends T>>>() {
-      @Override
-      public Matcher<Collection<? extends T>> get() {
-        return Matchers.empty();
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#emptyArray()}.
-   */
-  public static <T> SerializableMatcher<T[]> emptyArray() {
-    return fromSupplier(new SerializableSupplier<Matcher<T[]>>() {
-      @Override
-      public Matcher<T[]> get() {
-        return Matchers.emptyArray();
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#emptyIterable()}.
-   */
-  public static <T> SerializableMatcher<Iterable<? extends T>> emptyIterable() {
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? extends T>>>() {
-      @Override
-      public Matcher<Iterable<? extends T>> get() {
-        return Matchers.emptyIterable();
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#endsWith}.
-   */
-  public static SerializableMatcher<String> endsWith(final String substring) {
-    return fromSupplier(new SerializableSupplier<Matcher<String>>() {
-      @Override
-      public Matcher<String> get() {
-        return Matchers.endsWith(substring);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#equalTo}.
-   */
-  public static <T extends Serializable> SerializableMatcher<T> equalTo(final T expected) {
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.equalTo(expected);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#equalTo}.
-   *
-   * <p>The expected value of type {@code T} will be serialized using the provided {@link Coder}.
-   * It is explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  public static <T> SerializableMatcher<T> equalTo(Coder<T> coder, T expected) {
-
-    final SerializableSupplier<T> expectedSupplier = new SerializableViaCoder<>(coder, expected);
-
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.equalTo(expectedSupplier.get());
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#greaterThan}.
-   */
-  public static <T extends Comparable<T> & Serializable> SerializableMatcher<T>
-  greaterThan(final T target) {
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.greaterThan(target);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#greaterThan}.
-   *
-   * <p>The target value of type {@code T} will be serialized using the provided {@link Coder}.
-   * It is explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  public static <T extends Comparable<T>> SerializableMatcher<T>
-  greaterThan(final Coder<T> coder, T target) {
-    final SerializableSupplier<T> targetSupplier = new SerializableViaCoder<>(coder, target);
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.greaterThan(targetSupplier.get());
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#greaterThanOrEqualTo}.
-   */
-  public static <T extends Comparable<T>> SerializableMatcher<T> greaterThanOrEqualTo(
-      final T target) {
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.greaterThanOrEqualTo(target);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#greaterThanOrEqualTo}.
-   *
-   * <p>The target value of type {@code T} will be serialized using the provided {@link Coder}.
-   * It is explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  public static <T extends Comparable<T>> SerializableMatcher<T>
-  greaterThanOrEqualTo(final Coder<T> coder, T target) {
-    final SerializableSupplier<T> targetSupplier = new SerializableViaCoder<>(coder, target);
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.greaterThanOrEqualTo(targetSupplier.get());
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#hasItem(Object)}.
-   */
-  public static <T extends Serializable> SerializableMatcher<Iterable<? super T>> hasItem(
-      final T target) {
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? super T>>>() {
-      @Override
-      public Matcher<Iterable<? super T>> get() {
-        return Matchers.hasItem(target);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#hasItem(Object)}.
-   *
-   * <p>The item of type {@code T} will be serialized using the provided {@link Coder}.
-   * It is explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  public static <T> SerializableMatcher<Iterable<? super T>> hasItem(Coder<T> coder, T target) {
-    final SerializableSupplier<T> targetSupplier = new SerializableViaCoder<>(coder, target);
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? super T>>>() {
-      @Override
-      public Matcher<Iterable<? super T>> get() {
-        return Matchers.hasItem(targetSupplier.get());
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#hasItem(Matcher)}.
-   */
-  public static <T> SerializableMatcher<Iterable<? super T>> hasItem(
-      final SerializableMatcher<? super T> matcher) {
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<? super T>>>() {
-      @Override
-      public Matcher<Iterable<? super T>> get() {
-        return Matchers.hasItem(matcher);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#hasSize(int)}.
-   */
-  public static <T> SerializableMatcher<Collection<? extends T>> hasSize(final int size) {
-    return fromSupplier(new SerializableSupplier<Matcher<Collection<? extends T>>>() {
-      @Override
-      public Matcher<Collection<? extends T>> get() {
-        return Matchers.hasSize(size);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#hasSize(Matcher)}.
-   */
-  public static <T> SerializableMatcher<Collection<? extends T>> hasSize(
-      final SerializableMatcher<? super Integer> sizeMatcher) {
-    return fromSupplier(new SerializableSupplier<Matcher<Collection<? extends T>>>() {
-      @Override
-      public Matcher<Collection<? extends T>> get() {
-        return Matchers.hasSize(sizeMatcher);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#iterableWithSize(int)}.
-   */
-  public static <T> SerializableMatcher<Iterable<T>> iterableWithSize(final int size) {
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<T>>>() {
-      @Override
-      public Matcher<Iterable<T>> get() {
-        return Matchers.iterableWithSize(size);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#iterableWithSize(Matcher)}.
-   */
-  public static <T> SerializableMatcher<Iterable<T>> iterableWithSize(
-      final SerializableMatcher<? super Integer> sizeMatcher) {
-    return fromSupplier(new SerializableSupplier<Matcher<Iterable<T>>>() {
-      @Override
-      public Matcher<Iterable<T>> get() {
-        return Matchers.iterableWithSize(sizeMatcher);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#isIn(Collection)}.
-   */
-  public static <T extends Serializable> SerializableMatcher<T>
-  isIn(final Collection<T> collection) {
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.isIn(collection);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#isIn(Collection)}.
-   *
-   * <p>The items of type {@code T} will be serialized using the provided {@link Coder}.
-   * They are explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  public static <T> SerializableMatcher<T> isIn(Coder<T> coder, Collection<T> collection) {
-    @SuppressWarnings("unchecked")
-    T[] items = (T[]) collection.toArray();
-    final SerializableSupplier<T[]> itemsSupplier =
-        new SerializableArrayViaCoder<>(coder, items);
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.isIn(itemsSupplier.get());
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#isIn(Object[])}.
-   */
-  public static <T extends Serializable> SerializableMatcher<T> isIn(final T[] items) {
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.isIn(items);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#isIn(Object[])}.
-   *
-   * <p>The items of type {@code T} will be serialized using the provided {@link Coder}.
-   * They are explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  public static <T> SerializableMatcher<T> isIn(Coder<T> coder, T[] items) {
-    final SerializableSupplier<T[]> itemsSupplier =
-        new SerializableArrayViaCoder<>(coder, items);
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.isIn(itemsSupplier.get());
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#isOneOf}.
-   */
-  @SafeVarargs
-  public static <T extends Serializable> SerializableMatcher<T> isOneOf(final T... elems) {
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.isOneOf(elems);
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#isOneOf}.
-   *
-   * <p>The items of type {@code T} will be serialized using the provided {@link Coder}.
-   * They are explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  @SafeVarargs
-  public static <T> SerializableMatcher<T> isOneOf(Coder<T> coder, T... items) {
-    final SerializableSupplier<T[]> itemsSupplier =
-        new SerializableArrayViaCoder<>(coder, items);
-    return fromSupplier(new SerializableSupplier<Matcher<T>>() {
-      @Override
-      public Matcher<T> get() {
-        return Matchers.isOneOf(itemsSupplier.get());
-      }
-    });
-  }
-
-  /**
-   * A {@link SerializableMatcher} that matches any {@link KV} with the specified key.
-   */
-  public static <K extends Serializable, V> SerializableMatcher<KV<? extends K, ? extends V>>
-  kvWithKey(K key) {
-    return new KvKeyMatcher<K, V>(equalTo(key));
-  }
-
-  /**
-   * A {@link SerializableMatcher} that matches any {@link KV} with the specified key.
-   *
-   * <p>The key of type {@code K} will be serialized using the provided {@link Coder}.
-   * It is explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  public static <K, V> SerializableMatcher<KV<? extends K, ? extends V>>
-  kvWithKey(Coder<K> coder, K key) {
-    return new KvKeyMatcher<K, V>(equalTo(coder, key));
-  }
-
-  /**
-   * A {@link SerializableMatcher} that matches any {@link KV} with matching key.
-   */
-  public static <K, V> SerializableMatcher<KV<? extends K, ? extends V>> kvWithKey(
-      final SerializableMatcher<? super K> keyMatcher) {
-    return new KvKeyMatcher<K, V>(keyMatcher);
-  }
-
-  /**
-   * A {@link SerializableMatcher} that matches any {@link KV} with the specified value.
-   */
-  public static <K, V extends Serializable> SerializableMatcher<KV<? extends K, ? extends V>>
-  kvWithValue(V value) {
-    return new KvValueMatcher<K, V>(equalTo(value));
-  }
-
-  /**
-   * A {@link SerializableMatcher} that matches any {@link KV} with the specified value.
-   *
-   * <p>The value of type {@code V} will be serialized using the provided {@link Coder}.
-   * It is explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  public static <K, V> SerializableMatcher<KV<? extends K, ? extends V>>
-  kvWithValue(Coder<V> coder, V value) {
-    return new KvValueMatcher<K, V>(equalTo(coder, value));
-  }
-
-  /**
-   * A {@link SerializableMatcher} that matches any {@link KV} with matching value.
-   */
-  public static <K, V> SerializableMatcher<KV<? extends K, ? extends V>> kvWithValue(
-      final SerializableMatcher<? super V> valueMatcher) {
-    return new KvValueMatcher<>(valueMatcher);
-  }
-
-  /**
-   * Returns a {@link SerializableMatcher} that accepts a {@link KV} only when its key satisfies
-   * {@code keyMatcher} and its value satisfies {@code valueMatcher}.
-   */
-  public static <K, V> SerializableMatcher<KV<? extends K, ? extends V>> kv(
-      final SerializableMatcher<? super K> keyMatcher,
-      final SerializableMatcher<? super V> valueMatcher) {
-    // Build the two single-sided matchers first, then require both of them.
-    SerializableMatcher<KV<? extends K, ? extends V>> byKey =
-        SerializableMatchers.<K, V>kvWithKey(keyMatcher);
-    SerializableMatcher<KV<? extends K, ? extends V>> byValue =
-        SerializableMatchers.<K, V>kvWithValue(valueMatcher);
-    return SerializableMatchers.<KV<? extends K, ? extends V>>allOf(byKey, byValue);
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#lessThan}.
-   *
-   * <p>The {@code target} is captured by the returned matcher, which is why it must be
-   * {@link Serializable}.
-   */
-  public static <T extends Comparable<T> & Serializable> SerializableMatcher<T> lessThan(
-      final T target) {
-    SerializableSupplier<Matcher<T>> strictlyBelowTarget =
-        new SerializableSupplier<Matcher<T>>() {
-          @Override
-          public Matcher<T> get() {
-            return Matchers.lessThan(target);
-          }
-        };
-    return fromSupplier(strictlyBelowTarget);
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#lessThan}.
-   *
-   * <p>The target value of type {@code T} is serialized with the supplied {@link Coder}; it is
-   * explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  public static <T extends Comparable<T>> SerializableMatcher<T>
-  lessThan(Coder<T> coder, T target) {
-    // Hold the target behind a coder-backed supplier so the anonymous class below never
-    // captures the raw (possibly non-serializable) value.
-    final SerializableSupplier<T> encodedTarget = new SerializableViaCoder<>(coder, target);
-    SerializableSupplier<Matcher<T>> strictlyBelowTarget =
-        new SerializableSupplier<Matcher<T>>() {
-          @Override
-          public Matcher<T> get() {
-            return Matchers.lessThan(encodedTarget.get());
-          }
-        };
-    return fromSupplier(strictlyBelowTarget);
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#lessThanOrEqualTo}.
-   *
-   * <p>The {@code target} is captured by the returned matcher, which is why it must be
-   * {@link Serializable}.
-   */
-  public static <T extends Comparable<T> & Serializable> SerializableMatcher<T> lessThanOrEqualTo(
-      final T target) {
-    SerializableSupplier<Matcher<T>> atMostTarget =
-        new SerializableSupplier<Matcher<T>>() {
-          @Override
-          public Matcher<T> get() {
-            return Matchers.lessThanOrEqualTo(target);
-          }
-        };
-    return fromSupplier(atMostTarget);
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to
-   * {@link Matchers#lessThanOrEqualTo}.
-   *
-   * <p>The target value of type {@code T} is serialized with the supplied {@link Coder}; it is
-   * explicitly <i>not</i> required or expected to be serializable via Java serialization.
-   */
-  public static <T extends Comparable<T>> SerializableMatcher<T> lessThanOrEqualTo(
-      Coder<T> coder, T target) {
-    // Hold the target behind a coder-backed supplier so the anonymous class below never
-    // captures the raw (possibly non-serializable) value.
-    final SerializableSupplier<T> encodedTarget = new SerializableViaCoder<>(coder, target);
-    SerializableSupplier<Matcher<T>> atMostTarget =
-        new SerializableSupplier<Matcher<T>>() {
-          @Override
-          public Matcher<T> get() {
-            return Matchers.lessThanOrEqualTo(encodedTarget.get());
-          }
-        };
-    return fromSupplier(atMostTarget);
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#not}, negating
-   * the given serializable {@code matcher}.
-   */
-  public static <T> SerializableMatcher<T> not(final SerializableMatcher<T> matcher) {
-    SerializableSupplier<Matcher<T>> negated =
-        new SerializableSupplier<Matcher<T>>() {
-          @Override
-          public Matcher<T> get() {
-            return Matchers.not(matcher);
-          }
-        };
-    return fromSupplier(negated);
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#nullValue}.
-   */
-  public static SerializableMatcher<Object> nullValue() {
-    SerializableSupplier<Matcher<Object>> isNull =
-        new SerializableSupplier<Matcher<Object>>() {
-          @Override
-          public Matcher<Object> get() {
-            return Matchers.nullValue();
-          }
-        };
-    return fromSupplier(isNull);
-  }
-
-  /**
-   * A {@link SerializableMatcher} with identical criteria to {@link Matchers#startsWith},
-   * using {@code substring} as the expected prefix.
-   */
-  public static SerializableMatcher<String> startsWith(final String substring) {
-    SerializableSupplier<Matcher<String>> hasPrefix =
-        new SerializableSupplier<Matcher<String>>() {
-          @Override
-          public Matcher<String> get() {
-            return Matchers.startsWith(substring);
-          }
-        };
-    return fromSupplier(hasPrefix);
-  }
-
-  /**
-   * Matches a {@link KV} by applying a delegate matcher to its key only.
-   *
-   * <p>Items that are {@code null} or not a {@link KV} are reported as a mismatch rather than
-   * causing an exception, since {@link #matches(Object)} may be handed any object.
-   */
-  private static class KvKeyMatcher<K, V>
-  extends BaseMatcher<KV<? extends K, ? extends V>>
-  implements SerializableMatcher<KV<? extends K, ? extends V>> {
-    private final SerializableMatcher<? super K> keyMatcher;
-
-    public KvKeyMatcher(SerializableMatcher<? super K> keyMatcher) {
-      this.keyMatcher = keyMatcher;
-    }
-
-    @Override
-    public boolean matches(Object item) {
-      // instanceof is false for null, so this guards both null and foreign types.
-      if (!(item instanceof KV)) {
-        return false;
-      }
-      return keyMatcher.matches(((KV<?, ?>) item).getKey());
-    }
-
-    @Override
-    public void describeMismatch(Object item, Description mismatchDescription) {
-      if (!(item instanceof KV)) {
-        // Fall back to the generic description supplied by BaseMatcher.
-        super.describeMismatch(item, mismatchDescription);
-        return;
-      }
-      Object key = ((KV<?, ?>) item).getKey();
-      if (!keyMatcher.matches(key)) {
-        mismatchDescription.appendText("key did not match: ");
-        keyMatcher.describeMismatch(key, mismatchDescription);
-      }
-    }
-
-    @Override
-    public void describeTo(Description description) {
-      description.appendText("KV with key matching ");
-      keyMatcher.describeTo(description);
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(this)
-          .addValue(keyMatcher)
-          .toString();
-    }
-  }
-
-  /**
-   * Matches a {@link KV} by applying a delegate matcher to its value only.
-   *
-   * <p>Items that are {@code null} or not a {@link KV} are reported as a mismatch rather than
-   * causing an exception, since {@link #matches(Object)} may be handed any object.
-   */
-  private static class KvValueMatcher<K, V>
-  extends BaseMatcher<KV<? extends K, ? extends V>>
-  implements SerializableMatcher<KV<? extends K, ? extends V>> {
-    private final SerializableMatcher<? super V> valueMatcher;
-
-    public KvValueMatcher(SerializableMatcher<? super V> valueMatcher) {
-      this.valueMatcher = valueMatcher;
-    }
-
-    @Override
-    public boolean matches(Object item) {
-      // instanceof is false for null, so this guards both null and foreign types.
-      if (!(item instanceof KV)) {
-        return false;
-      }
-      return valueMatcher.matches(((KV<?, ?>) item).getValue());
-    }
-
-    @Override
-    public void describeMismatch(Object item, Description mismatchDescription) {
-      if (!(item instanceof KV)) {
-        // Fall back to the generic description supplied by BaseMatcher.
-        super.describeMismatch(item, mismatchDescription);
-        return;
-      }
-      Object value = ((KV<?, ?>) item).getValue();
-      if (!valueMatcher.matches(value)) {
-        mismatchDescription.appendText("value did not match: ");
-        valueMatcher.describeMismatch(value, mismatchDescription);
-      }
-    }
-
-    @Override
-    public void describeTo(Description description) {
-      description.appendText("KV with value matching ");
-      valueMatcher.describeTo(description);
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(this)
-          .addValue(valueMatcher)
-          .toString();
-    }
-  }
-
-  /**
-   * Builds a {@link SerializableMatcher} out of a {@link Matcher} that is not itself
-   * serializable, by going through a {@link SerializableSupplier}.
-   *
-   * <p>Provide a {@link SerializableSupplier} whose {@link SerializableSupplier#get() get()}
-   * returns the desired {@link Matcher}. The resulting {@link SerializableMatcher} asks the
-   * supplier for a matcher whenever it is used for matching or describing, which may happen on
-   * another machine, such as a Dataflow worker.
-   *
-   * <pre><code>
-   * return fromSupplier(new SerializableSupplier&lt;Matcher&lt;T&gt;&gt;() {
-   *   &#64;Override
-   *   public Matcher&lt;T&gt; get() {
-   *     return new MyMatcherForT();
-   *   }
-   * });
-   * </code></pre>
-   */
-  public static <T> SerializableMatcher<T> fromSupplier(
-      SerializableSupplier<Matcher<T>> supplier) {
-    SerializableMatcherFromSupplier<T> deferred =
-        new SerializableMatcherFromSupplier<>(supplier);
-    return deferred;
-  }
-
-  /**
-   * Supplies values of type {@code T}, and is serializable. Thus, even if {@code T} is not
-   * serializable, the supplier can be serialized and provide a {@code T} wherever it is
-   * deserialized.
-   *
-   * @param <T> the type of value supplied.
-   */
-  public interface SerializableSupplier<T> extends Serializable {
-    /** Returns the supplied value. */
-    T get();
-  }
-
-  /**
-   * A {@link SerializableMatcher} that forwards every call to a {@link Matcher} obtained from a
-   * {@link SerializableSupplier}.
-   *
-   * <p>Since the delegate {@link Matcher} is not generally serializable, only the supplier is
-   * stored; a matcher is requested from it on each call.
-   */
-  private static class SerializableMatcherFromSupplier<T> extends BaseMatcher<T>
-  implements SerializableMatcher<T> {
-
-    /** Source of the delegate matcher; never reassigned after construction. */
-    private final SerializableSupplier<Matcher<T>> supplier;
-
-    public SerializableMatcherFromSupplier(SerializableSupplier<Matcher<T>> supplier) {
-      this.supplier = supplier;
-    }
-
-    @Override
-    public void describeTo(Description description) {
-      supplier.get().describeTo(description);
-    }
-
-    @Override
-    public boolean matches(Object item) {
-      return supplier.get().matches(item);
-    }
-
-    @Override
-    public void describeMismatch(Object item, Description mismatchDescription) {
-      supplier.get().describeMismatch(item, mismatchDescription);
-    }
-  }
-
-  /**
-   * Wraps any value that can be encoded via a {@link Coder} to make it {@link Serializable}.
-   * This is not likely to be a good encoding, so should be used only for tests, where data
-   * volume is small and minor costs are not critical.
-   *
-   * <p>The value is encoded eagerly in the constructor and decoded lazily in {@link #get()}.
-   */
-  private static class SerializableViaCoder<T> implements SerializableSupplier<T> {
-    /** Cached value that is not serialized. */
-    @Nullable
-    private transient T value;
-
-    /** The bytes of {@link #value} when encoded via {@link #coder}. */
-    private final byte[] encodedValue;
-
-    /** The coder used both to encode the value up front and to decode it on demand. */
-    private final Coder<T> coder;
-
-    public SerializableViaCoder(Coder<T> coder, T value) {
-      this.coder = coder;
-      this.value = value;
-      try {
-        this.encodedValue = CoderUtils.encodeToByteArray(coder, value);
-      } catch (CoderException exc) {
-        throw new RuntimeException("Error serializing via Coder", exc);
-      }
-    }
-
-    /**
-     * Returns the wrapped value, decoding it from {@link #encodedValue} when no cached copy is
-     * present.
-     *
-     * <p>A {@code null} cache is indistinguishable from a {@code null} value, so a value that
-     * decodes to {@code null} is decoded again on every call.
-     */
-    @Override
-    public T get() {
-      if (value == null) {
-        try {
-          value = CoderUtils.decodeFromByteArray(coder, encodedValue);
-        } catch (CoderException exc) {
-          throw new RuntimeException("Error deserializing via Coder", exc);
-        }
-      }
-      return value;
-    }
-  }
-
-  /**
-   * Wraps any array with values that can be encoded via a {@link Coder} to make it
-   * {@link Serializable}. This is not likely to be a good encoding, so should be used only for
-   * tests, where data volume is small and minor costs are not critical.
-   *
-   * <p>The elements are encoded eagerly as a list in the constructor and decoded lazily in
-   * {@link #get()}.
-   */
-  private static class SerializableArrayViaCoder<T> implements SerializableSupplier<T[]> {
-    /** Cached value that is not serialized. */
-    @Nullable
-    private transient T[] value;
-
-    /** The bytes of {@link #value} when encoded via {@link #coder}. */
-    private final byte[] encodedValue;
-
-    /** List coder built from the element coder; the array is encoded as a list. */
-    private final Coder<List<T>> coder;
-
-    public SerializableArrayViaCoder(Coder<T> elementCoder, T[] value) {
-      this.coder = ListCoder.of(elementCoder);
-      this.value = value;
-      try {
-        this.encodedValue = CoderUtils.encodeToByteArray(coder, Arrays.asList(value));
-      } catch (CoderException exc) {
-        // NOTE(review): the sibling SerializableViaCoder wraps this failure in a plain
-        // RuntimeException("Error serializing via Coder", ...); confirm whether the
-        // difference is intentional before unifying the two.
-        throw UserCodeException.wrap(exc);
-      }
-    }
-
-    /**
-     * Returns the wrapped array, decoding it from {@link #encodedValue} when no cached copy is
-     * present.
-     */
-    @Override
-    public T[] get() {
-      if (value == null) {
-        try {
-          // NOTE(review): List#toArray() produces an Object[], so the decoded array does not
-          // carry the runtime component type of the array passed to the constructor.
-          @SuppressWarnings("unchecked")
-          T[] decoded = (T[]) CoderUtils.decodeFromByteArray(coder, encodedValue).toArray();
-          value = decoded;
-        } catch (CoderException exc) {
-          throw new RuntimeException("Error deserializing via Coder", exc);
-        }
-      }
-      return value;
-    }
-  }
-}

[44/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java
deleted file mode 100644
index be3a415..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedSource.java
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-/**
- * A {@link Source} that reads a finite amount of input and, because of that, supports
- * some additional operations.
- *
- * <p>The operations are:
- * <ul>
- * <li>Splitting into bundles of given size: {@link #splitIntoBundles};
- * <li>Size estimation: {@link #getEstimatedSizeBytes};
- * <li>Telling whether or not this source produces key/value pairs in sorted order:
- * {@link #producesSortedKeys};
- * <li>The reader ({@link BoundedReader}) supports progress estimation
- * ({@link BoundedReader#getFractionConsumed}) and dynamic splitting
- * ({@link BoundedReader#splitAtFraction}).
- * </ul>
- *
- * <p>To use this class for supporting your custom input type, derive your
- * class from it, and override the abstract methods. For an example, see {@link DatastoreIO}.
- *
- * @param <T> Type of records read by the source.
- */
-public abstract class BoundedSource<T> extends Source<T> {
-  /**
-   * Splits the source into bundles of approximately {@code desiredBundleSizeBytes}.
-   */
-  public abstract List<? extends BoundedSource<T>> splitIntoBundles(
-      long desiredBundleSizeBytes, PipelineOptions options) throws Exception;
-
-  /**
-   * An estimate of the total size (in bytes) of the data that would be read from this source.
-   * This estimate is in terms of external storage size, before any decompression or other
-   * processing done by the reader.
-   */
-  public abstract long getEstimatedSizeBytes(PipelineOptions options) throws Exception;
-
-  /**
-   * Whether this source is known to produce key/value pairs sorted by lexicographic order on
-   * the bytes of the encoded key.
-   */
-  public abstract boolean producesSortedKeys(PipelineOptions options) throws Exception;
-
-  /**
-   * Returns a new {@link BoundedReader} that reads from this source.
-   */
-  public abstract BoundedReader<T> createReader(PipelineOptions options) throws IOException;
-
-  /**
-   * A {@code Reader} that reads a bounded amount of input and supports some additional
-   * operations, such as progress estimation and dynamic work rebalancing.
-   *
-   * <h3>Boundedness</h3>
-   * <p>Once {@link #start} or {@link #advance} has returned false, neither will be called
-   * again on this object.
-   *
-   * <h3>Thread safety</h3>
-   * All methods will be run from the same thread except {@link #splitAtFraction},
-   * {@link #getFractionConsumed} and {@link #getCurrentSource}, which can be called concurrently
-   * from a different thread. There will not be multiple concurrent calls to
-   * {@link #splitAtFraction} but there can be for {@link #getFractionConsumed} if
-   * {@link #splitAtFraction} is implemented.
-   *
-   * <p>If the source does not implement {@link #splitAtFraction}, you do not need to worry about
-   * thread safety. If implemented, it must be safe to call {@link #splitAtFraction} and
-   * {@link #getFractionConsumed} concurrently with other methods.
-   *
-   * <p>Additionally, a successful {@link #splitAtFraction} call must, by definition, cause
-   * {@link #getCurrentSource} to start returning a different value.
-   * Callers of {@link #getCurrentSource} need to be aware of the possibility that the returned
-   * value can change at any time, and must only access the properties of the source returned by
-   * {@link #getCurrentSource} which do not change between {@link #splitAtFraction} calls.
-   *
-   * <h3>Implementing {@link #splitAtFraction}</h3>
-   * In the course of dynamic work rebalancing, the method {@link #splitAtFraction}
-   * may be called concurrently with {@link #advance} or {@link #start}. It is critical that
-   * their interaction is implemented in a thread-safe way, otherwise data loss is possible.
-   *
-   * <p>Sources which support dynamic work rebalancing should use
-   * {@link com.google.cloud.dataflow.sdk.io.range.RangeTracker} to manage the (source-specific)
-   * range of positions that is being split. If your source supports dynamic work rebalancing,
-   * please use that class to implement it if possible; if not possible, please contact the team
-   * at <i>dataflow-feedback@google.com</i>.
-   */
-  @Experimental(Experimental.Kind.SOURCE_SINK)
-  public abstract static class BoundedReader<T> extends Source.Reader<T> {
-    /**
-     * Returns a value in [0, 1] representing approximately what fraction of the
-     * {@link #getCurrentSource current source} this reader has read so far, or {@code null} if such
-     * an estimate is not available.
-     *
-     * <p>It is recommended that this method should satisfy the following properties:
-     * <ul>
-     *   <li>Should return 0 before the {@link #start} call.
-     *   <li>Should return 1 after a {@link #start} or {@link #advance} call that returns false.
-     *   <li>The returned values should be non-decreasing (though they don't have to be unique).
-     * </ul>
-     *
-     * <p>By default, returns null to indicate that this cannot be estimated.
-     *
-     * <h5>Thread safety</h5>
-     * If {@link #splitAtFraction} is implemented, this method can be called concurrently to other
-     * methods (including itself), and it is therefore critical for it to be implemented
-     * in a thread-safe way.
-     */
-    public Double getFractionConsumed() {
-      // Default implementation: no progress estimate is available.
-      return null;
-    }
-
-    /**
-     * Returns a {@code Source} describing the same input that this {@code Reader} currently reads
-     * (including items already read).
-     *
-     * <h3>Usage</h3>
-     * <p>Reader subclasses can use this method for convenience to access unchanging properties of
-     * the source being read. Alternatively, they can cache these properties in the constructor.
-     * <p>The framework will call this method in the course of dynamic work rebalancing, e.g. after
-     * a successful {@link BoundedSource.BoundedReader#splitAtFraction} call.
-     *
-     * <h3>Mutability and thread safety</h3>
-     * Remember that {@link Source} objects must always be immutable. However, the return value of
-     * this function may be affected by dynamic work rebalancing, happening asynchronously via
-     * {@link BoundedSource.BoundedReader#splitAtFraction}, meaning it can return a different
-     * {@link Source} object. However, the returned object will itself still be immutable.
-     * Callers must take care not to rely on properties of the returned source that may be
-     * asynchronously changed as a result of this process (e.g. do not cache an end offset when
-     * reading a file).
-     *
-     * <h3>Implementation</h3>
-     * For convenience, subclasses should usually return the most concrete subclass of
-     * {@link Source} possible.
-     * In practice, the implementation of this method should nearly always be one of the following:
-     * <ul>
-     *   <li>Source that inherits from a base class that already implements
-     *   {@link #getCurrentSource}: delegate to base class. In this case, it is almost always
-     *   an error for the subclass to maintain its own copy of the source.
-     * <pre>{@code
-     *   public FooReader(FooSource<T> source) {
-     *     super(source);
-     *   }
-     *
-     *   public FooSource<T> getCurrentSource() {
-     *     return (FooSource<T>)super.getCurrentSource();
-     *   }
-     * }</pre>
-     *   <li>Source that does not support dynamic work rebalancing: return a private final variable.
-     * <pre>{@code
-     *   private final FooSource<T> source;
-     *
-     *   public FooReader(FooSource<T> source) {
-     *     this.source = source;
-     *   }
-     *
-     *   public FooSource<T> getCurrentSource() {
-     *     return source;
-     *   }
-     * }</pre>
-     *   <li>{@link BoundedSource.BoundedReader} that explicitly supports dynamic work rebalancing:
-     *   maintain a variable pointing to an immutable source object, and protect it with
-     *   synchronization.
-     * <pre>{@code
-     *   private FooSource<T> source;
-     *
-     *   public FooReader(FooSource<T> source) {
-     *     this.source = source;
-     *   }
-     *
-     *   public synchronized FooSource<T> getCurrentSource() {
-     *     return source;
-     *   }
-     *
-     *   public synchronized FooSource<T> splitAtFraction(double fraction) {
-     *     ...
-     *     FooSource<T> primary = ...;
-     *     FooSource<T> residual = ...;
-     *     this.source = primary;
-     *     return residual;
-     *   }
-     * }</pre>
-     * </ul>
-     */
-    @Override
-    public abstract BoundedSource<T> getCurrentSource();
-
-    /**
-     * Tells the reader to narrow the range of the input it's going to read and give up
-     * the remainder, so that the new range would contain approximately the given
-     * fraction of the amount of data in the current range.
-     *
-     * <p>Returns a {@code BoundedSource} representing the remainder.
-     *
-     * <h5>Detailed description</h5>
-     * Assuming the following sequence of calls:
-     * <pre>{@code
-     *   BoundedSource<T> initial = reader.getCurrentSource();
-     *   BoundedSource<T> residual = reader.splitAtFraction(fraction);
-     *   BoundedSource<T> primary = reader.getCurrentSource();
-     * }</pre>
-     * <ul>
-     *  <li> The "primary" and "residual" sources, when read, should together cover the same
-     *  set of records as "initial".
-     *  <li> The current reader should continue to be in a valid state, and continuing to read
-     *  from it should, together with the records it already read, yield the same records
-     *  as would have been read by "primary".
-     *  <li> The amount of data read by "primary" should ideally represent approximately
-     *  the given fraction of the amount of data read by "initial".
-     * </ul>
-     * For example, a reader that reads a range of offsets <i>[A, B)</i> in a file might implement
-     * this method by truncating the current range to <i>[A, A + fraction*(B-A))</i> and returning
-     * a Source representing the range <i>[A + fraction*(B-A), B)</i>.
-     *
-     * <p>This method should return {@code null} if the split cannot be performed for this fraction
-     * while satisfying the semantics above. E.g., a reader that reads a range of offsets
-     * in a file should return {@code null} if it is already past the position in its range
-     * corresponding to the given fraction. In this case, the method MUST have no effect
-     * (the reader must behave as if the method hadn't been called at all).
-     *
-     * <h5>Statefulness</h5>
-     * Since this method (if successful) affects the reader's source, in subsequent invocations
-     * "fraction" should be interpreted relative to the new current source.
-     *
-     * <h5>Thread safety and blocking</h5>
-     * This method will be called concurrently to other methods (however there will not be multiple
-     * concurrent invocations of this method itself), and it is critical for it to be implemented
-     * in a thread-safe way (otherwise data loss is possible).
-     *
-     * <p>It is also very important that this method always completes quickly. In particular,
-     * it should not perform or wait on any blocking operations such as I/O, RPCs etc. Violating
-     * this requirement may stall completion of the work item or even cause it to fail.
-     *
-     * <p>It is incorrect to make both this method and {@link #start}/{@link #advance}
-     * {@code synchronized}, because those methods can perform blocking operations, and then
-     * this method would have to wait for those calls to complete.
-     *
-     * <p>{@link com.google.cloud.dataflow.sdk.io.range.RangeTracker} makes it easy to implement
-     * this method safely and correctly.
-     *
-     * <p>By default, returns null to indicate that splitting is not possible.
-     */
-    public BoundedSource<T> splitAtFraction(double fraction) {
-      // Default implementation: splitting is not supported, so the request is always declined.
-      return null;
-    }
-
-    /**
-     * By default, returns the minimum possible timestamp.
-     */
-    @Override
-    public Instant getCurrentTimestamp() throws NoSuchElementException {
-      // Default implementation: every record is stamped with the minimum timestamp constant.
-      return BoundedWindow.TIMESTAMP_MIN_VALUE;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CompressedSource.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CompressedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CompressedSource.java
deleted file mode 100644
index e3dca91..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CompressedSource.java
+++ /dev/null
@@ -1,413 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.common.base.Preconditions;
-import com.google.common.io.ByteStreams;
-import com.google.common.primitives.Ints;
-
-import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-
-import java.io.IOException;
-import java.io.PushbackInputStream;
-import java.io.Serializable;
-import java.nio.channels.Channels;
-import java.nio.channels.ReadableByteChannel;
-import java.util.NoSuchElementException;
-import java.util.zip.GZIPInputStream;
-
-/**
- * A Source that reads from compressed files. A {@code CompressedSource} wraps a delegate
- * {@link FileBasedSource} that is able to read the decompressed file format.
- *
- * <p>For example, use the following to read from a gzip-compressed XML file:
- *
- * <pre> {@code
- * XmlSource mySource = XmlSource.from(...);
- * PCollection<T> collection = p.apply(Read.from(CompressedSource
- *     .from(mySource)
- *     .withDecompression(CompressedSource.CompressionMode.GZIP)));
- * } </pre>
- *
- * <p>Supported compression algorithms are {@link CompressionMode#GZIP} and
- * {@link CompressionMode#BZIP2}. User-defined compression types are supported by implementing
- * {@link DecompressingChannelFactory}.
- *
- * <p>By default, the compression algorithm is selected from those supported in
- * {@link CompressionMode} based on the file name provided to the source, namely
- * {@code ".bz2"} indicates {@link CompressionMode#BZIP2} and {@code ".gz"} indicates
- * {@link CompressionMode#GZIP}. If the file name does not match any of the supported
- * algorithms, it is assumed to be uncompressed data.
- *
- * @param <T> The type to read from the compressed file.
- */
-@Experimental(Experimental.Kind.SOURCE_SINK)
-public class CompressedSource<T> extends FileBasedSource<T> {
-  /**
-   * Factory interface for creating channels that decompress the content of an underlying channel.
-   *
-   * <p>The interface extends {@link Serializable}, so implementations must be serializable.
-   */
-  public static interface DecompressingChannelFactory extends Serializable {
-    /**
-     * Given a channel, create a channel that decompresses the content read from the channel.
-     *
-     * @param channel the channel carrying the compressed content
-     * @return a channel that yields the decompressed content of {@code channel}
-     * @throws IOException if creating the decompressing channel fails
-     */
-    public ReadableByteChannel createDecompressingChannel(ReadableByteChannel channel)
-        throws IOException;
-  }
-
-  /**
-   * Factory interface for creating channels that decompress the content of an underlying channel,
-   * based on both the channel and the file name.
-   */
-  private static interface FileNameBasedDecompressingChannelFactory
-      extends DecompressingChannelFactory {
-    /**
-     * Given a file name and a channel, create a channel that decompresses the content read from
-     * the channel.
-     *
-     * @param fileName the name of the file that {@code channel} reads from
-     * @param channel the channel carrying the file content
-     * @return a channel that yields the decompressed content of {@code channel}
-     * @throws IOException if creating the decompressing channel fails
-     */
-    ReadableByteChannel createDecompressingChannel(String fileName, ReadableByteChannel channel)
-        throws IOException;
-
-    /**
-     * Given a file name, returns true if the file name matches any supported compression
-     * scheme.
-     */
-    boolean isCompressed(String fileName);
-  }
-
-  /**
-   * The compression formats that {@code CompressedSource} can decode out of the box.
-   */
-  public enum CompressionMode implements DecompressingChannelFactory {
-    /**
-     * Decodes a byte channel on the assumption that it holds gzip data.
-     */
-    GZIP {
-      @Override
-      public boolean matches(String fileName) {
-        String lowerCased = fileName.toLowerCase();
-        return lowerCased.endsWith(".gz");
-      }
-
-      @Override
-      public ReadableByteChannel createDecompressingChannel(ReadableByteChannel channel)
-          throws IOException {
-        // The input stream returned from the GCS connector may already be decompressed (GCS
-        // decides this from the content-encoding property), so peek at the first two bytes and
-        // only gunzip when they carry the gzip magic number.
-        final int magicLength = 2;
-        PushbackInputStream peekable =
-            new PushbackInputStream(Channels.newInputStream(channel), magicLength);
-        byte[] magic = new byte[magicLength];
-        int available = ByteStreams.read(peekable, magic, 0, magicLength);
-        // Return the peeked bytes to the stream so that readers still see them.
-        peekable.unread(magic, 0, available);
-
-        if (available < magicLength) {
-          return Channels.newChannel(peekable);
-        }
-        // The second header byte goes in the higher-order position of the assembled int.
-        byte padding = 0x00;
-        boolean gzipped =
-            Ints.fromBytes(padding, padding, magic[1], magic[0]) == GZIPInputStream.GZIP_MAGIC;
-        if (gzipped) {
-          return Channels.newChannel(new GzipCompressorInputStream(peekable));
-        }
-        return Channels.newChannel(peekable);
-      }
-    },
-
-    /**
-     * Decodes a byte channel on the assumption that it holds bzip2 data.
-     */
-    BZIP2 {
-      @Override
-      public boolean matches(String fileName) {
-        String lowerCased = fileName.toLowerCase();
-        return lowerCased.endsWith(".bz2");
-      }
-
-      @Override
-      public ReadableByteChannel createDecompressingChannel(ReadableByteChannel channel)
-          throws IOException {
-        BZip2CompressorInputStream decompressed =
-            new BZip2CompressorInputStream(Channels.newInputStream(channel));
-        return Channels.newChannel(decompressed);
-      }
-    };
-
-    /**
-     * Returns {@code true} if the given file name implies that the contents are compressed
-     * according to the compression embodied by this factory.
-     */
-    public abstract boolean matches(String fileName);
-
-    @Override
-    public abstract ReadableByteChannel createDecompressingChannel(ReadableByteChannel channel)
-        throws IOException;
-  }
-
-  /**
-   * Reads a byte channel detecting compression according to the file name. If the file name
-   * does not match any known {@link CompressionMode}, it is presumed to be uncompressed.
-   *
-   * <p>Only the two-argument {@code createDecompressingChannel(String, ReadableByteChannel)} is
-   * supported; the single-argument variant always throws.
-   */
-  private static class DecompressAccordingToFilename
-      implements FileNameBasedDecompressingChannelFactory {
-
-    @Override
-    public ReadableByteChannel createDecompressingChannel(
-        String fileName, ReadableByteChannel channel) throws IOException {
-      for (CompressionMode type : CompressionMode.values()) {
-        if (type.matches(fileName)) {
-          return type.createDecompressingChannel(channel);
-        }
-      }
-      // Uncompressed
-      return channel;
-    }
-
-    /**
-     * Always throws: without a file name there is nothing to select a compression mode from.
-     *
-     * @throws UnsupportedOperationException always
-     */
-    @Override
-    public ReadableByteChannel createDecompressingChannel(ReadableByteChannel channel) {
-      // Argument order matters: the first signature printed is the unsupported one
-      // (channel only), the second is the supported one (file name plus channel).
-      throw new UnsupportedOperationException(
-          String.format("%s does not support createDecompressingChannel(%s) but only"
-              + " createDecompressingChannel(%s,%s)",
-              getClass().getSimpleName(),
-              ReadableByteChannel.class.getSimpleName(),
-              String.class.getSimpleName(),
-              ReadableByteChannel.class.getSimpleName()));
-    }
-
-    @Override
-    public boolean isCompressed(String fileName) {
-      for (CompressionMode type : CompressionMode.values()) {
-        if (type.matches(fileName)) {
-          return true;
-        }
-      }
-      return false;
-    }
-  }
-
-  private final FileBasedSource<T> sourceDelegate;
-  private final DecompressingChannelFactory channelFactory;
-
-  /**
-   * Creates a {@link Read} transform that reads from the underlying
-   * {@link FileBasedSource} {@code sourceDelegate} after decompressing it with a {@link
-   * DecompressingChannelFactory}.
-   */
-  public static <T> Read.Bounded<T> readFromSource(
-      FileBasedSource<T> sourceDelegate, DecompressingChannelFactory channelFactory) {
-    return Read.from(new CompressedSource<>(sourceDelegate, channelFactory));
-  }
-
-  /**
-   * Creates a {@code CompressedSource} from an underlying {@code FileBasedSource}. The type
-   * of compression used will be based on the file name extension unless explicitly
-   * configured via {@link CompressedSource#withDecompression}.
-   */
-  public static <T> CompressedSource<T> from(FileBasedSource<T> sourceDelegate) {
-    return new CompressedSource<>(sourceDelegate, new DecompressAccordingToFilename());
-  }
-
-  /**
-   * Return a {@code CompressedSource} that is like this one but will decompress its underlying file
-   * with the given {@link DecompressingChannelFactory}.
-   */
-  public CompressedSource<T> withDecompression(DecompressingChannelFactory channelFactory) {
-    return new CompressedSource<>(this.sourceDelegate, channelFactory);
-  }
-
-  /**
-   * Creates a {@code CompressedSource} from a delegate file based source and a decompressing
-   * channel factory.
-   */
-  private CompressedSource(
-      FileBasedSource<T> sourceDelegate, DecompressingChannelFactory channelFactory) {
-    super(sourceDelegate.getFileOrPatternSpec(), Long.MAX_VALUE);
-    this.sourceDelegate = sourceDelegate;
-    this.channelFactory = channelFactory;
-  }
-
-  /**
-   * Creates a {@code CompressedSource} for an individual file. Used by {@link
-   * CompressedSource#createForSubrangeOfFile}.
-   */
-  private CompressedSource(FileBasedSource<T> sourceDelegate,
-      DecompressingChannelFactory channelFactory, String filePatternOrSpec, long minBundleSize,
-      long startOffset, long endOffset) {
-    super(filePatternOrSpec, minBundleSize, startOffset, endOffset);
-    Preconditions.checkArgument(
-        startOffset == 0,
-        "CompressedSources must start reading at offset 0. Requested offset: " + startOffset);
-    this.sourceDelegate = sourceDelegate;
-    this.channelFactory = channelFactory;
-  }
-
-  /**
-   * Validates that the delegate source is a valid source and that the channel factory is not null.
-   */
-  @Override
-  public void validate() {
-    super.validate();
-    Preconditions.checkNotNull(sourceDelegate);
-    sourceDelegate.validate();
-    Preconditions.checkNotNull(channelFactory);
-  }
-
-  /**
-   * Creates a {@code CompressedSource} for a subrange of a file. Called by superclass to create a
-   * source for a single file.
-   */
-  @Override
-  protected FileBasedSource<T> createForSubrangeOfFile(String fileName, long start, long end) {
-    return new CompressedSource<>(sourceDelegate.createForSubrangeOfFile(fileName, start, end),
-        channelFactory, fileName, Long.MAX_VALUE, start, end);
-  }
-
-  /**
-   * Determines whether a single file represented by this source is splittable. Returns true
-   * if we are using the default decompression factory and it determines
-   * from the requested file name that the file is not compressed.
-   */
-  @Override
-  protected final boolean isSplittable() throws Exception {
-    if (channelFactory instanceof FileNameBasedDecompressingChannelFactory) {
-      FileNameBasedDecompressingChannelFactory fileNameBasedChannelFactory =
-          (FileNameBasedDecompressingChannelFactory) channelFactory;
-      return !fileNameBasedChannelFactory.isCompressed(getFileOrPatternSpec());
-    }
-    return true;
-  }
-
-  /**
-   * Creates a {@code FileBasedReader} to read a single file.
-   *
-   * <p>Uses the delegate source to create a single file reader for the delegate source.
-   * Utilizes the default decompression channel factory to not wrap the source reader
-   * if the file name does not represent a compressed file allowing for splitting of
-   * the source.
-   */
-  @Override
-  protected final FileBasedReader<T> createSingleFileReader(PipelineOptions options) {
-    if (channelFactory instanceof FileNameBasedDecompressingChannelFactory) {
-      FileNameBasedDecompressingChannelFactory fileNameBasedChannelFactory =
-          (FileNameBasedDecompressingChannelFactory) channelFactory;
-      if (!fileNameBasedChannelFactory.isCompressed(getFileOrPatternSpec())) {
-        return sourceDelegate.createSingleFileReader(options);
-      }
-    }
-    return new CompressedReader<T>(
-        this, sourceDelegate.createSingleFileReader(options));
-  }
-
-  /**
-   * Returns whether the delegate source produces sorted keys.
-   */
-  @Override
-  public final boolean producesSortedKeys(PipelineOptions options) throws Exception {
-    return sourceDelegate.producesSortedKeys(options);
-  }
-
-  /**
-   * Returns the delegate source's default output coder.
-   */
-  @Override
-  public final Coder<T> getDefaultOutputCoder() {
-    return sourceDelegate.getDefaultOutputCoder();
-  }
-
-  public final DecompressingChannelFactory getChannelFactory() {
-    return channelFactory;
-  }
-
-  /**
-   * Reader for a {@link CompressedSource}. Decompresses its input and uses a delegate
-   * reader to read elements from the decompressed input.
-   * @param <T> The type of records read from the source.
-   */
-  public static class CompressedReader<T> extends FileBasedReader<T> {
-
-    private final FileBasedReader<T> readerDelegate;
-    private final CompressedSource<T> source;
-    private int numRecordsRead;
-
-    /**
-     * Create a {@code CompressedReader} from a {@code CompressedSource} and delegate reader.
-     */
-    public CompressedReader(CompressedSource<T> source, FileBasedReader<T> readerDelegate) {
-      super(source);
-      this.source = source;
-      this.readerDelegate = readerDelegate;
-    }
-
-    /**
-     * Gets the current record from the delegate reader.
-     */
-    @Override
-    public T getCurrent() throws NoSuchElementException {
-      return readerDelegate.getCurrent();
-    }
-
-    /**
-     * Returns true only for the first record; compressed sources cannot be split.
-     */
-    @Override
-    protected final boolean isAtSplitPoint() {
-      // We have to return true for the first record, but not for the state before reading it,
-      // and not for the state after reading any other record. Hence == rather than >= or <=.
-      // This is required because FileBasedReader is intended for readers that can read a range
-      // of offsets in a file and where the range can be split in parts. CompressedReader,
-      // however, is a degenerate case because it cannot be split, but it has to satisfy the
-      // semantics of offsets and split points anyway.
-      return numRecordsRead == 1;
-    }
-
-    /**
-     * Creates a decompressing channel from the input channel and passes it to its delegate reader's
-     * {@link FileBasedReader#startReading(ReadableByteChannel)}.
-     */
-    @Override
-    protected final void startReading(ReadableByteChannel channel) throws IOException {
-      if (source.getChannelFactory() instanceof FileNameBasedDecompressingChannelFactory) {
-        FileNameBasedDecompressingChannelFactory channelFactory =
-            (FileNameBasedDecompressingChannelFactory) source.getChannelFactory();
-        readerDelegate.startReading(channelFactory.createDecompressingChannel(
-            getCurrentSource().getFileOrPatternSpec(),
-            channel));
-      } else {
-        readerDelegate.startReading(source.getChannelFactory().createDecompressingChannel(
-            channel));
-      }
-    }
-
-    /**
-     * Reads the next record via the delegate reader.
-     */
-    @Override
-    protected final boolean readNextRecord() throws IOException {
-      if (!readerDelegate.readNextRecord()) {
-        return false;
-      }
-      ++numRecordsRead;
-      return true;
-    }
-
-    /**
-     * Returns the delegate reader's current offset in the decompressed input.
-     */
-    @Override
-    protected final long getCurrentOffset() {
-      return readerDelegate.getCurrentOffset();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingInput.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingInput.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingInput.java
deleted file mode 100644
index 07609ba..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingInput.java
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.io.CountingSource.NowTimestampFn;
-import com.google.cloud.dataflow.sdk.io.Read.Unbounded;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.values.PBegin;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.common.base.Optional;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-/**
- * A {@link PTransform} that produces longs. When used to produce a
- * {@link IsBounded#BOUNDED bounded} {@link PCollection}, {@link CountingInput} starts at {@code 0}
- * and counts up to a specified maximum. When used to produce an
- * {@link IsBounded#UNBOUNDED unbounded} {@link PCollection}, it counts up to {@link Long#MAX_VALUE}
- * and then never produces more output. (In practice, this limit should never be reached.)
- *
- * <p>The bounded {@link CountingInput} is implemented based on {@link OffsetBasedSource} and
- * {@link OffsetBasedSource.OffsetBasedReader}, so it performs efficient initial splitting and it
- * supports dynamic work rebalancing.
- *
- * <p>To produce a bounded {@code PCollection<Long>}, use {@link CountingInput#upTo(long)}:
- *
- * <pre>{@code
- * Pipeline p = ...
- * PTransform<PBegin, PCollection<Long>> producer = CountingInput.upTo(1000);
- * PCollection<Long> bounded = p.apply(producer);
- * }</pre>
- *
- * <p>To produce an unbounded {@code PCollection<Long>}, use {@link CountingInput#unbounded()},
- * calling {@link UnboundedCountingInput#withTimestampFn(SerializableFunction)} to provide values
- * with timestamps other than {@link Instant#now}.
- *
- * <pre>{@code
- * Pipeline p = ...
- *
- * // To create an unbounded producer that uses processing time as the element timestamp.
- * PCollection<Long> unbounded = p.apply(CountingInput.unbounded());
- * // Or, to create an unbounded source that uses a provided function to set the element timestamp.
- * PCollection<Long> unboundedWithTimestamps =
- *     p.apply(CountingInput.unbounded().withTimestampFn(someFn));
- * }</pre>
- */
-public class CountingInput {
-  /**
-   * Creates a {@link BoundedCountingInput} that will produce the specified number of elements,
-   * from {@code 0} to {@code numElements - 1}.
-   */
-  public static BoundedCountingInput upTo(long numElements) {
-    checkArgument(numElements > 0, "numElements (%s) must be greater than 0", numElements);
-    return new BoundedCountingInput(numElements);
-  }
-
-  /**
-   * Creates an {@link UnboundedCountingInput} that will produce numbers starting from {@code 0} up
-   * to {@link Long#MAX_VALUE}.
-   *
-   * <p>After {@link Long#MAX_VALUE}, the transform never produces more output. (In practice, this
-   * limit should never be reached.)
-   *
-   * <p>Elements in the resulting {@link PCollection PCollection&lt;Long&gt;} will by default have
-   * timestamps corresponding to processing time at element generation, provided by
-   * {@link Instant#now}. Use the transform returned by
-   * {@link UnboundedCountingInput#withTimestampFn(SerializableFunction)} to control the output
-   * timestamps.
-   */
-  public static UnboundedCountingInput unbounded() {
-    return new UnboundedCountingInput(
-        new NowTimestampFn(), Optional.<Long>absent(), Optional.<Duration>absent());
-  }
-
-  /**
-   * A {@link PTransform} that will produce a specified number of {@link Long Longs} starting from
-   * 0.
-   */
-  public static class BoundedCountingInput extends PTransform<PBegin, PCollection<Long>> {
-    private final long numElements;
-
-    private BoundedCountingInput(long numElements) {
-      this.numElements = numElements;
-    }
-
-    @SuppressWarnings("deprecation")
-    @Override
-    public PCollection<Long> apply(PBegin begin) {
-      return begin.apply(Read.from(CountingSource.upTo(numElements)));
-    }
-  }
-
-  /**
-   * A {@link PTransform} that will produce numbers starting from {@code 0} up to
-   * {@link Long#MAX_VALUE}.
-   *
-   * <p>After {@link Long#MAX_VALUE}, the transform never produces more output. (In practice, this
-   * limit should never be reached.)
-   *
-   * <p>Elements in the resulting {@link PCollection PCollection&lt;Long&gt;} will by default have
-   * timestamps corresponding to processing time at element generation, provided by
-   * {@link Instant#now}. Use the transform returned by
-   * {@link UnboundedCountingInput#withTimestampFn(SerializableFunction)} to control the output
-   * timestamps.
-   */
-  public static class UnboundedCountingInput extends PTransform<PBegin, PCollection<Long>> {
-    private final SerializableFunction<Long, Instant> timestampFn;
-    private final Optional<Long> maxNumRecords;
-    private final Optional<Duration> maxReadTime;
-
-    private UnboundedCountingInput(
-        SerializableFunction<Long, Instant> timestampFn,
-        Optional<Long> maxNumRecords,
-        Optional<Duration> maxReadTime) {
-      this.timestampFn = timestampFn;
-      this.maxNumRecords = maxNumRecords;
-      this.maxReadTime = maxReadTime;
-    }
-
-    /**
-     * Returns an {@link UnboundedCountingInput} like this one, but where output elements have the
-     * timestamp specified by the timestampFn.
-     *
-     * <p>Note that the timestamps produced by {@code timestampFn} must not decrease.
-     */
-    public UnboundedCountingInput withTimestampFn(SerializableFunction<Long, Instant> timestampFn) {
-      return new UnboundedCountingInput(timestampFn, maxNumRecords, maxReadTime);
-    }
-
-    /**
-     * Returns an {@link UnboundedCountingInput} like this one, but that will read at most the
-     * specified number of elements.
-     *
-     * <p>A bounded number of elements will be produced by the result transform, and the result
-     * {@link PCollection} will be {@link IsBounded#BOUNDED bounded}.
-     */
-    public UnboundedCountingInput withMaxNumRecords(long maxRecords) {
-      checkArgument(
-          maxRecords > 0, "MaxRecords must be a positive (nonzero) value. Got %s", maxRecords);
-      return new UnboundedCountingInput(timestampFn, Optional.of(maxRecords), maxReadTime);
-    }
-
-    /**
-     * Returns an {@link UnboundedCountingInput} like this one, but that will read for at most the
-     * specified amount of time.
-     *
-     * <p>A bounded number of elements will be produced by the result transform, and the result
-     * {@link PCollection} will be {@link IsBounded#BOUNDED bounded}.
-     */
-    public UnboundedCountingInput withMaxReadTime(Duration readTime) {
-      checkNotNull(readTime, "ReadTime cannot be null");
-      return new UnboundedCountingInput(timestampFn, maxNumRecords, Optional.of(readTime));
-    }
-
-    @SuppressWarnings("deprecation")
-    @Override
-    public PCollection<Long> apply(PBegin begin) {
-      Unbounded<Long> read = Read.from(CountingSource.unboundedWithTimestampFn(timestampFn));
-      if (!maxNumRecords.isPresent() && !maxReadTime.isPresent()) {
-        return begin.apply(read);
-      } else if (maxNumRecords.isPresent() && !maxReadTime.isPresent()) {
-        return begin.apply(read.withMaxNumRecords(maxNumRecords.get()));
-      } else if (!maxNumRecords.isPresent() && maxReadTime.isPresent()) {
-        return begin.apply(read.withMaxReadTime(maxReadTime.get()));
-      } else {
-        return begin.apply(
-            read.withMaxReadTime(maxReadTime.get()).withMaxNumRecords(maxNumRecords.get()));
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingSource.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingSource.java
deleted file mode 100644
index 412f3a7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/CountingSource.java
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.coders.AvroCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
-import com.google.cloud.dataflow.sdk.coders.VarLongCoder;
-import com.google.cloud.dataflow.sdk.io.CountingInput.UnboundedCountingInput;
-import com.google.cloud.dataflow.sdk.io.UnboundedSource.UnboundedReader;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.collect.ImmutableList;
-
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-/**
- * A source that produces longs. When used as a {@link BoundedSource}, {@link CountingSource}
- * starts at {@code 0} and counts up to a specified maximum. When used as an
- * {@link UnboundedSource}, it counts up to {@link Long#MAX_VALUE} and then never produces more
- * output. (In practice, this limit should never be reached.)
- *
- * <p>The bounded {@link CountingSource} is implemented based on {@link OffsetBasedSource} and
- * {@link OffsetBasedSource.OffsetBasedReader}, so it performs efficient initial splitting and it
- * supports dynamic work rebalancing.
- *
- * <p>To produce a bounded {@code PCollection<Long>}, use {@link CountingInput#upTo(long)}:
- *
- * <pre>{@code
- * Pipeline p = ...
- * PTransform<PBegin, PCollection<Long>> producer = CountingInput.upTo(1000);
- * PCollection<Long> bounded = p.apply(producer);
- * }</pre>
- *
- * <p>To produce an unbounded {@code PCollection<Long>}, use {@link CountingInput#unbounded()},
- * calling {@link UnboundedCountingInput#withTimestampFn(SerializableFunction)} to provide values
- * with timestamps other than {@link Instant#now}.
- *
- * <pre>{@code
- * Pipeline p = ...
- *
- * // To create an unbounded PCollection that uses processing time as the element timestamp.
- * PCollection<Long> unbounded = p.apply(CountingInput.unbounded());
- * // Or, to create an unbounded source that uses a provided function to set the element timestamp.
- * PCollection<Long> unboundedWithTimestamps =
- *     p.apply(CountingInput.unbounded().withTimestampFn(someFn));
- *
- * }</pre>
- */
-public class CountingSource {
-  /**
-   * Creates a {@link BoundedSource} that will produce the specified number of elements,
-   * from {@code 0} to {@code numElements - 1}.
-   *
-   * @deprecated use {@link CountingInput#upTo(long)} instead
-   */
-  @Deprecated
-  public static BoundedSource<Long> upTo(long numElements) {
-    checkArgument(numElements > 0, "numElements (%s) must be greater than 0", numElements);
-    return new BoundedCountingSource(0, numElements);
-  }
-
-  /**
-   * Creates an {@link UnboundedSource} that will produce numbers starting from {@code 0} up to
-   * {@link Long#MAX_VALUE}.
-   *
-   * <p>After {@link Long#MAX_VALUE}, the source never produces more output. (In practice, this
-   * limit should never be reached.)
-   *
-   * <p>Elements in the resulting {@link PCollection PCollection&lt;Long&gt;} will have timestamps
-   * corresponding to processing time at element generation, provided by {@link Instant#now}.
-   *
-   * @deprecated use {@link CountingInput#unbounded()} instead
-   */
-  @Deprecated
-  public static UnboundedSource<Long, CounterMark> unbounded() {
-    return unboundedWithTimestampFn(new NowTimestampFn());
-  }
-
-  /**
-   * Creates an {@link UnboundedSource} that will produce numbers starting from {@code 0} up to
-   * {@link Long#MAX_VALUE}, with element timestamps supplied by the specified function.
-   *
-   * <p>After {@link Long#MAX_VALUE}, the source never produces more output. (In practice, this
-   * limit should never be reached.)
-   *
-   * <p>Note that the timestamps produced by {@code timestampFn} must not decrease.
-   *
-   * @deprecated use {@link CountingInput#unbounded()} and call
-   *             {@link UnboundedCountingInput#withTimestampFn(SerializableFunction)} instead
-   */
-  @Deprecated
-  public static UnboundedSource<Long, CounterMark> unboundedWithTimestampFn(
-      SerializableFunction<Long, Instant> timestampFn) {
-    return new UnboundedCountingSource(0, 1, timestampFn);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////////////////////
-
-  /** Prevent instantiation. */
-  private CountingSource() {}
-
-  /**
-   * A function that returns {@link Instant#now} as the timestamp for each generated element.
-   */
-  static class NowTimestampFn implements SerializableFunction<Long, Instant> {
-    @Override
-    public Instant apply(Long input) {
-      return Instant.now();
-    }
-  }
-
-  /**
-   * An implementation of {@link CountingSource} that produces a bounded {@link PCollection}.
-   * It is implemented on top of {@link OffsetBasedSource} (with associated reader
-   * {@link BoundedCountingReader}) and performs efficient initial splitting and supports dynamic
-   * work rebalancing.
-   */
-  private static class BoundedCountingSource extends OffsetBasedSource<Long> {
-    /**
-     * Creates a {@link BoundedCountingSource} that generates the numbers in the specified
-     * {@code [start, end)} range.
-     */
-    public BoundedCountingSource(long start, long end) {
-      super(start, end, 1 /* can be split every 1 offset */);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////////////////////
-
-    @Override
-    public long getBytesPerOffset() {
-      return 8;
-    }
-
-    @Override
-    public long getMaxEndOffset(PipelineOptions options) throws Exception {
-      return getEndOffset();
-    }
-
-    @Override
-    public OffsetBasedSource<Long> createSourceForSubrange(long start, long end) {
-      return new BoundedCountingSource(start, end);
-    }
-
-    @Override
-    public boolean producesSortedKeys(PipelineOptions options) throws Exception {
-      return true;
-    }
-
-    @Override
-    public com.google.cloud.dataflow.sdk.io.BoundedSource.BoundedReader<Long> createReader(
-        PipelineOptions options) throws IOException {
-      return new BoundedCountingReader(this);
-    }
-
-    @Override
-    public Coder<Long> getDefaultOutputCoder() {
-      return VarLongCoder.of();
-    }
-  }
-
-  /**
-   * The reader associated with {@link BoundedCountingSource}.
-   *
-   * @see BoundedCountingSource
-   */
-  private static class BoundedCountingReader extends OffsetBasedSource.OffsetBasedReader<Long> {
-    private long current;
-
-    public BoundedCountingReader(OffsetBasedSource<Long> source) {
-      super(source);
-    }
-
-    @Override
-    protected long getCurrentOffset() throws NoSuchElementException {
-      return current;
-    }
-
-    @Override
-    public synchronized BoundedCountingSource getCurrentSource()  {
-      return (BoundedCountingSource) super.getCurrentSource();
-    }
-
-    @Override
-    public Long getCurrent() throws NoSuchElementException {
-      return current;
-    }
-
-    @Override
-    protected boolean startImpl() throws IOException {
-      current = getCurrentSource().getStartOffset();
-      return true;
-    }
-
-    @Override
-    protected boolean advanceImpl() throws IOException {
-      current++;
-      return true;
-    }
-
-    @Override
-    public void close() throws IOException {}
-  }
-
-  /**
-   * An implementation of {@link CountingSource} that produces an unbounded {@link PCollection}.
-   */
-  private static class UnboundedCountingSource extends UnboundedSource<Long, CounterMark> {
-    /** The first number (>= 0) generated by this {@link UnboundedCountingSource}. */
-    private final long start;
-    /** The interval between numbers generated by this {@link UnboundedCountingSource}. */
-    private final long stride;
-    /** The function used to produce timestamps for the generated elements. */
-    private final SerializableFunction<Long, Instant> timestampFn;
-
-    /**
-     * Creates an {@link UnboundedSource} that will produce numbers starting from {@code 0} up to
-     * {@link Long#MAX_VALUE}, with element timestamps supplied by the specified function.
-     *
-     * <p>After {@link Long#MAX_VALUE}, the source never produces more output. (In practice, this
-     * limit should never be reached.)
-     *
-     * <p>Note that the timestamps produced by {@code timestampFn} must not decrease.
-     */
-    public UnboundedCountingSource(
-        long start, long stride, SerializableFunction<Long, Instant> timestampFn) {
-      this.start = start;
-      this.stride = stride;
-      this.timestampFn = timestampFn;
-    }
-
-    /**
-     * Splits an unbounded source {@code desiredNumSplits} ways by giving each split every
-     * {@code desiredNumSplits}th element that this {@link UnboundedCountingSource}
-     * produces.
-     *
-     * <p>E.g., if a source produces all even numbers {@code [0, 2, 4, 6, 8, ...)} and we want to
-     * split into 3 new sources, then the new sources will produce numbers that are 6 apart and
-     * are offset at the start by the original stride: {@code [0, 6, 12, ...)},
-     * {@code [2, 8, 14, ...)}, and {@code [4, 10, 16, ...)}.
-     */
-    @Override
-    public List<? extends UnboundedSource<Long, CountingSource.CounterMark>> generateInitialSplits(
-        int desiredNumSplits, PipelineOptions options) throws Exception {
-      // Using Javadoc example, stride 2 with 3 splits becomes stride 6.
-      long newStride = stride * desiredNumSplits;
-
-      ImmutableList.Builder<UnboundedCountingSource> splits = ImmutableList.builder();
-      for (int i = 0; i < desiredNumSplits; ++i) {
-        // Starts offset by the original stride. Using Javadoc example, this generates starts of
-        // 0, 2, and 4.
-        splits.add(new UnboundedCountingSource(start + i * stride, newStride, timestampFn));
-      }
-      return splits.build();
-    }
-
-    @Override
-    public UnboundedReader<Long> createReader(
-        PipelineOptions options, CounterMark checkpointMark) {
-      return new UnboundedCountingReader(this, checkpointMark);
-    }
-
-    @Override
-    public Coder<CountingSource.CounterMark> getCheckpointMarkCoder() {
-      return AvroCoder.of(CountingSource.CounterMark.class);
-    }
-
-    @Override
-    public void validate() {}
-
-    @Override
-    public Coder<Long> getDefaultOutputCoder() {
-      return VarLongCoder.of();
-    }
-  }
-
-  /**
-   * The reader associated with {@link UnboundedCountingSource}.
-   *
-   * @see UnboundedCountingSource
-   */
-  private static class UnboundedCountingReader extends UnboundedReader<Long> {
-    private UnboundedCountingSource source;
-    private long current;
-    private Instant currentTimestamp;
-
-    public UnboundedCountingReader(UnboundedCountingSource source, CounterMark mark) {
-      this.source = source;
-      if (mark == null) {
-        // Because we have not emitted an element yet, and start() calls advance, we need to
-        // "un-advance" so that start() produces the correct output.
-        this.current = source.start - source.stride;
-      } else {
-        this.current = mark.getLastEmitted();
-      }
-    }
-
-    @Override
-    public boolean start() throws IOException {
-      return advance();
-    }
-
-    @Override
-    public boolean advance() throws IOException {
-      // Overflow-safe check that (current + source.stride) <= Long.MAX_VALUE. Else, stop producing.
-      if (Long.MAX_VALUE - source.stride < current) {
-        return false;
-      }
-      current += source.stride;
-      currentTimestamp = source.timestampFn.apply(current);
-      return true;
-    }
-
-    @Override
-    public Instant getWatermark() {
-      return source.timestampFn.apply(current);
-    }
-
-    @Override
-    public CounterMark getCheckpointMark() {
-      return new CounterMark(current);
-    }
-
-    @Override
-    public UnboundedSource<Long, CounterMark> getCurrentSource() {
-      return source;
-    }
-
-    @Override
-    public Long getCurrent() throws NoSuchElementException {
-      return current;
-    }
-
-    @Override
-    public Instant getCurrentTimestamp() throws NoSuchElementException {
-      return currentTimestamp;
-    }
-
-    @Override
-    public void close() throws IOException {}
-  }
-
-  /**
-   * The checkpoint for an unbounded {@link CountingSource} is simply the last value produced. The
-   * associated source object encapsulates the information needed to produce the next value.
-   */
-  @DefaultCoder(AvroCoder.class)
-  public static class CounterMark implements UnboundedSource.CheckpointMark {
-    /** The last value emitted. */
-    private final long lastEmitted;
-
-    /**
-     * Creates a checkpoint mark reflecting the last emitted value.
-     */
-    public CounterMark(long lastEmitted) {
-      this.lastEmitted = lastEmitted;
-    }
-
-    /**
-     * Returns the last value emitted by the reader.
-     */
-    public long getLastEmitted() {
-      return lastEmitted;
-    }
-
-    /////////////////////////////////////////////////////////////////////////////////////
-
-    @SuppressWarnings("unused") // For AvroCoder
-    private CounterMark() {
-      this.lastEmitted = 0L;
-    }
-
-    @Override
-    public void finalizeCheckpoint() throws IOException {}
-   }
-}

[48/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java
deleted file mode 100644
index 00982e6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CoderRegistry.java
+++ /dev/null
@@ -1,843 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException.ReasonCode;
-import com.google.cloud.dataflow.sdk.coders.protobuf.ProtoCoder;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.TimestampedValue;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
-import com.google.protobuf.ByteString;
-
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.lang.reflect.ParameterizedType;
-import java.lang.reflect.Type;
-import java.lang.reflect.TypeVariable;
-import java.lang.reflect.WildcardType;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link CoderRegistry} allows registering the default {@link Coder} to use for a Java class,
- * and looking up and instantiating the default {@link Coder} for a Java type.
- *
- * <p>{@link CoderRegistry} uses the following mechanisms to determine a default {@link Coder} for a
- * Java class, in order of precedence:
- * <ol>
- *   <li>Registration:
- *     <ul>
- *       <li>A {@link CoderFactory} can be registered to handle a particular class via
- *           {@link #registerCoder(Class, CoderFactory)}.</li>
- *       <li>A {@link Coder} class with the static methods to satisfy
- *           {@link CoderFactories#fromStaticMethods} can be registered via
- *           {@link #registerCoder(Class, Class)}.</li>
- *       <li>Built-in types are registered via
- *           {@link #registerStandardCoders()}.</li>
- *     </ul>
- *   <li>Annotations: {@link DefaultCoder} can be used to annotate a type with
- *       the default {@code Coder} type. The {@link Coder} class must satisfy the requirements
- *       of {@link CoderProviders#fromStaticMethods}.
- *   <li>Fallback: A fallback {@link CoderProvider} is used to attempt to provide a {@link Coder}
- *       for any type. By default, this is {@link ProtoCoder#coderProvider()} followed by
- *       {@link SerializableCoder#PROVIDER}, which covers any type that is serializable via Java
- *       serialization. The fallback {@link CoderProvider} can be read and replaced via
- *       {@link #getFallbackCoderProvider()} and {@link #setFallbackCoderProvider}. Multiple
- *       fallbacks can be chained together using {@link CoderProviders#firstOf}.
- * </ol>
- */
-public class CoderRegistry implements CoderProvider {
-
-  private static final Logger LOG = LoggerFactory.getLogger(CoderRegistry.class);
-
-  /**
-   * Creates a registry with no registered {@link CoderFactory CoderFactories} whose fallback
-   * {@link CoderProvider} is {@link ProtoCoder#coderProvider()} chained with
-   * {@link SerializableCoder#PROVIDER} via {@link CoderProviders#firstOf}.
-   *
-   * <p>Standard coders are not registered here; call {@link #registerStandardCoders()} for that.
-   */
-  public CoderRegistry() {
-    setFallbackCoderProvider(
-        CoderProviders.firstOf(ProtoCoder.coderProvider(), SerializableCoder.PROVIDER));
-  }
-
-  /**
-   * Registers standard Coders with this CoderRegistry.
-   */
-  public void registerStandardCoders() {
-    registerCoder(Byte.class, ByteCoder.class);
-    registerCoder(ByteString.class, ByteStringCoder.class);
-    registerCoder(Double.class, DoubleCoder.class);
-    registerCoder(Instant.class, InstantCoder.class);
-    registerCoder(Integer.class, VarIntCoder.class);
-    registerCoder(Iterable.class, IterableCoder.class);
-    registerCoder(KV.class, KvCoder.class);
-    registerCoder(List.class, ListCoder.class);
-    registerCoder(Long.class, VarLongCoder.class);
-    registerCoder(Map.class, MapCoder.class);
-    registerCoder(Set.class, SetCoder.class);
-    registerCoder(String.class, StringUtf8Coder.class);
-    registerCoder(TableRow.class, TableRowJsonCoder.class);
-    registerCoder(TimestampedValue.class, TimestampedValue.TimestampedValueCoder.class);
-    registerCoder(Void.class, VoidCoder.class);
-    registerCoder(byte[].class, ByteArrayCoder.class);
-  }
-
-  /**
-   * Registers {@code coderClazz} as the default {@link Coder} class to handle encoding and
-   * decoding instances of {@code clazz}, overriding prior registrations if any exist.
-   *
-   * <p>Supposing {@code T} is the static type corresponding to the {@code clazz}, then
-   * {@code coderClazz} should have a static factory method with the following signature:
-   *
-   * <pre> {@code
-   * public static Coder<T> of(Coder<X> argCoder1, Coder<Y> argCoder2, ...)
-   * } </pre>
-   *
-   * <p>This method will be called to create instances of {@code Coder<T>} for values of type
-   * {@code T}, passing Coders for each of the generic type parameters of {@code T}.  If {@code T}
-   * takes no generic type parameters, then the {@code of()} factory method should have no
-   * arguments.
-   *
-   * <p>If {@code T} is a parameterized type, then it should additionally have a method with the
-   * following signature:
-   *
-   * <pre> {@code
-   * public static List<Object> getInstanceComponents(T exampleValue);
-   * } </pre>
-   *
-   * <p>This method will be called to decompose a value during the {@link Coder} inference process,
-   * to automatically choose {@link Coder Coders} for the components.
-   *
-   * @param clazz the class of objects to be encoded
-   * @param coderClazz a class with static factory methods to provide {@link Coder Coders}
-   */
-  public void registerCoder(Class<?> clazz, Class<?> coderClazz) {
-    registerCoder(clazz, CoderFactories.fromStaticMethods(coderClazz));
-  }
-
-  /**
-   * Registers {@code coderFactory} as the default {@link CoderFactory} to produce {@code Coder}
-   * instances to decode and encode instances of {@code clazz}. This will override prior
-   * registrations if any exist.
-   */
-  public void registerCoder(Class<?> clazz, CoderFactory coderFactory) {
-    // Map.put replaces any factory previously stored under the same class key.
-    coderFactoryMap.put(clazz, coderFactory);
-  }
-
-  /**
-   * Register the provided {@link Coder} for encoding all values of the specified {@code Class}.
-   * This will override prior registrations if any exist.
-   *
-   * <p>Not for use with generic rawtypes. Instead, register a {@link CoderFactory} via
-   * {@link #registerCoder(Class, CoderFactory)} or ensure your {@code Coder} class has the
-   * appropriate static methods and register it directly via {@link #registerCoder(Class, Class)}.
-   */
-  public <T> void registerCoder(Class<T> rawClazz, Coder<T> coder) {
-    Preconditions.checkArgument(
-      rawClazz.getTypeParameters().length == 0,
-      "CoderRegistry.registerCoder(Class<T>, Coder<T>) may not be used "
-      + "with unspecialized generic classes");
-
-    CoderFactory factory = CoderFactories.forCoder(coder);
-    registerCoder(rawClazz, factory);
-  }
-
-  /**
-   * Returns the {@link Coder} to use by default for values of the given type.
-   *
-   * @throws CannotProvideCoderException if there is no default Coder.
-   */
-  public <T> Coder<T> getDefaultCoder(TypeDescriptor<T> typeDescriptor)
-      throws CannotProvideCoderException {
-    // No type-to-coder bindings are known in this context.
-    Map<Type, Coder<?>> noBindings = Collections.emptyMap();
-    return getDefaultCoder(typeDescriptor, noBindings);
-  }
-
-  /**
-   * See {@link #getDefaultCoder(TypeDescriptor)}.
-   */
-  @Override
-  public <T> Coder<T> getCoder(TypeDescriptor<T> typeDescriptor)
-      throws CannotProvideCoderException {
-    // CoderProvider entry point; simply forwards to getDefaultCoder(TypeDescriptor).
-    return getDefaultCoder(typeDescriptor);
-  }
-
-  /**
-   * Returns the {@link Coder} to use by default for values of the given type, where the given input
-   * type uses the given {@link Coder}.
-   *
-   * @throws CannotProvideCoderException if there is no default Coder.
-   */
-  public <InputT, OutputT> Coder<OutputT> getDefaultCoder(
-      TypeDescriptor<OutputT> typeDescriptor,
-      TypeDescriptor<InputT> inputTypeDescriptor,
-      Coder<InputT> inputCoder)
-      throws CannotProvideCoderException {
-    // Derive the bindings implied by pairing the input type with its coder, then resolve the
-    // output type in that context.
-    Type inputType = inputTypeDescriptor.getType();
-    Map<Type, Coder<?>> inputBindings = getTypeToCoderBindings(inputType, inputCoder);
-    return getDefaultCoder(typeDescriptor, inputBindings);
-  }
-
-  /**
-   * Returns the {@link Coder} to use on elements produced by this function, given the {@link Coder}
-   * used for its input elements.
-   */
-  public <InputT, OutputT> Coder<OutputT> getDefaultOutputCoder(
-      SerializableFunction<InputT, OutputT> fn, Coder<InputT> inputCoder)
-      throws CannotProvideCoderException {
-
-    // View fn's concrete class as a specialization of SerializableFunction.
-    Type fnSupertype =
-        TypeDescriptor.of(fn.getClass()).getSupertype(SerializableFunction.class).getType();
-    ParameterizedType fnType = (ParameterizedType) fnSupertype;
-
-    // The first type argument is the input type, whose coder is already known.
-    Type inputType = fnType.getActualTypeArguments()[0];
-    Map<Type, Coder<InputT>> knownCoders = ImmutableMap.of(inputType, inputCoder);
-
-    // The second type parameter of SerializableFunction is the output type to solve for.
-    TypeVariable<?> outputParam = SerializableFunction.class.getTypeParameters()[1];
-    return getDefaultCoder(fn.getClass(), SerializableFunction.class, knownCoders, outputParam);
-  }
-
-  /**
-   * Returns the {@link Coder} to use for the specified type parameter specialization of the
-   * subclass, given {@link Coder Coders} to use for all other type parameters (if any).
-   *
-   * @throws CannotProvideCoderException if there is no default Coder.
-   */
-  public <T, OutputT> Coder<OutputT> getDefaultCoder(
-      Class<? extends T> subClass,
-      Class<T> baseClass,
-      Map<Type, ? extends Coder<?>> knownCoders,
-      TypeVariable<?> param)
-      throws CannotProvideCoderException {
-
-    // Inference runs for every type parameter of baseClass; only the requested one is kept.
-    Map<Type, Coder<?>> coderByTypeParam = getDefaultCoders(subClass, baseClass, knownCoders);
-
-    @SuppressWarnings("unchecked")
-    Coder<OutputT> requested = (Coder<OutputT>) coderByTypeParam.get(param);
-    if (requested == null) {
-      throw new CannotProvideCoderException(
-          "Cannot infer coder for type parameter " + param.getName());
-    }
-    return requested;
-  }
-
-  /**
-   * Returns the {@link Coder} to use for the provided example value, if it can be determined.
-   *
-   * @throws CannotProvideCoderException if there is no default {@link Coder} or
-   * more than one {@link Coder} matches
-   */
-  public <T> Coder<T> getDefaultCoder(T exampleValue) throws CannotProvideCoderException {
-    // A null example is treated as a value of type Void.
-    Class<?> clazz = exampleValue == null ? Void.class : exampleValue.getClass();
-
-    if (clazz.getTypeParameters().length == 0) {
-      // Non-generic class: trust the class-based lookup to produce a valid Coder<T>.
-      @SuppressWarnings("unchecked")
-      Coder<T> classCoder = (Coder<T>) getDefaultCoder(clazz);
-      return classCoder;
-    }
-
-    // Generic class: break the value into components and infer a coder for each of them.
-    CoderFactory factory = getDefaultCoderFactory(clazz);
-    List<Object> components = factory.getInstanceComponents(exampleValue);
-    if (components == null) {
-      throw new CannotProvideCoderException(String.format(
-          "Cannot provide coder based on value with class %s: The registered CoderFactory with "
-          + "class %s failed to decompose the value, which is required in order to provide "
-          + "Coders for the components.",
-          clazz.getCanonicalName(), factory.getClass().getCanonicalName()));
-    }
-
-    List<Coder<?>> componentCoders = new ArrayList<>();
-    for (Object component : components) {
-      Coder<?> componentCoder;
-      try {
-        componentCoder = getDefaultCoder(component);
-      } catch (CannotProvideCoderException exc) {
-        // Re-wrap so the message names the enclosing value's class, keeping the cause.
-        throw new CannotProvideCoderException(
-            String.format("Cannot provide coder based on value with class %s",
-                clazz.getCanonicalName()),
-            exc);
-      }
-      componentCoders.add(componentCoder);
-    }
-
-    // Trust the factory to map valid component coders to a valid Coder<T>.
-    @SuppressWarnings("unchecked")
-    Coder<T> assembled = (Coder<T>) factory.create(componentCoders);
-    return assembled;
-  }
-
-  /**
-   * Returns the {@link Coder} to use by default for values of the given class. The following three
-   * sources for a {@link Coder} will be attempted, in order:
-   *
-   * <ol>
-   *   <li>A {@link Coder} class registered explicitly via a call to {@link #registerCoder},
-   *   <li>A {@link DefaultCoder} annotation on the class,
-   *   <li>This registry's fallback {@link CoderProvider}, which may be able to generate a
-   *   {@link Coder} for an arbitrary class.
-   * </ol>
-   *
-   * @throws CannotProvideCoderException if a {@link Coder} cannot be provided
-   */
-  public <T> Coder<T> getDefaultCoder(Class<T> clazz) throws CannotProvideCoderException {
-
-    // 1. A CoderFactory registered explicitly for the class.
-    CannotProvideCoderException factoryFailure;
-    try {
-      CoderFactory registeredFactory = getDefaultCoderFactory(clazz);
-      LOG.debug("Default coder for {} found by factory", clazz);
-      @SuppressWarnings("unchecked")
-      Coder<T> fromFactory =
-          (Coder<T>) registeredFactory.create(Collections.<Coder<?>>emptyList());
-      return fromFactory;
-    } catch (CannotProvideCoderException exc) {
-      factoryFailure = exc;
-    }
-
-    // 2. A @DefaultCoder annotation on the class.
-    CannotProvideCoderException annotationFailure;
-    try {
-      return getDefaultCoderFromAnnotation(clazz);
-    } catch (CannotProvideCoderException exc) {
-      annotationFailure = exc;
-    }
-
-    // 3. The fallback CoderProvider, if one is configured.
-    CannotProvideCoderException fallbackFailure;
-    if (getFallbackCoderProvider() == null) {
-      fallbackFailure = new CannotProvideCoderException("no fallback CoderProvider configured");
-    } else {
-      try {
-        return getFallbackCoderProvider().getCoder(TypeDescriptor.<T>of(clazz));
-      } catch (CannotProvideCoderException exc) {
-        fallbackFailure = exc;
-      }
-    }
-
-    // All three sources failed: report every reason in a single message.
-    String message =
-        "Unable to provide a default Coder for " + clazz.getCanonicalName()
-        + ". Correct one of the following root causes:"
-        + "\n  Building a Coder using a registered CoderFactory failed: "
-        + factoryFailure.getMessage()
-        + "\n  Building a Coder from the @DefaultCoder annotation failed: "
-        + annotationFailure.getMessage()
-        + "\n  Building a Coder from the fallback CoderProvider failed: "
-        + fallbackFailure.getMessage();
-
-    throw new CannotProvideCoderException(message);
-  }
-
-  /**
-   * Sets the fallback {@link CoderProvider} for this registry. If no other method succeeds in
-   * providing a {@code Coder<T>} for a type {@code T}, then the registry will attempt to create
-   * a {@link Coder} using this {@link CoderProvider}.
-   *
-   * <p>The constructor sets this to {@link ProtoCoder#coderProvider()} chained with
-   * {@link SerializableCoder#PROVIDER} via {@link CoderProviders#firstOf}.
-   *
-   * <p>See {@link #getFallbackCoderProvider}.
-   *
-   * @param coderProvider the new fallback; stored as-is, with no null check
-   */
-  public void setFallbackCoderProvider(CoderProvider coderProvider) {
-    fallbackCoderProvider = coderProvider;
-  }
-
-  /**
-   * Returns the fallback {@link CoderProvider} for this registry.
-   *
-   * <p>See {@link #setFallbackCoderProvider}.
-   */
-  public CoderProvider getFallbackCoderProvider() {
-    // Returns whatever was last passed to setFallbackCoderProvider, which may be null.
-    return fallbackCoderProvider;
-  }
-
-  /**
-   * Returns a {@code Map} from each of {@code baseClass}'s type parameters to the {@link Coder} to
-   * use by default for it, in the context of {@code subClass}'s specialization of
-   * {@code baseClass}.
-   *
-   * <p>If no {@link Coder} can be inferred for a particular type parameter, then that type variable
-   * will be absent from the returned {@code Map}.
-   *
-   * <p>For example, if {@code baseClass} is {@code Map.class}, where {@code Map<K, V>} has type
-   * parameters {@code K} and {@code V}, and {@code subClass} extends {@code Map<String, Integer>}
-   * then the result will map the type variable {@code K} to a {@code Coder<String>} and the
-   * type variable {@code V} to a {@code Coder<Integer>}.
-   *
-   * <p>The {@code knownCoders} parameter can be used to provide known {@link Coder Coders} for any
-   * of the parameters; these will be used to infer the others.
-   *
-   * <p>Note that inference is attempted for every type variable. For a type
-   * {@code MyType<One, Two, Three>} inference will be attempted for all of {@code One},
-   * {@code Two}, {@code Three}, even if the requester only wants a {@link Coder} for {@code Two}.
-   *
-   * <p>For this reason {@code getDefaultCoders} (plural) does not throw an exception if a
-   * {@link Coder} for a particular type variable cannot be inferred, but merely omits the entry
-   * from the returned {@code Map}. It is the responsibility of the caller (usually
-   * {@link #getDefaultCoder}) to extract the desired coder or throw a
-   * {@link CannotProvideCoderException} when appropriate.
-   *
-   * @param subClass the concrete type whose specializations are being inferred
-   * @param baseClass the base type, a parameterized class
-   * @param knownCoders a map of the known {@link Coder Coders}, keyed by the type variables of
-   * {@code baseClass}
-   *
-   * @deprecated this method is not part of the public interface and will be made private
-   */
-  @Deprecated
-  public <T> Map<Type, Coder<?>> getDefaultCoders(
-      Class<? extends T> subClass,
-      Class<T> baseClass,
-      Map<Type, ? extends Coder<?>> knownCoders) {
-    TypeVariable<Class<T>>[] baseTypeParams = baseClass.getTypeParameters();
-    int paramCount = baseTypeParams.length;
-
-    // Line the known coders up positionally with baseClass's type parameters; a parameter
-    // with no known coder gets a null slot.
-    Coder<?>[] knownByPosition = new Coder<?>[paramCount];
-    for (int pos = 0; pos < paramCount; pos++) {
-      knownByPosition[pos] = knownCoders.get(baseTypeParams[pos]);
-    }
-
-    Coder<?>[] inferredByPosition = getDefaultCoders(subClass, baseClass, knownByPosition);
-
-    // Convert back to a map, leaving out parameters for which nothing could be inferred.
-    Map<Type, Coder<?>> coderByTypeParam = new HashMap<>();
-    for (int pos = 0; pos < paramCount; pos++) {
-      Coder<?> inferred = inferredByPosition[pos];
-      if (inferred != null) {
-        coderByTypeParam.put(baseTypeParams[pos], inferred);
-      }
-    }
-    return coderByTypeParam;
-  }
-
-  /**
-   * Returns an array listing, for each of {@code baseClass}'s type parameters, the {@link Coder} to
-   * use by default for it, in the context of {@code subClass}'s specialization of
-   * {@code baseClass}.
-   *
-   * <p>If a {@link Coder} cannot be inferred for a type variable, its slot in the resulting array
-   * will be {@code null}.
-   *
-   * <p>For example, if {@code baseClass} is {@code Map.class}, where {@code Map<K, V>} has type
-   * parameters {@code K} and {@code V} in that order, and {@code subClass} extends
-   * {@code Map<String, Integer>} then the result will contain a {@code Coder<String>} and a
-   * {@code Coder<Integer>}, in that order.
-   *
-   * <p>The {@code knownCoders} parameter can be used to provide known {@link Coder Coders} for any
-   * of the type parameters. These will be used to infer the others. If non-null, the length of this
-   * array must match the number of type parameters of {@code baseClass}, and simply be filled with
-   * {@code null} values for each type parameters without a known {@link Coder}.
-   *
-   * <p>Note that inference is attempted for every type variable. For a type
-   * {@code MyType<One, Two, Three>} inference will be attempted for all of {@code One},
-   * {@code Two}, {@code Three}, even if the requester only wants a {@link Coder} for {@code Two}.
-   *
-   * <p>For this reason {@code getDefaultCoders} (plural) does not throw an exception if a
-   * {@link Coder} for a particular type variable cannot be inferred. Instead, it results in a
-   * {@code null} in the array. It is the responsibility of the caller (usually
-   * {@link #getDefaultCoder}) to extract the desired coder or throw a
-   * {@link CannotProvideCoderException} when appropriate.
-   *
-   * @param subClass the concrete type whose specializations are being inferred
-   * @param baseClass the base type, a parameterized class
-   * @param knownCoders an array corresponding to the set of base class type parameters. Each entry
-   *        can be either a {@link Coder} (in which case it will be used for inference) or
-   *        {@code null} (in which case it will be inferred). May be {@code null} to indicate the
-   *        entire set of parameters should be inferred.
-   * @throws IllegalArgumentException if baseClass doesn't have type parameters or if the length of
-   *         {@code knownCoders} is not equal to the number of type parameters of {@code baseClass}.
-   */
-  private <T> Coder<?>[] getDefaultCoders(
-      Class<? extends T> subClass,
-      Class<T> baseClass,
-      @Nullable Coder<?>[] knownCoders) {
-    // Resolve how subClass specializes baseClass; the actual type arguments drive everything.
-    Type supertype = TypeDescriptor.of(subClass).getSupertype(baseClass).getType();
-    if (!(supertype instanceof ParameterizedType)) {
-      throw new IllegalArgumentException(supertype + " is not a ParameterizedType");
-    }
-    Type[] typeArgs = ((ParameterizedType) supertype).getActualTypeArguments();
-    int argCount = typeArgs.length;
-
-    // A null array means "infer everything"; otherwise the lengths must agree.
-    Coder<?>[] provided;
-    if (knownCoders == null) {
-      provided = new Coder<?>[argCount];
-    } else if (knownCoders.length == argCount) {
-      provided = knownCoders;
-    } else {
-      throw new IllegalArgumentException(
-          String.format("Class %s has %d parameters, but %d coders are requested.",
-              baseClass.getCanonicalName(), argCount, knownCoders.length));
-    }
-
-    // First pass: validate each provided coder and collect the bindings it implies. This must
-    // finish before any inference so that every binding is available to the second pass.
-    Map<Type, Coder<?>> bindings = new HashMap<>();
-    for (int i = 0; i < argCount; i++) {
-      Coder<?> known = provided[i];
-      if (known == null) {
-        continue;
-      }
-      try {
-        verifyCompatible(known, typeArgs[i]);
-      } catch (IncompatibleCoderException exn) {
-        throw new IllegalArgumentException(
-            String.format("Provided coders for type arguments of %s contain incompatibilities:"
-                + " Cannot encode elements of type %s with coder %s",
-                baseClass,
-                typeArgs[i], known),
-            exn);
-      }
-      bindings.putAll(getTypeToCoderBindings(typeArgs[i], known));
-    }
-
-    // Second pass: keep provided coders and infer the rest, leaving null where inference fails.
-    Coder<?>[] resolved = new Coder<?>[argCount];
-    for (int i = 0; i < argCount; i++) {
-      if (provided[i] != null) {
-        resolved[i] = provided[i];
-        continue;
-      }
-      try {
-        resolved[i] = getDefaultCoder(typeArgs[i], bindings);
-      } catch (CannotProvideCoderException exc) {
-        // Failure for one parameter is reported as a null slot rather than propagated.
-        resolved[i] = null;
-      }
-    }
-    return resolved;
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Thrown when a {@link Coder} cannot possibly encode a type, yet has been proposed as a
-   * {@link Coder} for that type.
-   */
-  @VisibleForTesting static class IncompatibleCoderException extends RuntimeException {
-    /** The coder that was proposed; fixed at construction. */
-    private final Coder<?> coder;
-    /** The type the coder was proposed for; fixed at construction. */
-    private final Type type;
-
-    /**
-     * Creates an exception with no cause.
-     *
-     * @param message description of the incompatibility
-     * @param coder the coder that was proposed
-     * @param type the type that {@code coder} cannot encode
-     */
-    public IncompatibleCoderException(String message, Coder<?> coder, Type type) {
-      super(message);
-      this.coder = coder;
-      this.type = type;
-    }
-
-    /**
-     * Creates an exception that wraps an underlying failure.
-     *
-     * @param message description of the incompatibility
-     * @param coder the coder that was proposed
-     * @param type the type that {@code coder} cannot encode
-     * @param cause the underlying failure, e.g. the incompatibility of a component coder
-     */
-    public IncompatibleCoderException(String message, Coder<?> coder, Type type, Throwable cause) {
-      super(message, cause);
-      this.coder = coder;
-      this.type = type;
-    }
-
-    /** Returns the coder that was proposed. */
-    public Coder<?> getCoder() {
-      return coder;
-    }
-
-    /** Returns the type the coder was proposed for. */
-    public Type getType() {
-      return type;
-    }
-  }
-
-  /**
-   * Verifies that the given {@link Coder} can possibly encode elements of the given type.
-   * Returns normally when the pairing cannot be ruled out.
-   *
-   * <p>The check is conservative: a candidate that is a {@link TypeVariable} is always accepted,
-   * and component coders are checked recursively against the candidate's type arguments.
-   *
-   * @param coder the proposed coder
-   * @param candidateType the type whose elements {@code coder} would have to encode
-   * @throws IncompatibleCoderException if {@code coder} certainly cannot encode the type
-   */
-  @VisibleForTesting static <T, CoderT extends Coder<T>, CandidateT>
-  void verifyCompatible(CoderT coder, Type candidateType) throws IncompatibleCoderException {
-
-    // Various representations of the coder's class
-    @SuppressWarnings("unchecked")
-    Class<CoderT> coderClass = (Class<CoderT>) coder.getClass();
-    TypeDescriptor<CoderT> coderDescriptor = TypeDescriptor.of(coderClass);
-
-    // Various representations of the actual coded type
-    @SuppressWarnings("unchecked")
-    TypeDescriptor<T> codedDescriptor = CoderUtils.getCodedType(coderDescriptor);
-    @SuppressWarnings("unchecked")
-    Class<T> codedClass = (Class<T>) codedDescriptor.getRawType();
-    Type codedType = codedDescriptor.getType();
-
-    // Various representations of the candidate type
-    @SuppressWarnings("unchecked")
-    TypeDescriptor<CandidateT> candidateDescriptor =
-        (TypeDescriptor<CandidateT>) TypeDescriptor.of(candidateType);
-    @SuppressWarnings("unchecked")
-    Class<CandidateT> candidateClass = (Class<CandidateT>) candidateDescriptor.getRawType();
-
-    // If coder has type Coder<T> where the actual value of T is lost
-    // to erasure, then we cannot rule it out.
-    if (candidateType instanceof TypeVariable) {
-      return;
-    }
-
-    // If the raw types are not compatible, we can certainly rule out
-    // coder compatibility
-    if (!codedClass.isAssignableFrom(candidateClass)) {
-      throw new IncompatibleCoderException(
-          String.format("Cannot encode elements of type %s with coder %s because the"
-              + " coded type %s is not assignable from %s",
-              candidateType, coder, codedClass, candidateType),
-          coder, candidateType);
-    }
-    // we have established that this is a covariant upcast... though
-    // coders are invariant, we are just checking one direction
-    @SuppressWarnings("unchecked")
-    TypeDescriptor<T> candidateOkDescriptor = (TypeDescriptor<T>) candidateDescriptor;
-
-    // If the coded type is a parameterized type where any of the actual
-    // type parameters are not compatible, then the whole thing is certainly not
-    // compatible.
-    if ((codedType instanceof ParameterizedType) && !isNullOrEmpty(coder.getCoderArguments())) {
-      ParameterizedType parameterizedSupertype = ((ParameterizedType)
-           candidateOkDescriptor.getSupertype(codedClass).getType());
-      Type[] typeArguments = parameterizedSupertype.getActualTypeArguments();
-      List<? extends Coder<?>> typeArgumentCoders = coder.getCoderArguments();
-      // Fewer type arguments than component coders means the i-th coder below would have no
-      // type argument to be checked against.
-      if (typeArguments.length < typeArgumentCoders.size()) {
-        throw new IncompatibleCoderException(
-            String.format("Cannot encode elements of type %s with coder %s:"
-                + " the generic supertype %s has %s type parameters, which is less than the"
-                + " number of coder arguments %s has (%s).",
-                candidateOkDescriptor, coder,
-                parameterizedSupertype, typeArguments.length,
-                coder, typeArgumentCoders.size()),
-            coder, candidateOkDescriptor.getType());
-      }
-      // Recurse: the i-th component coder is checked against the i-th type argument, resolved
-      // in the context of the candidate type.
-      for (int i = 0; i < typeArgumentCoders.size(); i++) {
-        try {
-          verifyCompatible(
-              typeArgumentCoders.get(i),
-              candidateDescriptor.resolveType(typeArguments[i]).getType());
-        } catch (IncompatibleCoderException exn) {
-          throw new IncompatibleCoderException(
-              String.format("Cannot encode elements of type %s with coder %s"
-                  + " because some component coder is incompatible",
-                  candidateType, coder),
-              coder, candidateType, exn);
-        }
-      }
-    }
-  }
-
-  private static boolean isNullOrEmpty(Collection<?> c) {
-    // A missing collection counts as empty.
-    if (c == null) {
-      return true;
-    }
-    return c.isEmpty();
-  }
-
-  /**
-   * The map of classes to the CoderFactories to use to create their
-   * default Coders.
-   */
-  private Map<Class<?>, CoderFactory> coderFactoryMap = new HashMap<>();
-
-  /**
-   * A provider of coders for types where no coder is registered.
-   */
-  private CoderProvider fallbackCoderProvider;
-
-  /**
-   * Returns the {@link CoderFactory} registered to create default {@link Coder Coders} for
-   * instances of the given class.
-   *
-   * @throws CannotProvideCoderException if no {@link CoderFactory} is registered for the class
-   */
-  private CoderFactory getDefaultCoderFactory(Class<?> clazz) throws CannotProvideCoderException {
-    CoderFactory registered = coderFactoryMap.get(clazz);
-    if (registered == null) {
-      throw new CannotProvideCoderException(
-          String.format("Cannot provide coder based on value with class %s: No CoderFactory has "
-              + "been registered for the class.", clazz.getCanonicalName()));
-    }
-    return registered;
-  }
-
-  /**
-   * Returns the {@link Coder} returned according to the {@link CoderProvider} from any
-   * {@link DefaultCoder} annotation on the given class.
-   */
-  private <T> Coder<T> getDefaultCoderFromAnnotation(Class<T> clazz)
-      throws CannotProvideCoderException {
-    DefaultCoder annotation = clazz.getAnnotation(DefaultCoder.class);
-    if (annotation != null) {
-      LOG.debug("DefaultCoder annotation found for {} with value {}",
-          clazz, annotation.value());
-      // The annotation names a Coder class; its static methods supply the actual coder.
-      CoderProvider annotatedProvider = CoderProviders.fromStaticMethods(annotation.value());
-      return annotatedProvider.getCoder(TypeDescriptor.of(clazz));
-    }
-
-    throw new CannotProvideCoderException(
-        String.format("Class %s does not have a @DefaultCoder annotation.",
-            clazz.getCanonicalName()));
-  }
-
-  /**
-   * Returns the {@link Coder} to use by default for values of the given type,
-   * in a context where the given types use the given coders.
-   *
-   * @throws CannotProvideCoderException if a coder cannot be provided
-   */
-  private <T> Coder<T> getDefaultCoder(
-      TypeDescriptor<T> typeDescriptor,
-      Map<Type, Coder<?>> typeCoderBindings)
-      throws CannotProvideCoderException {
-
-    // Resolve on the underlying reflective Type, then narrow the result back to Coder<T>.
-    Type underlyingType = typeDescriptor.getType();
-    Coder<?> untypedCoder = getDefaultCoder(underlyingType, typeCoderBindings);
-    LOG.debug("Default coder for {}: {}", typeDescriptor, untypedCoder);
-
-    @SuppressWarnings("unchecked")
-    Coder<T> typedCoder = (Coder<T>) untypedCoder;
-    return typedCoder;
-  }
-
-  /**
-   * Returns the {@link Coder} to use by default for values of the given type,
-   * in a context where the given types use the given coders.
-   *
-   * @throws CannotProvideCoderException if a coder cannot be provided
-   */
-  private Coder<?> getDefaultCoder(Type type, Map<Type, Coder<?>> typeCoderBindings)
-      throws CannotProvideCoderException {
-    // An explicit binding for exactly this type wins over any registry lookup.
-    Coder<?> coder = typeCoderBindings.get(type);
-    if (coder != null) {
-      return coder;
-    }
-    if (type instanceof Class<?>) {
-      Class<?> clazz = (Class<?>) type;
-      return getDefaultCoder(clazz);
-    } else if (type instanceof ParameterizedType) {
-      return getDefaultCoder((ParameterizedType) type, typeCoderBindings);
-    } else if (type instanceof TypeVariable) {
-      // No default coder for an unknown generic type.
-      throw new CannotProvideCoderException(
-          String.format("Cannot provide a coder for type variable %s"
-          + " (declared by %s) because the actual type is unknown due to erasure.",
-          type,
-          ((TypeVariable<?>) type).getGenericDeclaration()),
-          ReasonCode.TYPE_ERASURE);
-    } else if (type instanceof WildcardType) {
-      // A wildcard is not a TypeVariable and has no generic declaration to report, so it gets
-      // its own message rather than being cast to TypeVariable, which would fail with a
-      // ClassCastException instead of the documented CannotProvideCoderException.
-      throw new CannotProvideCoderException(
-          String.format("Cannot provide a coder for wildcard type %s"
-          + " because the actual type is unknown due to erasure.",
-          type),
-          ReasonCode.TYPE_ERASURE);
-    } else {
-      // Any other kind of Type is not handled by this method.
-      throw new RuntimeException(
-          "Internal error: unexpected kind of Type: " + type);
-    }
-  }
-
-  /**
-   * Returns the {@link Coder} to use by default for values of the given
-   * parameterized type, in a context where the given types use the
-   * given {@link Coder Coders}.
-   *
-   * @throws CannotProvideCoderException if no coder can be provided
-   */
-  private Coder<?> getDefaultCoder(
-      ParameterizedType type,
-      Map<Type, Coder<?>> typeCoderBindings)
-          throws CannotProvideCoderException {
-
-    // First choice: a CoderFactory registered for the raw class.
-    CannotProvideCoderException factoryFailure;
-    try {
-      return getDefaultCoderFromFactory(type, typeCoderBindings);
-    } catch (CannotProvideCoderException exc) {
-      factoryFailure = exc;
-    }
-
-    // Second choice: a @DefaultCoder annotation on the raw class. The fallback CoderProvider
-    // is not consulted on this path.
-    CannotProvideCoderException annotationFailure;
-    try {
-      Class<?> rawClazz = (Class<?>) type.getRawType();
-      return getDefaultCoderFromAnnotation(rawClazz);
-    } catch (CannotProvideCoderException exc) {
-      annotationFailure = exc;
-    }
-
-    // Both sources failed: report every reason in a single message.
-    String message =
-        "Unable to provide a default Coder for " + type
-        + ". Correct one of the following root causes:"
-        + "\n  Building a Coder using a registered CoderFactory failed: "
-        + factoryFailure.getMessage()
-        + "\n  Building a Coder from the @DefaultCoder annotation failed: "
-        + annotationFailure.getMessage();
-
-    throw new CannotProvideCoderException(message);
-  }
-
-  private Coder<?> getDefaultCoderFromFactory(
-      ParameterizedType type,
-      Map<Type, Coder<?>> typeCoderBindings)
-          throws CannotProvideCoderException {
-    Class<?> rawClazz = (Class<?>) type.getRawType();
-    CoderFactory coderFactory = getDefaultCoderFactory(rawClazz);
-    List<Coder<?>> typeArgumentCoders = new ArrayList<>();
-    for (Type typeArgument : type.getActualTypeArguments()) {
-      try {
-        Coder<?> typeArgumentCoder = getDefaultCoder(typeArgument,
-                                                     typeCoderBindings);
-        typeArgumentCoders.add(typeArgumentCoder);
-      } catch (CannotProvideCoderException exc) {
-         throw new CannotProvideCoderException(
-          String.format("Cannot provide coder for parameterized type %s: %s",
-              type,
-              exc.getMessage()),
-          exc);
-      }
-    }
-    return coderFactory.create(typeArgumentCoders);
-  }
-
-  /**
-   * Returns an immutable {@code Map} from each of the type variables
-   * embedded in the given type to the corresponding types
-   * in the given {@link Coder}.
-   */
-  private Map<Type, Coder<?>> getTypeToCoderBindings(Type type, Coder<?> coder) {
-    if (type instanceof TypeVariable || type instanceof Class) {
-      return ImmutableMap.<Type, Coder<?>>of(type, coder);
-    } else if (type instanceof ParameterizedType) {
-      return getTypeToCoderBindings((ParameterizedType) type, coder);
-    } else {
-      return ImmutableMap.of();
-    }
-  }
-
-  /**
-   * Returns an immutable {@code Map} from the type arguments of the parameterized type to their
-   * corresponding {@link Coder Coders}, and so on recursively for their type parameters.
-   *
-   * <p>This method is simply a specialization to break out the most
-   * elaborate case of {@link #getTypeToCoderBindings(Type, Coder)}.
-   */
-  private Map<Type, Coder<?>> getTypeToCoderBindings(ParameterizedType type, Coder<?> coder) {
-    List<Type> typeArguments = Arrays.asList(type.getActualTypeArguments());
-    List<? extends Coder<?>> coderArguments = coder.getCoderArguments();
-
-    if ((coderArguments == null) || (typeArguments.size() != coderArguments.size())) {
-      return ImmutableMap.of();
-    } else {
-      Map<Type, Coder<?>> typeToCoder = Maps.newHashMap();
-
-      typeToCoder.put(type, coder);
-
-      for (int i = 0; i < typeArguments.size(); i++) {
-        Type typeArgument = typeArguments.get(i);
-        Coder<?> coderArgument = coderArguments.get(i);
-        typeToCoder.putAll(getTypeToCoderBindings(typeArgument, coderArgument));
-      }
-
-      return ImmutableMap.<Type, Coder<?>>builder().putAll(typeToCoder).build();
-    }
-
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java
deleted file mode 100644
index a028317..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CollectionCoder.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.common.base.Preconditions;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.Collection;
-import java.util.List;
-
-/**
- * A {@link CollectionCoder} encodes {@link Collection Collections} in the format
- * of {@link IterableLikeCoder}.
- */
-public class CollectionCoder<T> extends IterableLikeCoder<T, Collection<T>> {
-
-  public static <T> CollectionCoder<T> of(Coder<T> elemCoder) {
-    return new CollectionCoder<>(elemCoder);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal operations below here.
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return the decoded elements directly, since {@link List} is a subtype of
-   * {@link Collection}.
-   */
-  @Override
-  protected final Collection<T> decodeToIterable(List<T> decodedElements) {
-    return decodedElements;
-  }
-
-  @JsonCreator
-  public static CollectionCoder<?> of(
-      @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-      List<Object> components) {
-    Preconditions.checkArgument(components.size() == 1,
-        "Expecting 1 component, got " + components.size());
-    return of((Coder<?>) components.get(0));
-  }
-
-  /**
-   * Returns the first element in this collection if it is non-empty,
-   * otherwise returns {@code null}.
-   */
-  public static <T> List<Object> getInstanceComponents(
-      Collection<T> exampleValue) {
-    return getInstanceComponentsHelper(exampleValue);
-  }
-
-  protected CollectionCoder(Coder<T> elemCoder) {
-    super(elemCoder, "Collection");
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java
deleted file mode 100644
index b34ef8c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/CustomCoder.java
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addString;
-import static com.google.cloud.dataflow.sdk.util.Structs.addStringList;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.util.StringUtils;
-import com.google.common.collect.Lists;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.Serializable;
-import java.util.Collection;
-
-/**
- * An abstract base class for writing a {@link Coder} class that encodes itself via Java
- * serialization.
- *
- * <p>To complete an implementation, subclasses must implement {@link Coder#encode}
- * and {@link Coder#decode} methods. Anonymous subclasses must furthermore override
- * {@link #getEncodingId}.
- *
- * <p>Not to be confused with {@link SerializableCoder} that encodes objects that implement the
- * {@link Serializable} interface.
- *
- * @param <T> the type of elements handled by this coder
- */
-public abstract class CustomCoder<T> extends AtomicCoder<T>
-    implements Serializable {
-  @JsonCreator
-  public static CustomCoder<?> of(
-      // N.B. typeId is a required parameter here, since a field named "@type"
-      // is presented to the deserializer as an input.
-      //
-      // If this method did not consume the field, Jackson2 would observe an
-      // unconsumed field and a returned value of a derived type.  So Jackson2
-      // would attempt to update the returned value with the unconsumed field
-      // data, The standard JsonDeserializer does not implement a mechanism for
-      // updating constructed values, so it would throw an exception, causing
-      // deserialization to fail.
-      @JsonProperty(value = "@type", required = false) String typeId,
-      @JsonProperty(value = "encoding_id", required = false) String encodingId,
-      @JsonProperty("type") String type,
-      @JsonProperty("serialized_coder") String serializedCoder) {
-    return (CustomCoder<?>) SerializableUtils.deserializeFromByteArray(
-        StringUtils.jsonStringToByteArray(serializedCoder),
-        type);
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return A thin {@link CloudObject} wrapping of the Java serialization of {@code this}.
-   */
-  @Override
-  public CloudObject asCloudObject() {
-    // N.B. We use the CustomCoder class, not the derived class, since during
-    // deserialization we will be using the CustomCoder's static factory method
-    // to construct an instance of the derived class.
-    CloudObject result = CloudObject.forClass(CustomCoder.class);
-    addString(result, "type", getClass().getName());
-    addString(result, "serialized_coder",
-        StringUtils.byteArrayToJsonString(
-            SerializableUtils.serializeToByteArray(this)));
-
-    String encodingId = getEncodingId();
-    checkNotNull(encodingId, "Coder.getEncodingId() must not return null.");
-    if (!encodingId.isEmpty()) {
-      addString(result, PropertyNames.ENCODING_ID, encodingId);
-    }
-
-    Collection<String> allowedEncodings = getAllowedEncodings();
-    if (!allowedEncodings.isEmpty()) {
-      addStringList(result, PropertyNames.ALLOWED_ENCODINGS, Lists.newArrayList(allowedEncodings));
-    }
-
-    return result;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws NonDeterministicException a {@link CustomCoder} is presumed
-   * nondeterministic.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    throw new NonDeterministicException(this,
-        "CustomCoder implementations must override verifyDeterministic,"
-        + " or they are presumed nondeterministic.");
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return The canonical class name for this coder. For stable data formats that are independent
-   *         of class name, it is recommended to override this method.
-   *
-   * @throws UnsupportedOperationException when an anonymous class is used, since they do not have
-   *         a stable canonical class name.
-   */
-  @Override
-  public String getEncodingId() {
-    if (getClass().isAnonymousClass()) {
-      throw new UnsupportedOperationException(
-          String.format("Anonymous CustomCoder subclass %s must override getEncodingId()."
-              + " Otherwise, convert to a named class and getEncodingId() will be automatically"
-              + " generated from the fully qualified class name.",
-              getClass()));
-    }
-    return getClass().getCanonicalName();
-  }
-
-  // This coder inherits isRegisterByteSizeObserverCheap,
-  // getEncodedElementByteSize and registerByteSizeObserver
-  // from StandardCoder. Override if we can do better.
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DefaultCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DefaultCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DefaultCoder.java
deleted file mode 100644
index 110579b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DefaultCoder.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import java.lang.annotation.Documented;
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
-
-/**
- * The {@link DefaultCoder} annotation
- * specifies a default {@link Coder} class to handle encoding and decoding
- * instances of the annotated class.
- *
- * <p>The specified {@link Coder} must satisfy the requirements of
- * {@link CoderProviders#fromStaticMethods}. Two classes provided by the SDK that
- * are intended for use with this annotation include {@link SerializableCoder}
- * and {@link AvroCoder}.
- *
- * <p>To configure the use of Java serialization as the default
- * for a class, annotate the class to use
- * {@link SerializableCoder} as follows:
- *
- * <pre><code>{@literal @}DefaultCoder(SerializableCoder.class)
- * public class MyCustomDataType implements Serializable {
- *   // ...
- * }</code></pre>
- *
- * <p>Similarly, to configure the use of
- * {@link AvroCoder} as the default:
- * <pre><code>{@literal @}DefaultCoder(AvroCoder.class)
- * public class MyCustomDataType {
- *   public MyCustomDataType() {}  // Avro requires an empty constructor.
- *   // ...
- * }</code></pre>
- *
- * <p>Coders specified explicitly via
- * {@link PCollection#setCoder}
- * take precedence, followed by Coders registered at runtime via
- * {@link CoderRegistry#registerCoder}. See {@link CoderRegistry} for a more detailed discussion
- * of the precedence rules.
- */
-@Documented
-@Retention(RetentionPolicy.RUNTIME)
-@Target(ElementType.TYPE)
-@SuppressWarnings("rawtypes")
-public @interface DefaultCoder {
-  Class<? extends Coder> value();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java
deleted file mode 100644
index cdd882b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DelegateCoder.java
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.common.collect.Lists;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.Serializable;
-import java.util.Collection;
-import java.util.List;
-
-/**
- * A {@code DelegateCoder<T, IntermediateT>} wraps a {@link Coder} for {@code IntermediateT} and
- * encodes/decodes values of type {@code T} by converting
- * to/from {@code IntermediateT} and then encoding/decoding using the underlying
- * {@code Coder<IntermediateT>}.
- *
- * <p>The conversions from {@code T} to {@code IntermediateT} and vice versa
- * must be supplied as {@link CodingFunction}, a serializable
- * function that may throw any {@code Exception}. If a thrown
- * exception is an instance of {@link CoderException} or
- * {@link IOException}, it will be re-thrown, otherwise it will be wrapped as
- * a {@link CoderException}.
- *
- * @param <T> The type of objects coded by this Coder.
- * @param <IntermediateT> The type of objects a {@code T} will be converted to for coding.
- */
-public class DelegateCoder<T, IntermediateT> extends CustomCoder<T> {
-  /**
-   * A {@link DelegateCoder.CodingFunction CodingFunction&lt;InputT, OutputT&gt;} is a serializable
-   * function from {@code InputT} to {@code OutputT} that may throw any {@link Exception}.
-   */
-  public static interface CodingFunction<InputT, OutputT> extends Serializable {
-     public abstract OutputT apply(InputT input) throws Exception;
-  }
-
-  public static <T, IntermediateT> DelegateCoder<T, IntermediateT> of(Coder<IntermediateT> coder,
-      CodingFunction<T, IntermediateT> toFn,
-      CodingFunction<IntermediateT, T> fromFn) {
-    return new DelegateCoder<T, IntermediateT>(coder, toFn, fromFn);
-  }
-
-  @Override
-  public void encode(T value, OutputStream outStream, Context context)
-      throws CoderException, IOException {
-    coder.encode(applyAndWrapExceptions(toFn, value), outStream, context);
-  }
-
-  @Override
-  public T decode(InputStream inStream, Context context) throws CoderException, IOException {
-    return applyAndWrapExceptions(fromFn, coder.decode(inStream, context));
-  }
-
-  /**
-   * Returns the coder used to encode/decode the intermediate values produced/consumed by the
-   * coding functions of this {@code DelegateCoder}.
-   */
-  public Coder<IntermediateT> getCoder() {
-    return coder;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws NonDeterministicException when the underlying coder's {@code verifyDeterministic()}
-   *         throws a {@link Coder.NonDeterministicException}. For this to be safe, the
-   *         intermediate {@code CodingFunction<T, IntermediateT>} must also be deterministic.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    coder.verifyDeterministic();
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return a structural for a value of type {@code T} obtained by first converting to
-   *         {@code IntermediateT} and then obtaining a structural value according to the underlying
-   *         coder.
-   */
-  @Override
-  public Object structuralValue(T value) throws Exception {
-    return coder.structuralValue(toFn.apply(value));
-  }
-
-  @Override
-  public String toString() {
-    return "DelegateCoder(" + coder + ")";
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return a {@link String} composed from the underlying coder class name and its encoding id.
-   *         Note that this omits any description of the coding functions. These should be modified
-   *         with care.
-   */
-  @Override
-  public String getEncodingId() {
-    return delegateEncodingId(coder.getClass(), coder.getEncodingId());
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return allowed encodings which are composed from the underlying coder class and its allowed
-   *         encoding ids. Note that this omits any description of the coding functions. These
-   *         should be modified with care.
-   */
-  @Override
-  public Collection<String> getAllowedEncodings() {
-    List<String> allowedEncodings = Lists.newArrayList();
-    for (String allowedEncoding : coder.getAllowedEncodings()) {
-      allowedEncodings.add(delegateEncodingId(coder.getClass(), allowedEncoding));
-    }
-    return allowedEncodings;
-  }
-
-  private String delegateEncodingId(Class<?> delegateClass, String encodingId) {
-    return String.format("%s:%s", delegateClass.getName(), encodingId);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private <InputT, OutputT> OutputT applyAndWrapExceptions(
-      CodingFunction<InputT, OutputT> fn,
-      InputT input) throws CoderException, IOException {
-    try {
-      return fn.apply(input);
-    } catch (IOException exc) {
-      throw exc;
-    } catch (Exception exc) {
-      throw new CoderException(exc);
-    }
-  }
-
-  private final Coder<IntermediateT> coder;
-  private final CodingFunction<T, IntermediateT> toFn;
-  private final CodingFunction<IntermediateT, T> fromFn;
-
-  protected DelegateCoder(Coder<IntermediateT> coder,
-      CodingFunction<T, IntermediateT> toFn,
-      CodingFunction<IntermediateT, T> fromFn) {
-    this.coder = coder;
-    this.fromFn = fromFn;
-    this.toFn = toFn;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DeterministicStandardCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DeterministicStandardCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DeterministicStandardCoder.java
deleted file mode 100644
index 0e0018a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DeterministicStandardCoder.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-/**
- * A {@link DeterministicStandardCoder} is a {@link StandardCoder} that is
- * deterministic, in the sense that for objects considered equal
- * according to {@link Object#equals(Object)}, the encoded bytes are
- * also equal.
- *
- * @param <T> the type of the values being transcoded
- */
-public abstract class DeterministicStandardCoder<T> extends StandardCoder<T> {
-  protected DeterministicStandardCoder() {}
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws NonDeterministicException never, unless overridden. A
-   * {@link DeterministicStandardCoder} is presumed deterministic.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException { }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java
deleted file mode 100644
index 68d58df..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DoubleCoder.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.UTFDataFormatException;
-
-/**
- * A {@link DoubleCoder} encodes {@link Double} values in 8 bytes using Java serialization.
- */
-public class DoubleCoder extends AtomicCoder<Double> {
-
-  @JsonCreator
-  public static DoubleCoder of() {
-    return INSTANCE;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static final DoubleCoder INSTANCE = new DoubleCoder();
-
-  private DoubleCoder() {}
-
-  @Override
-  public void encode(Double value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Double");
-    }
-    new DataOutputStream(outStream).writeDouble(value);
-  }
-
-  @Override
-  public Double decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    try {
-      return new DataInputStream(inStream).readDouble();
-    } catch (EOFException | UTFDataFormatException exn) {
-      // These exceptions correspond to decoding problems, so change
-      // what kind of exception they're branded as.
-      throw new CoderException(exn);
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws NonDeterministicException always.
-   *         Floating-point operations are not guaranteed to be deterministic, even
-   *         if the storage format might be, so floating point representations are not
-   *         recommended for use in operations that require deterministic inputs.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    throw new NonDeterministicException(this,
-        "Floating point encodings are not guaranteed to be deterministic.");
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. This coder is injective.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. {@link DoubleCoder#getEncodedElementByteSize} returns a constant.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(Double value, Context context) {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code 8}, the byte size of a {@link Double} encoded using Java serialization.
-   */
-  @Override
-  protected long getEncodedElementByteSize(Double value, Context context)
-      throws Exception {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Double");
-    }
-    return 8;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DurationCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DurationCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DurationCoder.java
deleted file mode 100644
index 25527f0..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/DurationCoder.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import org.joda.time.Duration;
-import org.joda.time.ReadableDuration;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * A {@link Coder} that encodes a joda {@link Duration} as a {@link Long} using the format of
- * {@link VarLongCoder}.
- */
-public class DurationCoder extends AtomicCoder<ReadableDuration> {
-
-  @JsonCreator
-  public static DurationCoder of() {
-    return INSTANCE;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static final DurationCoder INSTANCE = new DurationCoder();
-
-  private final VarLongCoder longCoder = VarLongCoder.of();
-
-  private DurationCoder() {}
-
-  private Long toLong(ReadableDuration value) {
-    return value.getMillis();
-  }
-
-  private ReadableDuration fromLong(Long decoded) {
-    return Duration.millis(decoded);
-  }
-
-  @Override
-  public void encode(ReadableDuration value, OutputStream outStream, Context context)
-      throws CoderException, IOException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null ReadableDuration");
-    }
-    longCoder.encode(toLong(value), outStream, context);
-  }
-
-  @Override
-  public ReadableDuration decode(InputStream inStream, Context context)
-      throws CoderException, IOException {
-      return fromLong(longCoder.decode(inStream, context));
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. This coder is injective.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}, because it is cheap to ascertain the byte size of a long.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(ReadableDuration value, Context context) {
-    return longCoder.isRegisterByteSizeObserverCheap(toLong(value), context);
-  }
-
-  @Override
-  public void registerByteSizeObserver(
-      ReadableDuration value, ElementByteSizeObserver observer, Context context) throws Exception {
-    longCoder.registerByteSizeObserver(toLong(value), observer, context);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java
deleted file mode 100644
index 3ae857f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/EntityCoder.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.api.services.datastore.DatastoreV1.Entity;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * A {@link Coder} for {@link Entity} objects based on their encoded Protocol Buffer form.
- */
-public class EntityCoder extends AtomicCoder<Entity> {
-
-  @JsonCreator
-  public static EntityCoder of() {
-    return INSTANCE;
-  }
-
-  /***************************/
-
-  private static final EntityCoder INSTANCE = new EntityCoder();
-
-  private EntityCoder() {}
-
-  @Override
-  public void encode(Entity value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Entity");
-    }
-
-    // Since Entity implements com.google.protobuf.MessageLite,
-    // we could directly use writeTo to write to a OutputStream object
-    outStream.write(java.nio.ByteBuffer.allocate(4).putInt(value.getSerializedSize()).array());
-    value.writeTo(outStream);
-    outStream.flush();
-  }
-
-  @Override
-  public Entity decode(InputStream inStream, Context context)
-      throws IOException {
-    byte[] entitySize = new byte[4];
-    inStream.read(entitySize, 0, 4);
-    int size = java.nio.ByteBuffer.wrap(entitySize).getInt();
-    byte[] data = new byte[size];
-    inStream.read(data, 0, size);
-    return Entity.parseFrom(data);
-  }
-
-  @Override
-  protected long getEncodedElementByteSize(Entity value, Context context)
-      throws Exception {
-    return value.getSerializedSize();
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws NonDeterministicException always.
-   *         A datastore kind can hold arbitrary {@link Object} instances, which
-   *         makes the encoding non-deterministic.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    throw new NonDeterministicException(this,
-        "Datastore encodings can hold arbitrary Object instances");
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java
deleted file mode 100644
index 99b58ce..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/InstantCoder.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-import com.google.common.base.Converter;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * A {@link Coder} for joda {@link Instant} that encodes it as a big endian {@link Long}
- * shifted such that lexicographic ordering of the bytes corresponds to chronological order.
- */
-public class InstantCoder extends AtomicCoder<Instant> {
-
-  @JsonCreator
-  public static InstantCoder of() {
-    return INSTANCE;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static final InstantCoder INSTANCE = new InstantCoder();
-
-  private final BigEndianLongCoder longCoder = BigEndianLongCoder.of();
-
-  private InstantCoder() {}
-
-  /**
-   * Converts {@link Instant} to a {@code Long} representing its millis-since-epoch,
-   * but shifted so that the byte representation of negative values are lexicographically
-   * ordered before the byte representation of positive values.
-   *
-   * <p>This deliberately utilizes the well-defined overflow for {@code Long} values.
-   * See http://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.18.2
-   */
-  private static final Converter<Instant, Long> ORDER_PRESERVING_CONVERTER =
-      new Converter<Instant, Long>() {
-
-        @Override
-        protected Long doForward(Instant instant) {
-          return instant.getMillis() - Long.MIN_VALUE;
-        }
-
-        @Override
-        protected Instant doBackward(Long shiftedMillis) {
-          return new Instant(shiftedMillis + Long.MIN_VALUE);
-        }
-  };
-
-  @Override
-  public void encode(Instant value, OutputStream outStream, Context context)
-      throws CoderException, IOException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Instant");
-    }
-    longCoder.encode(ORDER_PRESERVING_CONVERTER.convert(value), outStream, context);
-  }
-
-  @Override
-  public Instant decode(InputStream inStream, Context context)
-      throws CoderException, IOException {
-    return ORDER_PRESERVING_CONVERTER.reverse().convert(longCoder.decode(inStream, context));
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. This coder is injective.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. The byte size for a big endian long is a constant.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(Instant value, Context context) {
-    return longCoder.isRegisterByteSizeObserverCheap(
-        ORDER_PRESERVING_CONVERTER.convert(value), context);
-  }
-
-  @Override
-  public void registerByteSizeObserver(
-      Instant value, ElementByteSizeObserver observer, Context context) throws Exception {
-    longCoder.registerByteSizeObserver(
-        ORDER_PRESERVING_CONVERTER.convert(value), observer, context);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java
deleted file mode 100644
index 70dcd84..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableCoder.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean;
-
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.common.base.Preconditions;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.List;
-
-/**
- * An {@link IterableCoder} encodes any {@link Iterable} in the format
- * of {@link IterableLikeCoder}.
- *
- * @param <T> the type of the elements of the iterables being transcoded
- */
-public class IterableCoder<T> extends IterableLikeCoder<T, Iterable<T>> {
-
-  public static <T> IterableCoder<T> of(Coder<T> elemCoder) {
-    return new IterableCoder<>(elemCoder);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal operations below here.
-
-  @Override
-  protected final Iterable<T> decodeToIterable(List<T> decodedElements) {
-    return decodedElements;
-  }
-
-  @JsonCreator
-  public static IterableCoder<?> of(
-      @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-      List<Coder<?>> components) {
-    Preconditions.checkArgument(components.size() == 1,
-        "Expecting 1 component, got " + components.size());
-    return of(components.get(0));
-  }
-
-  /**
-   * Returns the first element in this iterable if it is non-empty,
-   * otherwise returns {@code null}.
-   */
-  public static <T> List<Object> getInstanceComponents(
-      Iterable<T> exampleValue) {
-    return getInstanceComponentsHelper(exampleValue);
-  }
-
-  protected IterableCoder(Coder<T> elemCoder) {
-    super(elemCoder, "Iterable");
-  }
-
-  @Override
-  public CloudObject asCloudObject() {
-    CloudObject result = super.asCloudObject();
-    addBoolean(result, PropertyNames.IS_STREAM_LIKE, true);
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java
deleted file mode 100644
index 7fb573a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/IterableLikeCoder.java
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.BufferedElementCountingOutputStream;
-import com.google.cloud.dataflow.sdk.util.VarInt;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObservableIterable;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-import com.google.common.base.Preconditions;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.Observable;
-import java.util.Observer;
-
-/**
- * An abstract base class with functionality for assembling a
- * {@link Coder} for a class that implements {@code Iterable}.
- *
- * <p>To complete a subclass, implement the {@link #decodeToIterable} method. This superclass
- * will decode the elements in the input stream into a {@link List} and then pass them to that
- * method to be converted into the appropriate iterable type. Note that this means the input
- * iterables must fit into memory.
- *
- * <p>The format of this coder is as follows:
- *
- * <ul>
- *   <li>If the input {@link Iterable} has a known and finite size, then the size is written to the
- *       output stream in big endian format, followed by all of the encoded elements.</li>
- *   <li>If the input {@link Iterable} is not known to have a finite size, then each element
- *       of the input is preceded by {@code true} encoded as a byte (indicating "more data")
- *       followed by the encoded element, and terminated by {@code false} encoded as a byte.</li>
- * </ul>
- *
- * @param <T> the type of the elements of the {@code Iterable}s being transcoded
- * @param <IterableT> the type of the Iterables being transcoded
- */
-public abstract class IterableLikeCoder<T, IterableT extends Iterable<T>>
-    extends StandardCoder<IterableT> {
-  public Coder<T> getElemCoder() {
-    return elementCoder;
-  }
-
-  /**
-   * Builds an instance of {@code IterableT}, this coder's associated {@link Iterable}-like
-   * subtype, from a list of decoded elements.
-   */
-  protected abstract IterableT decodeToIterable(List<T> decodedElements);
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal operations below here.
-
-  private final Coder<T> elementCoder;
-  private final String iterableName;
-
-  /**
-   * Returns the first element in the iterable-like {@code exampleValue} if it is non-empty,
-   * otherwise returns {@code null}.
-   */
-  protected static <T, IterableT extends Iterable<T>>
-      List<Object> getInstanceComponentsHelper(IterableT exampleValue) {
-    for (T value : exampleValue) {
-      return Arrays.<Object>asList(value);
-    }
-    return null;
-  }
-
-  protected IterableLikeCoder(Coder<T> elementCoder, String  iterableName) {
-    Preconditions.checkArgument(elementCoder != null,
-        "element Coder for IterableLikeCoder must not be null");
-    Preconditions.checkArgument(iterableName != null,
-        "iterable name for IterableLikeCoder must not be null");
-    this.elementCoder = elementCoder;
-    this.iterableName = iterableName;
-  }
-
-  @Override
-  public void encode(
-      IterableT iterable, OutputStream outStream, Context context)
-      throws IOException, CoderException  {
-    if (iterable == null) {
-      throw new CoderException("cannot encode a null " + iterableName);
-    }
-    Context nestedContext = context.nested();
-    DataOutputStream dataOutStream = new DataOutputStream(outStream);
-    if (iterable instanceof Collection) {
-      // We can know the size of the Iterable.  Use an encoding with a
-      // leading size field, followed by that many elements.
-      Collection<T> collection = (Collection<T>) iterable;
-      dataOutStream.writeInt(collection.size());
-      for (T elem : collection) {
-        elementCoder.encode(elem, dataOutStream, nestedContext);
-      }
-    } else {
-      // We don't know the size without traversing it so use a fixed size buffer
-      // and encode as many elements as possible into it before outputting the size followed
-      // by the elements.
-      dataOutStream.writeInt(-1);
-      BufferedElementCountingOutputStream countingOutputStream =
-          new BufferedElementCountingOutputStream(dataOutStream);
-      for (T elem : iterable) {
-        countingOutputStream.markElementStart();
-        elementCoder.encode(elem, countingOutputStream, nestedContext);
-      }
-      countingOutputStream.finish();
-    }
-    // Make sure all our output gets pushed to the underlying outStream.
-    dataOutStream.flush();
-  }
-
-  @Override
-  public IterableT decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    Context nestedContext = context.nested();
-    DataInputStream dataInStream = new DataInputStream(inStream);
-    int size = dataInStream.readInt();
-    if (size >= 0) {
-      List<T> elements = new ArrayList<>(size);
-      for (int i = 0; i < size; i++) {
-        elements.add(elementCoder.decode(dataInStream, nestedContext));
-      }
-      return decodeToIterable(elements);
-    } else {
-      List<T> elements = new ArrayList<>();
-      long count;
-      // We don't know the size a priori.  Check if we're done with
-      // each block of elements.
-      while ((count = VarInt.decodeLong(dataInStream)) > 0) {
-        while (count > 0) {
-          elements.add(elementCoder.decode(dataInStream, nestedContext));
-          count -= 1;
-        }
-      }
-      return decodeToIterable(elements);
-    }
-  }
-
-  @Override
-  public List<? extends Coder<?>> getCoderArguments() {
-    return Arrays.asList(elementCoder);
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws NonDeterministicException always.
-   * Encoding is not deterministic for the general {@link Iterable} case, as it depends
-   * upon the type of iterable. This may allow two objects to compare as equal
-   * while the encoding differs.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    throw new NonDeterministicException(this,
-        "IterableLikeCoder can not guarantee deterministic ordering.");
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true} if the iterable is of a known class that supports lazy counting
-   * of byte size, since that requires minimal extra computation.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(
-      IterableT iterable, Context context) {
-    return iterable instanceof ElementByteSizeObservableIterable;
-  }
-
-  @Override
-  public void registerByteSizeObserver(
-      IterableT iterable, ElementByteSizeObserver observer, Context context)
-      throws Exception {
-    if (iterable == null) {
-      throw new CoderException("cannot encode a null Iterable");
-    }
-    Context nestedContext = context.nested();
-
-    if (iterable instanceof ElementByteSizeObservableIterable) {
-      observer.setLazy();
-      ElementByteSizeObservableIterable<?, ?> observableIterable =
-          (ElementByteSizeObservableIterable<?, ?>) iterable;
-      observableIterable.addObserver(
-          new IteratorObserver(observer, iterable instanceof Collection));
-    } else {
-      if (iterable instanceof Collection) {
-        // We can know the size of the Iterable.  Use an encoding with a
-        // leading size field, followed by that many elements.
-        Collection<T> collection = (Collection<T>) iterable;
-        observer.update(4L);
-        for (T elem : collection) {
-          elementCoder.registerByteSizeObserver(elem, observer, nestedContext);
-        }
-      } else {
-        // TODO: Update to use an accurate count depending on size and count, currently we
-        // are under estimating the size by up to 10 bytes per block of data since we are
-        // not encoding the count prefix which occurs at most once per 64k of data and is upto
-        // 10 bytes long. Since we include the total count we can upper bound the underestimate
-        // to be 10 / 65536 ~= 0.0153% of the actual size.
-        observer.update(4L);
-        long count = 0;
-        for (T elem : iterable) {
-          count += 1;
-          elementCoder.registerByteSizeObserver(elem, observer, nestedContext);
-        }
-        if (count > 0) {
-          // Update the length based upon the number of counted elements, this helps
-          // eliminate the case where all the elements are encoded in the first block and
-          // it is quite short (e.g. Long.MAX_VALUE nulls encoded with VoidCoder).
-          observer.update(VarInt.getLength(count));
-        }
-        // Update with the terminator byte.
-        observer.update(1L);
-      }
-    }
-  }
-
-  /**
-   * An observer that gets notified when an observable iterator
-   * returns a new value. This observer just notifies an outerObserver
-   * about this event. Additionally, the outerObserver is notified
-   * about additional separators that are transparently added by this
-   * coder.
-   */
-  private class IteratorObserver implements Observer {
-    private final ElementByteSizeObserver outerObserver;
-    private final boolean countable;
-
-    public IteratorObserver(ElementByteSizeObserver outerObserver,
-                            boolean countable) {
-      this.outerObserver = outerObserver;
-      this.countable = countable;
-
-      if (countable) {
-        // Additional 4 bytes are due to size.
-        outerObserver.update(4L);
-      } else {
-        // Additional 5 bytes are due to size = -1 (4 bytes) and
-        // hasNext = false (1 byte).
-        outerObserver.update(5L);
-      }
-    }
-
-    @Override
-    public void update(Observable obs, Object obj) {
-      if (!(obj instanceof Long)) {
-        throw new AssertionError("unexpected parameter object");
-      }
-
-      if (countable) {
-        outerObserver.update(obs, obj);
-      } else {
-        // Additional 1 byte is due to hasNext = true flag.
-        outerObserver.update(obs, 1 + (long) obj);
-      }
-    }
-  }
-}

[24/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineFnBase.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineFnBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineFnBase.java
deleted file mode 100644
index a0b06cf..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineFnBase.java
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.CombineFnWithContext;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.KeyedCombineFnWithContext;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.collect.ImmutableMap;
-
-import java.io.Serializable;
-import java.lang.reflect.Type;
-import java.lang.reflect.TypeVariable;
-
-/**
- * This class contains the shared interfaces and abstract classes for different types of combine
- * functions.
- *
- * <p>Users should not implement or extend them directly.
- */
-public class CombineFnBase {
-  /**
-   * A {@code GloballyCombineFn<InputT, AccumT, OutputT>} specifies how to combine a
-   * collection of input values of type {@code InputT} into a single
-   * output value of type {@code OutputT}.  It does this via one or more
-   * intermediate mutable accumulator values of type {@code AccumT}.
-   *
-   * <p>Do not implement this interface directly.
-   * Extends {@link CombineFn} and {@link CombineFnWithContext} instead.
-   *
-   * @param <InputT> type of input values
-   * @param <AccumT> type of mutable accumulator values
-   * @param <OutputT> type of output values
-   */
-  public interface GlobalCombineFn<InputT, AccumT, OutputT> extends Serializable {
-
-    /**
-     * Returns the {@code Coder} to use for accumulator {@code AccumT}
-     * values, or null if it is not able to be inferred.
-     *
-     * <p>By default, uses the knowledge of the {@code Coder} being used
-     * for {@code InputT} values and the enclosing {@code Pipeline}'s
-     * {@code CoderRegistry} to try to infer the Coder for {@code AccumT}
-     * values.
-     *
-     * <p>This is the Coder used to send data through a communication-intensive
-     * shuffle step, so a compact and efficient representation may have
-     * significant performance benefits.
-     */
-    public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<InputT> inputCoder)
-        throws CannotProvideCoderException;
-
-    /**
-     * Returns the {@code Coder} to use by default for output
-     * {@code OutputT} values, or null if it is not able to be inferred.
-     *
-     * <p>By default, uses the knowledge of the {@code Coder} being
-     * used for input {@code InputT} values and the enclosing
-     * {@code Pipeline}'s {@code CoderRegistry} to try to infer the
-     * Coder for {@code OutputT} values.
-     */
-    public Coder<OutputT> getDefaultOutputCoder(CoderRegistry registry, Coder<InputT> inputCoder)
-        throws CannotProvideCoderException;
-
-    /**
-     * Returns the error message for not supported default values in Combine.globally().
-     */
-    public String getIncompatibleGlobalWindowErrorMessage();
-
-    /**
-     * Returns the default value when there are no values added to the accumulator.
-     */
-    public OutputT defaultValue();
-
-    /**
-     * Converts this {@code GloballyCombineFn} into an equivalent
-     * {@link PerKeyCombineFn} that ignores the keys passed to it and
-     * combines the values according to this {@code GloballyCombineFn}.
-     *
-     * @param <K> the type of the (ignored) keys
-     */
-    public <K> PerKeyCombineFn<K, InputT, AccumT, OutputT> asKeyedFn();
-  }
-
-  /**
-   * A {@code PerKeyCombineFn<K, InputT, AccumT, OutputT>} specifies how to combine
-   * a collection of input values of type {@code InputT}, associated with
-   * a key of type {@code K}, into a single output value of type
-   * {@code OutputT}.  It does this via one or more intermediate mutable
-   * accumulator values of type {@code AccumT}.
-   *
-   * <p>Do not implement this interface directly.
-   * Extends {@link KeyedCombineFn} and {@link KeyedCombineFnWithContext} instead.
-   *
-   * @param <K> type of keys
-   * @param <InputT> type of input values
-   * @param <AccumT> type of mutable accumulator values
-   * @param <OutputT> type of output values
-   */
-  public interface PerKeyCombineFn<K, InputT, AccumT, OutputT> extends Serializable {
-    /**
-     * Returns the {@code Coder} to use for accumulator {@code AccumT}
-     * values, or null if it is not able to be inferred.
-     *
-     * <p>By default, uses the knowledge of the {@code Coder} being
-     * used for {@code K} keys and input {@code InputT} values and the
-     * enclosing {@code Pipeline}'s {@code CoderRegistry} to try to
-     * infer the Coder for {@code AccumT} values.
-     *
-     * <p>This is the Coder used to send data through a communication-intensive
-     * shuffle step, so a compact and efficient representation may have
-     * significant performance benefits.
-     */
-    public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<K> keyCoder,
-        Coder<InputT> inputCoder) throws CannotProvideCoderException;
-
-    /**
-     * Returns the {@code Coder} to use by default for output
-     * {@code OutputT} values, or null if it is not able to be inferred.
-     *
-     * <p>By default, uses the knowledge of the {@code Coder} being
-     * used for {@code K} keys and input {@code InputT} values and the
-     * enclosing {@code Pipeline}'s {@code CoderRegistry} to try to
-     * infer the Coder for {@code OutputT} values.
-     */
-    public Coder<OutputT> getDefaultOutputCoder(CoderRegistry registry, Coder<K> keyCoder,
-        Coder<InputT> inputCoder) throws CannotProvideCoderException;
-
-    /**
-     * Returns the a regular {@link GlobalCombineFn} that operates on a specific key.
-     */
-    public abstract GlobalCombineFn<InputT, AccumT, OutputT> forKey(
-        final K key, final Coder<K> keyCoder);
-  }
-
-  /**
-   * An abstract {@link GlobalCombineFn} base class shared by
-   * {@link CombineFn} and {@link CombineFnWithContext}.
-   *
-   * <p>Do not extend this class directly.
-   * Extends {@link CombineFn} and {@link CombineFnWithContext} instead.
-   *
-   * @param <InputT> type of input values
-   * @param <AccumT> type of mutable accumulator values
-   * @param <OutputT> type of output values
-   */
-  abstract static class AbstractGlobalCombineFn<InputT, AccumT, OutputT>
-      implements GlobalCombineFn<InputT, AccumT, OutputT>, Serializable {
-    private static final String INCOMPATIBLE_GLOBAL_WINDOW_ERROR_MESSAGE =
-        "Default values are not supported in Combine.globally() if the output "
-        + "PCollection is not windowed by GlobalWindows. Instead, use "
-        + "Combine.globally().withoutDefaults() to output an empty PCollection if the input "
-        + "PCollection is empty, or Combine.globally().asSingletonView() to get the default "
-        + "output of the CombineFn if the input PCollection is empty.";
-
-    @Override
-    public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<InputT> inputCoder)
-        throws CannotProvideCoderException {
-      return registry.getDefaultCoder(getClass(), AbstractGlobalCombineFn.class,
-          ImmutableMap.<Type, Coder<?>>of(getInputTVariable(), inputCoder), getAccumTVariable());
-    }
-
-    @Override
-    public Coder<OutputT> getDefaultOutputCoder(CoderRegistry registry, Coder<InputT> inputCoder)
-        throws CannotProvideCoderException {
-      return registry.getDefaultCoder(getClass(), AbstractGlobalCombineFn.class,
-          ImmutableMap.<Type, Coder<?>>of(getInputTVariable(), inputCoder, getAccumTVariable(),
-              this.getAccumulatorCoder(registry, inputCoder)),
-          getOutputTVariable());
-    }
-
-    @Override
-    public String getIncompatibleGlobalWindowErrorMessage() {
-      return INCOMPATIBLE_GLOBAL_WINDOW_ERROR_MESSAGE;
-    }
-
-    /**
-     * Returns the {@link TypeVariable} of {@code InputT}.
-     */
-    public TypeVariable<?> getInputTVariable() {
-      return (TypeVariable<?>)
-          new TypeDescriptor<InputT>(AbstractGlobalCombineFn.class) {}.getType();
-    }
-
-    /**
-     * Returns the {@link TypeVariable} of {@code AccumT}.
-     */
-    public TypeVariable<?> getAccumTVariable() {
-      return (TypeVariable<?>)
-          new TypeDescriptor<AccumT>(AbstractGlobalCombineFn.class) {}.getType();
-    }
-
-    /**
-     * Returns the {@link TypeVariable} of {@code OutputT}.
-     */
-    public TypeVariable<?> getOutputTVariable() {
-      return (TypeVariable<?>)
-          new TypeDescriptor<OutputT>(AbstractGlobalCombineFn.class) {}.getType();
-    }
-  }
-
-  /**
-   * An abstract {@link PerKeyCombineFn} base class shared by
-   * {@link KeyedCombineFn} and {@link KeyedCombineFnWithContext}.
-   *
-   * <p>Do not extends this class directly.
-   * Extends {@link KeyedCombineFn} and {@link KeyedCombineFnWithContext} instead.
-   *
-   * @param <K> type of keys
-   * @param <InputT> type of input values
-   * @param <AccumT> type of mutable accumulator values
-   * @param <OutputT> type of output values
-   */
-  abstract static class AbstractPerKeyCombineFn<K, InputT, AccumT, OutputT>
-      implements PerKeyCombineFn<K, InputT, AccumT, OutputT> {
-    @Override
-    public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<K> keyCoder,
-        Coder<InputT> inputCoder) throws CannotProvideCoderException {
-      return registry.getDefaultCoder(getClass(), AbstractPerKeyCombineFn.class,
-          ImmutableMap.<Type, Coder<?>>of(
-              getKTypeVariable(), keyCoder, getInputTVariable(), inputCoder),
-          getAccumTVariable());
-    }
-
-    @Override
-    public Coder<OutputT> getDefaultOutputCoder(CoderRegistry registry, Coder<K> keyCoder,
-        Coder<InputT> inputCoder) throws CannotProvideCoderException {
-      return registry.getDefaultCoder(getClass(), AbstractPerKeyCombineFn.class,
-          ImmutableMap.<Type, Coder<?>>of(getKTypeVariable(), keyCoder, getInputTVariable(),
-              inputCoder, getAccumTVariable(),
-              this.getAccumulatorCoder(registry, keyCoder, inputCoder)),
-          getOutputTVariable());
-    }
-
-    /**
-     * Returns the {@link TypeVariable} of {@code K}.
-     */
-    public TypeVariable<?> getKTypeVariable() {
-      return (TypeVariable<?>) new TypeDescriptor<K>(AbstractPerKeyCombineFn.class) {}.getType();
-    }
-
-    /**
-     * Returns the {@link TypeVariable} of {@code InputT}.
-     */
-    public TypeVariable<?> getInputTVariable() {
-      return (TypeVariable<?>)
-          new TypeDescriptor<InputT>(AbstractPerKeyCombineFn.class) {}.getType();
-    }
-
-    /**
-     * Returns the {@link TypeVariable} of {@code AccumT}.
-     */
-    public TypeVariable<?> getAccumTVariable() {
-      return (TypeVariable<?>)
-          new TypeDescriptor<AccumT>(AbstractPerKeyCombineFn.class) {}.getType();
-    }
-
-    /**
-     * Returns the {@link TypeVariable} of {@code OutputT}.
-     */
-    public TypeVariable<?> getOutputTVariable() {
-      return (TypeVariable<?>)
-          new TypeDescriptor<OutputT>(AbstractPerKeyCombineFn.class) {}.getType();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineFns.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineFns.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineFns.java
deleted file mode 100644
index 656c010..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineFns.java
+++ /dev/null
@@ -1,1100 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.transforms;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineFnBase.GlobalCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineFnBase.PerKeyCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.CombineFnWithContext;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.Context;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.KeyedCombineFnWithContext;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.Serializable;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-
-/**
- * Static utility methods that create combine function instances.
- */
-public class CombineFns {
-
-  /**
-   * Returns a {@link ComposeKeyedCombineFnBuilder} to construct a composed
-   * {@link PerKeyCombineFn}.
-   *
-   * <p>The same {@link TupleTag} cannot be used in a composition multiple times.
-   *
-   * <p>Example:
-   * <pre>{ @code
-   * PCollection<KV<K, Integer>> latencies = ...;
-   *
-   * TupleTag<Integer> maxLatencyTag = new TupleTag<Integer>();
-   * TupleTag<Double> meanLatencyTag = new TupleTag<Double>();
-   *
-   * SimpleFunction<Integer, Integer> identityFn =
-   *     new SimpleFunction<Integer, Integer>() {
-   *       @Override
-   *       public Integer apply(Integer input) {
-   *           return input;
-   *       }};
-   * PCollection<KV<K, CoCombineResult>> maxAndMean = latencies.apply(
-   *     Combine.perKey(
-   *         CombineFns.composeKeyed()
-   *            .with(identityFn, new MaxIntegerFn(), maxLatencyTag)
-   *            .with(identityFn, new MeanFn<Integer>(), meanLatencyTag)));
-   *
-   * PCollection<T> finalResultCollection = maxAndMean
-   *     .apply(ParDo.of(
-   *         new DoFn<KV<K, CoCombineResult>, T>() {
-   *           @Override
-   *           public void processElement(ProcessContext c) throws Exception {
-   *             KV<K, CoCombineResult> e = c.element();
-   *             Integer maxLatency = e.getValue().get(maxLatencyTag);
-   *             Double meanLatency = e.getValue().get(meanLatencyTag);
-   *             .... Do Something ....
-   *             c.output(...some T...);
-   *           }
-   *         }));
-   * } </pre>
-   */
-  public static ComposeKeyedCombineFnBuilder composeKeyed() {
-    return new ComposeKeyedCombineFnBuilder();
-  }
-
-  /**
-   * Returns a {@link ComposeCombineFnBuilder} to construct a composed
-   * {@link GlobalCombineFn}.
-   *
-   * <p>The same {@link TupleTag} cannot be used in a composition multiple times.
-   *
-   * <p>Example:
-   * <pre>{ @code
-   * PCollection<Integer> globalLatencies = ...;
-   *
-   * TupleTag<Integer> maxLatencyTag = new TupleTag<Integer>();
-   * TupleTag<Double> meanLatencyTag = new TupleTag<Double>();
-   *
-   * SimpleFunction<Integer, Integer> identityFn =
-   *     new SimpleFunction<Integer, Integer>() {
-   *       @Override
-   *       public Integer apply(Integer input) {
-   *           return input;
-   *       }};
-   * PCollection<CoCombineResult> maxAndMean = globalLatencies.apply(
-   *     Combine.globally(
-   *         CombineFns.compose()
-   *            .with(identityFn, new MaxIntegerFn(), maxLatencyTag)
-   *            .with(identityFn, new MeanFn<Integer>(), meanLatencyTag)));
-   *
-   * PCollection<T> finalResultCollection = maxAndMean
-   *     .apply(ParDo.of(
-   *         new DoFn<CoCombineResult, T>() {
-   *           @Override
-   *           public void processElement(ProcessContext c) throws Exception {
-   *             CoCombineResult e = c.element();
-   *             Integer maxLatency = e.get(maxLatencyTag);
-   *             Double meanLatency = e.get(meanLatencyTag);
-   *             .... Do Something ....
-   *             c.output(...some T...);
-   *           }
-   *         }));
-   * } </pre>
-   */
-  public static ComposeCombineFnBuilder compose() {
-    return new ComposeCombineFnBuilder();
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A builder class to construct a composed {@link PerKeyCombineFn}.
-   */
-  public static class ComposeKeyedCombineFnBuilder {
-    /**
-     * Returns a {@link ComposedKeyedCombineFn} that can take additional
-     * {@link PerKeyCombineFn PerKeyCombineFns} and apply them as a single combine function.
-     *
-     * <p>The {@link ComposedKeyedCombineFn} extracts inputs from {@code DataT} with
-     * the {@code extractInputFn} and combines them with the {@code keyedCombineFn},
-     * and then it outputs each combined value with a {@link TupleTag} to a
-     * {@link CoCombineResult}.
-     */
-    public <K, DataT, InputT, OutputT> ComposedKeyedCombineFn<DataT, K> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        KeyedCombineFn<K, InputT, ?, OutputT> keyedCombineFn,
-        TupleTag<OutputT> outputTag) {
-      return new ComposedKeyedCombineFn<DataT, K>()
-          .with(extractInputFn, keyedCombineFn, outputTag);
-    }
-
-    /**
-     * Returns a {@link ComposedKeyedCombineFnWithContext} that can take additional
-     * {@link PerKeyCombineFn PerKeyCombineFns} and apply them as a single combine function.
-     *
-     * <p>The {@link ComposedKeyedCombineFnWithContext} extracts inputs from {@code DataT} with
-     * the {@code extractInputFn} and combines them with the {@code keyedCombineFnWithContext},
-     * and then it outputs each combined value with a {@link TupleTag} to a
-     * {@link CoCombineResult}.
-     */
-    public <K, DataT, InputT, OutputT> ComposedKeyedCombineFnWithContext<DataT, K> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        KeyedCombineFnWithContext<K, InputT, ?, OutputT> keyedCombineFnWithContext,
-        TupleTag<OutputT> outputTag) {
-      return new ComposedKeyedCombineFnWithContext<DataT, K>()
-          .with(extractInputFn, keyedCombineFnWithContext, outputTag);
-    }
-
-    /**
-     * Returns a {@link ComposedKeyedCombineFn} that can take additional
-     * {@link PerKeyCombineFn PerKeyCombineFns} and apply them as a single combine function.
-     */
-    public <K, DataT, InputT, OutputT> ComposedKeyedCombineFn<DataT, K> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        CombineFn<InputT, ?, OutputT> combineFn,
-        TupleTag<OutputT> outputTag) {
-      return with(extractInputFn, combineFn.<K>asKeyedFn(), outputTag);
-    }
-
-    /**
-     * Returns a {@link ComposedKeyedCombineFnWithContext} that can take additional
-     * {@link PerKeyCombineFn PerKeyCombineFns} and apply them as a single combine function.
-     */
-    public <K, DataT, InputT, OutputT> ComposedKeyedCombineFnWithContext<DataT, K> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        CombineFnWithContext<InputT, ?, OutputT> combineFnWithContext,
-        TupleTag<OutputT> outputTag) {
-      return with(extractInputFn, combineFnWithContext.<K>asKeyedFn(), outputTag);
-    }
-  }
-
-  /**
-   * A builder class to construct a composed {@link GlobalCombineFn}.
-   */
-  public static class ComposeCombineFnBuilder {
-    /**
-     * Returns a {@link ComposedCombineFn} that can take additional
-     * {@link GlobalCombineFn GlobalCombineFns} and apply them as a single combine function.
-     *
-     * <p>The {@link ComposedCombineFn} extracts inputs from {@code DataT} with
-     * the {@code extractInputFn} and combines them with the {@code combineFn},
-     * and then it outputs each combined value with a {@link TupleTag} to a
-     * {@link CoCombineResult}.
-     */
-    public <DataT, InputT, OutputT> ComposedCombineFn<DataT> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        CombineFn<InputT, ?, OutputT> combineFn,
-        TupleTag<OutputT> outputTag) {
-      return new ComposedCombineFn<DataT>()
-          .with(extractInputFn, combineFn, outputTag);
-    }
-
-    /**
-     * Returns a {@link ComposedCombineFnWithContext} that can take additional
-     * {@link GlobalCombineFn GlobalCombineFns} and apply them as a single combine function.
-     *
-     * <p>The {@link ComposedCombineFnWithContext} extracts inputs from {@code DataT} with
-     * the {@code extractInputFn} and combines them with the {@code combineFnWithContext},
-     * and then it outputs each combined value with a {@link TupleTag} to a
-     * {@link CoCombineResult}.
-     */
-    public <DataT, InputT, OutputT> ComposedCombineFnWithContext<DataT> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        CombineFnWithContext<InputT, ?, OutputT> combineFnWithContext,
-        TupleTag<OutputT> outputTag) {
-      return new ComposedCombineFnWithContext<DataT>()
-          .with(extractInputFn, combineFnWithContext, outputTag);
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A tuple of outputs produced by a composed combine function.
-   *
-   * <p>See {@link #compose()} or {@link #composeKeyed()} for details.
-   */
-  public static class CoCombineResult implements Serializable {
-
-    private enum NullValue {
-      INSTANCE;
-    }
-
-    private final Map<TupleTag<?>, Object> valuesMap;
-
-    /**
-     * The constructor of {@link CoCombineResult}.
-     *
-     * <p>Null values in {@code valuesMap} are stored as an internal placeholder, so the
-     * {@link TupleTag TupleTags} associated with null values remain in the key set and
-     * {@link #get} returns null for them.
-     *
-     * @throws NullPointerException if any key in {@code valuesMap} is null
-     */
-    CoCombineResult(Map<TupleTag<?>, Object> valuesMap) {
-      ImmutableMap.Builder<TupleTag<?>, Object> builder = ImmutableMap.builder();
-      for (Entry<TupleTag<?>, Object> entry : valuesMap.entrySet()) {
-        if (entry.getValue() != null) {
-          builder.put(entry);
-        } else {
-          builder.put(entry.getKey(), NullValue.INSTANCE);
-        }
-      }
-      this.valuesMap = builder.build();
-    }
-
-    /**
-     * Returns the value represented by the given {@link TupleTag}.
-     *
-     * <p>It is an error to request a non-existent tuple tag from the {@link CoCombineResult}.
-     */
-    @SuppressWarnings("unchecked")
-    public <V> V get(TupleTag<V> tag) {
-      checkArgument(
-          valuesMap.keySet().contains(tag), "TupleTag " + tag + " is not in the CoCombineResult");
-      Object value = valuesMap.get(tag);
-      if (value == NullValue.INSTANCE) {
-        return null;
-      } else {
-        return (V) value;
-      }
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A composed {@link CombineFn} that applies multiple {@link CombineFn CombineFns}.
-   *
-   * <p>For each {@link CombineFn} it extracts inputs from {@code DataT} with
-   * the {@code extractInputFn} and combines them,
-   * and then it outputs each combined value with a {@link TupleTag} to a
-   * {@link CoCombineResult}.
-   */
-  public static class ComposedCombineFn<DataT> extends CombineFn<DataT, Object[], CoCombineResult> {
-
-    private final List<CombineFn<Object, Object, Object>> combineFns;
-    private final List<SerializableFunction<DataT, Object>> extractInputFns;
-    private final List<TupleTag<?>> outputTags;
-    private final int combineFnCount;
-
-    private ComposedCombineFn() {
-      this.extractInputFns = ImmutableList.of();
-      this.combineFns = ImmutableList.of();
-      this.outputTags = ImmutableList.of();
-      this.combineFnCount = 0;
-    }
-
-    private ComposedCombineFn(
-        ImmutableList<SerializableFunction<DataT, ?>> extractInputFns,
-        ImmutableList<CombineFn<?, ?, ?>> combineFns,
-        ImmutableList<TupleTag<?>> outputTags) {
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      List<SerializableFunction<DataT, Object>> castedExtractInputFns = (List) extractInputFns;
-      this.extractInputFns = castedExtractInputFns;
-
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      List<CombineFn<Object, Object, Object>> castedCombineFns = (List) combineFns;
-      this.combineFns = castedCombineFns;
-
-      this.outputTags = outputTags;
-      this.combineFnCount = this.combineFns.size();
-    }
-
-    /**
-     * Returns a {@link ComposedCombineFn} with an additional {@link CombineFn}.
-     */
-    public <InputT, OutputT> ComposedCombineFn<DataT> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        CombineFn<InputT, ?, OutputT> combineFn,
-        TupleTag<OutputT> outputTag) {
-      checkUniqueness(outputTags, outputTag);
-      return new ComposedCombineFn<>(
-          ImmutableList.<SerializableFunction<DataT, ?>>builder()
-              .addAll(extractInputFns)
-              .add(extractInputFn)
-              .build(),
-          ImmutableList.<CombineFn<?, ?, ?>>builder()
-              .addAll(combineFns)
-              .add(combineFn)
-              .build(),
-          ImmutableList.<TupleTag<?>>builder()
-              .addAll(outputTags)
-              .add(outputTag)
-              .build());
-    }
-
-    /**
-     * Returns a {@link ComposedCombineFnWithContext} with an additional
-     * {@link CombineFnWithContext}.
-     */
-    public <InputT, OutputT> ComposedCombineFnWithContext<DataT> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        CombineFnWithContext<InputT, ?, OutputT> combineFn,
-        TupleTag<OutputT> outputTag) {
-      checkUniqueness(outputTags, outputTag);
-      List<CombineFnWithContext<Object, Object, Object>> fnsWithContext = Lists.newArrayList();
-      for (CombineFn<Object, Object, Object> fn : combineFns) {
-        fnsWithContext.add(toFnWithContext(fn));
-      }
-      return new ComposedCombineFnWithContext<>(
-          ImmutableList.<SerializableFunction<DataT, ?>>builder()
-              .addAll(extractInputFns)
-              .add(extractInputFn)
-              .build(),
-          ImmutableList.<CombineFnWithContext<?, ?, ?>>builder()
-              .addAll(fnsWithContext)
-              .add(combineFn)
-              .build(),
-          ImmutableList.<TupleTag<?>>builder()
-              .addAll(outputTags)
-              .add(outputTag)
-              .build());
-    }
-
-    @Override
-    public Object[] createAccumulator() {
-      Object[] accumsArray = new Object[combineFnCount];
-      for (int i = 0; i < combineFnCount; ++i) {
-        accumsArray[i] = combineFns.get(i).createAccumulator();
-      }
-      return accumsArray;
-    }
-
-    @Override
-    public Object[] addInput(Object[] accumulator, DataT value) {
-      for (int i = 0; i < combineFnCount; ++i) {
-        Object input = extractInputFns.get(i).apply(value);
-        accumulator[i] = combineFns.get(i).addInput(accumulator[i], input);
-      }
-      return accumulator;
-    }
-
-    @Override
-    public Object[] mergeAccumulators(Iterable<Object[]> accumulators) {
-      Iterator<Object[]> iter = accumulators.iterator();
-      if (!iter.hasNext()) {
-        return createAccumulator();
-      } else {
-        // Reuses the first accumulator, and overwrites its values.
-        // It is safe because {@code accum[i]} only depends on
-        // the i-th component of each accumulator.
-        Object[] accum = iter.next();
-        for (int i = 0; i < combineFnCount; ++i) {
-          accum[i] = combineFns.get(i).mergeAccumulators(new ProjectionIterable(accumulators, i));
-        }
-        return accum;
-      }
-    }
-
-    @Override
-    public CoCombineResult extractOutput(Object[] accumulator) {
-      Map<TupleTag<?>, Object> valuesMap = Maps.newHashMap();
-      for (int i = 0; i < combineFnCount; ++i) {
-        valuesMap.put(
-            outputTags.get(i),
-            combineFns.get(i).extractOutput(accumulator[i]));
-      }
-      return new CoCombineResult(valuesMap);
-    }
-
-    @Override
-    public Object[] compact(Object[] accumulator) {
-      for (int i = 0; i < combineFnCount; ++i) {
-        accumulator[i] = combineFns.get(i).compact(accumulator[i]);
-      }
-      return accumulator;
-    }
-
-    @Override
-    public Coder<Object[]> getAccumulatorCoder(CoderRegistry registry, Coder<DataT> dataCoder)
-        throws CannotProvideCoderException {
-      List<Coder<Object>> coders = Lists.newArrayList();
-      for (int i = 0; i < combineFnCount; ++i) {
-        Coder<Object> inputCoder =
-            registry.getDefaultOutputCoder(extractInputFns.get(i), dataCoder);
-        coders.add(combineFns.get(i).getAccumulatorCoder(registry, inputCoder));
-      }
-      return new ComposedAccumulatorCoder(coders);
-    }
-  }
-
-  /**
-   * A composed {@link CombineFnWithContext} that applies multiple
-   * {@link CombineFnWithContext CombineFnWithContexts}.
-   *
-   * <p>For each {@link CombineFnWithContext} it extracts inputs from {@code DataT} with
-   * the {@code extractInputFn} and combines them,
-   * and then it outputs each combined value with a {@link TupleTag} to a
-   * {@link CoCombineResult}.
-   */
-  public static class ComposedCombineFnWithContext<DataT>
-      extends CombineFnWithContext<DataT, Object[], CoCombineResult> {
-
-    private final List<SerializableFunction<DataT, Object>> extractInputFns;
-    private final List<CombineFnWithContext<Object, Object, Object>> combineFnWithContexts;
-    private final List<TupleTag<?>> outputTags;
-    private final int combineFnCount;
-
-    private ComposedCombineFnWithContext() {
-      this.extractInputFns = ImmutableList.of();
-      this.combineFnWithContexts = ImmutableList.of();
-      this.outputTags = ImmutableList.of();
-      this.combineFnCount = 0;
-    }
-
-    private ComposedCombineFnWithContext(
-        ImmutableList<SerializableFunction<DataT, ?>> extractInputFns,
-        ImmutableList<CombineFnWithContext<?, ?, ?>> combineFnWithContexts,
-        ImmutableList<TupleTag<?>> outputTags) {
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      List<SerializableFunction<DataT, Object>> castedExtractInputFns =
-          (List) extractInputFns;
-      this.extractInputFns = castedExtractInputFns;
-
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      List<CombineFnWithContext<Object, Object, Object>> castedCombineFnWithContexts
-          = (List) combineFnWithContexts;
-      this.combineFnWithContexts = castedCombineFnWithContexts;
-
-      this.outputTags = outputTags;
-      this.combineFnCount = this.combineFnWithContexts.size();
-    }
-
-    /**
-     * Returns a {@link ComposedCombineFnWithContext} with an additional {@link GlobalCombineFn}.
-     */
-    public <InputT, OutputT> ComposedCombineFnWithContext<DataT> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        GlobalCombineFn<InputT, ?, OutputT> globalCombineFn,
-        TupleTag<OutputT> outputTag) {
-      checkUniqueness(outputTags, outputTag);
-      return new ComposedCombineFnWithContext<>(
-          ImmutableList.<SerializableFunction<DataT, ?>>builder()
-              .addAll(extractInputFns)
-              .add(extractInputFn)
-              .build(),
-          ImmutableList.<CombineFnWithContext<?, ?, ?>>builder()
-              .addAll(combineFnWithContexts)
-              .add(toFnWithContext(globalCombineFn))
-              .build(),
-          ImmutableList.<TupleTag<?>>builder()
-              .addAll(outputTags)
-              .add(outputTag)
-              .build());
-    }
-
-    @Override
-    public Object[] createAccumulator(Context c) {
-      Object[] accumsArray = new Object[combineFnCount];
-      for (int i = 0; i < combineFnCount; ++i) {
-        accumsArray[i] = combineFnWithContexts.get(i).createAccumulator(c);
-      }
-      return accumsArray;
-    }
-
-    @Override
-    public Object[] addInput(Object[] accumulator, DataT value, Context c) {
-      for (int i = 0; i < combineFnCount; ++i) {
-        Object input = extractInputFns.get(i).apply(value);
-        accumulator[i] = combineFnWithContexts.get(i).addInput(accumulator[i], input, c);
-      }
-      return accumulator;
-    }
-
-    @Override
-    public Object[] mergeAccumulators(Iterable<Object[]> accumulators, Context c) {
-      Iterator<Object[]> iter = accumulators.iterator();
-      if (!iter.hasNext()) {
-        return createAccumulator(c);
-      } else {
-        // Reuses the first accumulator, and overwrites its values.
-        // It is safe because {@code accum[i]} only depends on
-        // the i-th component of each accumulator.
-        Object[] accum = iter.next();
-        for (int i = 0; i < combineFnCount; ++i) {
-          accum[i] = combineFnWithContexts.get(i).mergeAccumulators(
-              new ProjectionIterable(accumulators, i), c);
-        }
-        return accum;
-      }
-    }
-
-    @Override
-    public CoCombineResult extractOutput(Object[] accumulator, Context c) {
-      Map<TupleTag<?>, Object> valuesMap = Maps.newHashMap();
-      for (int i = 0; i < combineFnCount; ++i) {
-        valuesMap.put(
-            outputTags.get(i),
-            combineFnWithContexts.get(i).extractOutput(accumulator[i], c));
-      }
-      return new CoCombineResult(valuesMap);
-    }
-
-    @Override
-    public Object[] compact(Object[] accumulator, Context c) {
-      for (int i = 0; i < combineFnCount; ++i) {
-        accumulator[i] = combineFnWithContexts.get(i).compact(accumulator[i], c);
-      }
-      return accumulator;
-    }
-
-    @Override
-    public Coder<Object[]> getAccumulatorCoder(CoderRegistry registry, Coder<DataT> dataCoder)
-        throws CannotProvideCoderException {
-      List<Coder<Object>> coders = Lists.newArrayList();
-      for (int i = 0; i < combineFnCount; ++i) {
-        Coder<Object> inputCoder =
-            registry.getDefaultOutputCoder(extractInputFns.get(i), dataCoder);
-        coders.add(combineFnWithContexts.get(i).getAccumulatorCoder(registry, inputCoder));
-      }
-      return new ComposedAccumulatorCoder(coders);
-    }
-  }
-
-  /**
-   * A composed {@link KeyedCombineFn} that applies multiple {@link KeyedCombineFn KeyedCombineFns}.
-   *
-   * <p>For each {@link KeyedCombineFn} it extracts inputs from {@code DataT} with
-   * the {@code extractInputFn} and combines them,
-   * and then it outputs each combined value with a {@link TupleTag} to a
-   * {@link CoCombineResult}.
-   */
-  public static class ComposedKeyedCombineFn<DataT, K>
-      extends KeyedCombineFn<K, DataT, Object[], CoCombineResult> {
-
-    private final List<SerializableFunction<DataT, Object>> extractInputFns;
-    private final List<KeyedCombineFn<K, Object, Object, Object>> keyedCombineFns;
-    private final List<TupleTag<?>> outputTags;
-    private final int combineFnCount;
-
-    private ComposedKeyedCombineFn() {
-      this.extractInputFns = ImmutableList.of();
-      this.keyedCombineFns = ImmutableList.of();
-      this.outputTags = ImmutableList.of();
-      this.combineFnCount = 0;
-    }
-
-    private ComposedKeyedCombineFn(
-        ImmutableList<SerializableFunction<DataT, ?>> extractInputFns,
-        ImmutableList<KeyedCombineFn<K, ?, ?, ?>> keyedCombineFns,
-        ImmutableList<TupleTag<?>> outputTags) {
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      List<SerializableFunction<DataT, Object>> castedExtractInputFns = (List) extractInputFns;
-      this.extractInputFns = castedExtractInputFns;
-
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      List<KeyedCombineFn<K, Object, Object, Object>> castedKeyedCombineFns =
-          (List) keyedCombineFns;
-      this.keyedCombineFns = castedKeyedCombineFns;
-      this.outputTags = outputTags;
-      this.combineFnCount = this.keyedCombineFns.size();
-    }
-
-    /**
-     * Returns a {@link ComposedKeyedCombineFn} with an additional {@link KeyedCombineFn}.
-     */
-    public <InputT, OutputT> ComposedKeyedCombineFn<DataT, K> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        KeyedCombineFn<K, InputT, ?, OutputT> keyedCombineFn,
-        TupleTag<OutputT> outputTag) {
-      checkUniqueness(outputTags, outputTag);
-      return new ComposedKeyedCombineFn<>(
-          ImmutableList.<SerializableFunction<DataT, ?>>builder()
-          .addAll(extractInputFns)
-          .add(extractInputFn)
-          .build(),
-      ImmutableList.<KeyedCombineFn<K, ?, ?, ?>>builder()
-          .addAll(keyedCombineFns)
-          .add(keyedCombineFn)
-          .build(),
-      ImmutableList.<TupleTag<?>>builder()
-          .addAll(outputTags)
-          .add(outputTag)
-          .build());
-    }
-
-    /**
-     * Returns a {@link ComposedKeyedCombineFnWithContext} with an additional
-     * {@link KeyedCombineFnWithContext}.
-     */
-    public <InputT, OutputT> ComposedKeyedCombineFnWithContext<DataT, K> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        KeyedCombineFnWithContext<K, InputT, ?, OutputT> keyedCombineFn,
-        TupleTag<OutputT> outputTag) {
-      checkUniqueness(outputTags, outputTag);
-      List<KeyedCombineFnWithContext<K, Object, Object, Object>> fnsWithContext =
-          Lists.newArrayList();
-      for (KeyedCombineFn<K, Object, Object, Object> fn : keyedCombineFns) {
-        fnsWithContext.add(toFnWithContext(fn));
-      }
-      return new ComposedKeyedCombineFnWithContext<>(
-          ImmutableList.<SerializableFunction<DataT, ?>>builder()
-          .addAll(extractInputFns)
-          .add(extractInputFn)
-          .build(),
-      ImmutableList.<KeyedCombineFnWithContext<K, ?, ?, ?>>builder()
-          .addAll(fnsWithContext)
-          .add(keyedCombineFn)
-          .build(),
-      ImmutableList.<TupleTag<?>>builder()
-          .addAll(outputTags)
-          .add(outputTag)
-          .build());
-    }
-
-    /**
-     * Returns a {@link ComposedKeyedCombineFn} with an additional {@link CombineFn}.
-     */
-    public <InputT, OutputT> ComposedKeyedCombineFn<DataT, K> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        CombineFn<InputT, ?, OutputT> keyedCombineFn,
-        TupleTag<OutputT> outputTag) {
-      return with(extractInputFn, keyedCombineFn.<K>asKeyedFn(), outputTag);
-    }
-
-    /**
-     * Returns a {@link ComposedKeyedCombineFnWithContext} with an additional
-     * {@link CombineFnWithContext}.
-     */
-    public <InputT, OutputT> ComposedKeyedCombineFnWithContext<DataT, K> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        CombineFnWithContext<InputT, ?, OutputT> keyedCombineFn,
-        TupleTag<OutputT> outputTag) {
-      return with(extractInputFn, keyedCombineFn.<K>asKeyedFn(), outputTag);
-    }
-
-    @Override
-    public Object[] createAccumulator(K key) {
-      Object[] accumsArray = new Object[combineFnCount];
-      for (int i = 0; i < combineFnCount; ++i) {
-        accumsArray[i] = keyedCombineFns.get(i).createAccumulator(key);
-      }
-      return accumsArray;
-    }
-
-    @Override
-    public Object[] addInput(K key, Object[] accumulator, DataT value) {
-      for (int i = 0; i < combineFnCount; ++i) {
-        Object input = extractInputFns.get(i).apply(value);
-        accumulator[i] = keyedCombineFns.get(i).addInput(key, accumulator[i], input);
-      }
-      return accumulator;
-    }
-
-    @Override
-    public Object[] mergeAccumulators(K key, final Iterable<Object[]> accumulators) {
-      Iterator<Object[]> iter = accumulators.iterator();
-      if (!iter.hasNext()) {
-        return createAccumulator(key);
-      } else {
-        // Reuses the first accumulator, and overwrites its values.
-        // It is safe because {@code accum[i]} only depends on
-        // the i-th component of each accumulator.
-        Object[] accum = iter.next();
-        for (int i = 0; i < combineFnCount; ++i) {
-          accum[i] = keyedCombineFns.get(i).mergeAccumulators(
-              key, new ProjectionIterable(accumulators, i));
-        }
-        return accum;
-      }
-    }
-
-    @Override
-    public CoCombineResult extractOutput(K key, Object[] accumulator) {
-      Map<TupleTag<?>, Object> valuesMap = Maps.newHashMap();
-      for (int i = 0; i < combineFnCount; ++i) {
-        valuesMap.put(
-            outputTags.get(i),
-            keyedCombineFns.get(i).extractOutput(key, accumulator[i]));
-      }
-      return new CoCombineResult(valuesMap);
-    }
-
-    @Override
-    public Object[] compact(K key, Object[] accumulator) {
-      for (int i = 0; i < combineFnCount; ++i) {
-        accumulator[i] = keyedCombineFns.get(i).compact(key, accumulator[i]);
-      }
-      return accumulator;
-    }
-
-    @Override
-    public Coder<Object[]> getAccumulatorCoder(
-        CoderRegistry registry, Coder<K> keyCoder, Coder<DataT> dataCoder)
-        throws CannotProvideCoderException {
-      List<Coder<Object>> coders = Lists.newArrayList();
-      for (int i = 0; i < combineFnCount; ++i) {
-        Coder<Object> inputCoder =
-            registry.getDefaultOutputCoder(extractInputFns.get(i), dataCoder);
-        coders.add(keyedCombineFns.get(i).getAccumulatorCoder(registry, keyCoder, inputCoder));
-      }
-      return new ComposedAccumulatorCoder(coders);
-    }
-  }
-
-  /**
-   * A composed {@link KeyedCombineFnWithContext} that applies multiple
-   * {@link KeyedCombineFnWithContext KeyedCombineFnWithContexts}.
-   *
-   * <p>For each {@link KeyedCombineFnWithContext} it extracts inputs from {@code DataT} with
-   * the {@code extractInputFn} and combines them,
-   * and then it outputs each combined value with a {@link TupleTag} to a
-   * {@link CoCombineResult}.
-   */
-  public static class ComposedKeyedCombineFnWithContext<DataT, K>
-      extends KeyedCombineFnWithContext<K, DataT, Object[], CoCombineResult> {
-
-    private final List<SerializableFunction<DataT, Object>> extractInputFns;
-    private final List<KeyedCombineFnWithContext<K, Object, Object, Object>> keyedCombineFns;
-    private final List<TupleTag<?>> outputTags;
-    private final int combineFnCount;
-
-    private ComposedKeyedCombineFnWithContext() {
-      this.extractInputFns = ImmutableList.of();
-      this.keyedCombineFns = ImmutableList.of();
-      this.outputTags = ImmutableList.of();
-      this.combineFnCount = 0;
-    }
-
-    private ComposedKeyedCombineFnWithContext(
-        ImmutableList<SerializableFunction<DataT, ?>> extractInputFns,
-        ImmutableList<KeyedCombineFnWithContext<K, ?, ?, ?>> keyedCombineFns,
-        ImmutableList<TupleTag<?>> outputTags) {
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      List<SerializableFunction<DataT, Object>> castedExtractInputFns =
-          (List) extractInputFns;
-      this.extractInputFns = castedExtractInputFns;
-
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      List<KeyedCombineFnWithContext<K, Object, Object, Object>> castedKeyedCombineFns =
-          (List) keyedCombineFns;
-      this.keyedCombineFns = castedKeyedCombineFns;
-      this.outputTags = outputTags;
-      this.combineFnCount = this.keyedCombineFns.size();
-    }
-
-    /**
-     * Returns a {@link ComposedKeyedCombineFnWithContext} with an additional
-     * {@link PerKeyCombineFn}.
-     */
-    public <InputT, OutputT> ComposedKeyedCombineFnWithContext<DataT, K> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        PerKeyCombineFn<K, InputT, ?, OutputT> perKeyCombineFn,
-        TupleTag<OutputT> outputTag) {
-      checkUniqueness(outputTags, outputTag);
-      return new ComposedKeyedCombineFnWithContext<>(
-          ImmutableList.<SerializableFunction<DataT, ?>>builder()
-              .addAll(extractInputFns)
-              .add(extractInputFn)
-              .build(),
-          ImmutableList.<KeyedCombineFnWithContext<K, ?, ?, ?>>builder()
-              .addAll(keyedCombineFns)
-              .add(toFnWithContext(perKeyCombineFn))
-              .build(),
-          ImmutableList.<TupleTag<?>>builder()
-              .addAll(outputTags)
-              .add(outputTag)
-              .build());
-    }
-
-    /**
-     * Returns a {@link ComposedKeyedCombineFnWithContext} with an additional
-     * {@link GlobalCombineFn}.
-     */
-    public <InputT, OutputT> ComposedKeyedCombineFnWithContext<DataT, K> with(
-        SimpleFunction<DataT, InputT> extractInputFn,
-        GlobalCombineFn<InputT, ?, OutputT> perKeyCombineFn,
-        TupleTag<OutputT> outputTag) {
-      return with(extractInputFn, perKeyCombineFn.<K>asKeyedFn(), outputTag);
-    }
-
-    @Override
-    public Object[] createAccumulator(K key, Context c) {
-      Object[] accumsArray = new Object[combineFnCount];
-      for (int i = 0; i < combineFnCount; ++i) {
-        accumsArray[i] = keyedCombineFns.get(i).createAccumulator(key, c);
-      }
-      return accumsArray;
-    }
-
-    @Override
-    public Object[] addInput(K key, Object[] accumulator, DataT value, Context c) {
-      for (int i = 0; i < combineFnCount; ++i) {
-        Object input = extractInputFns.get(i).apply(value);
-        accumulator[i] = keyedCombineFns.get(i).addInput(key, accumulator[i], input, c);
-      }
-      return accumulator;
-    }
-
-    @Override
-    public Object[] mergeAccumulators(K key, Iterable<Object[]> accumulators, Context c) {
-      Iterator<Object[]> iter = accumulators.iterator();
-      if (!iter.hasNext()) {
-        return createAccumulator(key, c);
-      } else {
-        // Reuses the first accumulator, and overwrites its values.
-        // It is safe because {@code accum[i]} only depends on
-        // the i-th component of each accumulator.
-        Object[] accum = iter.next();
-        for (int i = 0; i < combineFnCount; ++i) {
-          accum[i] = keyedCombineFns.get(i).mergeAccumulators(
-              key, new ProjectionIterable(accumulators, i), c);
-        }
-        return accum;
-      }
-    }
-
-    @Override
-    public CoCombineResult extractOutput(K key, Object[] accumulator, Context c) {
-      Map<TupleTag<?>, Object> valuesMap = Maps.newHashMap();
-      for (int i = 0; i < combineFnCount; ++i) {
-        valuesMap.put(
-            outputTags.get(i),
-            keyedCombineFns.get(i).extractOutput(key, accumulator[i], c));
-      }
-      return new CoCombineResult(valuesMap);
-    }
-
-    @Override
-    public Object[] compact(K key, Object[] accumulator, Context c) {
-      for (int i = 0; i < combineFnCount; ++i) {
-        accumulator[i] = keyedCombineFns.get(i).compact(key, accumulator[i], c);
-      }
-      return accumulator;
-    }
-
-    @Override
-    public Coder<Object[]> getAccumulatorCoder(
-        CoderRegistry registry, Coder<K> keyCoder, Coder<DataT> dataCoder)
-        throws CannotProvideCoderException {
-      List<Coder<Object>> coders = Lists.newArrayList();
-      for (int i = 0; i < combineFnCount; ++i) {
-        Coder<Object> inputCoder =
-            registry.getDefaultOutputCoder(extractInputFns.get(i), dataCoder);
-        coders.add(keyedCombineFns.get(i).getAccumulatorCoder(
-            registry, keyCoder, inputCoder));
-      }
-      return new ComposedAccumulatorCoder(coders);
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static class ProjectionIterable implements Iterable<Object> {
-    private final Iterable<Object[]> iterable;
-    private final int column;
-
-    private ProjectionIterable(Iterable<Object[]> iterable, int column) {
-      this.iterable = iterable;
-      this.column = column;
-    }
-
-    @Override
-    public Iterator<Object> iterator() {
-      final Iterator<Object[]> iter = iterable.iterator();
-      return new Iterator<Object>() {
-        @Override
-        public boolean hasNext() {
-          return iter.hasNext();
-        }
-
-        @Override
-        public Object next() {
-          return iter.next()[column];
-        }
-
-        @Override
-        public void remove() {
-            throw new UnsupportedOperationException();
-        }
-      };
-    }
-  }
-
-  private static class ComposedAccumulatorCoder extends StandardCoder<Object[]> {
-    private List<Coder<Object>> coders;
-    private int codersCount;
-
-    public ComposedAccumulatorCoder(List<Coder<Object>> coders) {
-      this.coders = ImmutableList.copyOf(coders);
-      this.codersCount  = coders.size();
-    }
-
-    @SuppressWarnings({"rawtypes", "unchecked"})
-    @JsonCreator
-    public static ComposedAccumulatorCoder of(
-        @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-        List<Coder<?>> components) {
-      return new ComposedAccumulatorCoder((List) components);
-    }
-
-    @Override
-    public void encode(Object[] value, OutputStream outStream, Context context)
-        throws CoderException, IOException {
-      checkArgument(value.length == codersCount);
-      Context nestedContext = context.nested();
-      for (int i = 0; i < codersCount; ++i) {
-        coders.get(i).encode(value[i], outStream, nestedContext);
-      }
-    }
-
-    @Override
-    public Object[] decode(InputStream inStream, Context context)
-        throws CoderException, IOException {
-      Object[] ret = new Object[codersCount];
-      Context nestedContext = context.nested();
-      for (int i = 0; i < codersCount; ++i) {
-        ret[i] = coders.get(i).decode(inStream, nestedContext);
-      }
-      return ret;
-    }
-
-    @Override
-    public List<? extends Coder<?>> getCoderArguments() {
-      return coders;
-    }
-
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      for (int i = 0; i < codersCount; ++i) {
-        coders.get(i).verifyDeterministic();
-      }
-    }
-  }
-
-  @SuppressWarnings("unchecked")
-  private static <InputT, AccumT, OutputT> CombineFnWithContext<InputT, AccumT, OutputT>
-  toFnWithContext(GlobalCombineFn<InputT, AccumT, OutputT> globalCombineFn) {
-    if (globalCombineFn instanceof CombineFnWithContext) {
-      return (CombineFnWithContext<InputT, AccumT, OutputT>) globalCombineFn;
-    } else {
-      final CombineFn<InputT, AccumT, OutputT> combineFn =
-          (CombineFn<InputT, AccumT, OutputT>) globalCombineFn;
-      return new CombineFnWithContext<InputT, AccumT, OutputT>() {
-        @Override
-        public AccumT createAccumulator(Context c) {
-          return combineFn.createAccumulator();
-        }
-        @Override
-        public AccumT addInput(AccumT accumulator, InputT input, Context c) {
-          return combineFn.addInput(accumulator, input);
-        }
-        @Override
-        public AccumT mergeAccumulators(Iterable<AccumT> accumulators, Context c) {
-          return combineFn.mergeAccumulators(accumulators);
-        }
-        @Override
-        public OutputT extractOutput(AccumT accumulator, Context c) {
-          return combineFn.extractOutput(accumulator);
-        }
-        @Override
-        public AccumT compact(AccumT accumulator, Context c) {
-          return combineFn.compact(accumulator);
-        }
-        @Override
-        public OutputT defaultValue() {
-          return combineFn.defaultValue();
-        }
-        @Override
-        public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<InputT> inputCoder)
-            throws CannotProvideCoderException {
-          return combineFn.getAccumulatorCoder(registry, inputCoder);
-        }
-        @Override
-        public Coder<OutputT> getDefaultOutputCoder(
-            CoderRegistry registry, Coder<InputT> inputCoder) throws CannotProvideCoderException {
-          return combineFn.getDefaultOutputCoder(registry, inputCoder);
-        }
-      };
-    }
-  }
-
-  private static <K, InputT, AccumT, OutputT> KeyedCombineFnWithContext<K, InputT, AccumT, OutputT>
-  toFnWithContext(PerKeyCombineFn<K, InputT, AccumT, OutputT> perKeyCombineFn) {
-    if (perKeyCombineFn instanceof KeyedCombineFnWithContext) {
-      @SuppressWarnings("unchecked")
-      KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> keyedCombineFnWithContext =
-          (KeyedCombineFnWithContext<K, InputT, AccumT, OutputT>) perKeyCombineFn;
-      return keyedCombineFnWithContext;
-    } else {
-      @SuppressWarnings("unchecked")
-      final KeyedCombineFn<K, InputT, AccumT, OutputT> keyedCombineFn =
-          (KeyedCombineFn<K, InputT, AccumT, OutputT>) perKeyCombineFn;
-      return new KeyedCombineFnWithContext<K, InputT, AccumT, OutputT>() {
-        @Override
-        public AccumT createAccumulator(K key, Context c) {
-          return keyedCombineFn.createAccumulator(key);
-        }
-        @Override
-        public AccumT addInput(K key, AccumT accumulator, InputT value, Context c) {
-          return keyedCombineFn.addInput(key, accumulator, value);
-        }
-        @Override
-        public AccumT mergeAccumulators(K key, Iterable<AccumT> accumulators, Context c) {
-          return keyedCombineFn.mergeAccumulators(key, accumulators);
-        }
-        @Override
-        public OutputT extractOutput(K key, AccumT accumulator, Context c) {
-          return keyedCombineFn.extractOutput(key, accumulator);
-        }
-        @Override
-        public AccumT compact(K key, AccumT accumulator, Context c) {
-          return keyedCombineFn.compact(key, accumulator);
-        }
-        @Override
-        public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<K> keyCoder,
-            Coder<InputT> inputCoder) throws CannotProvideCoderException {
-          return keyedCombineFn.getAccumulatorCoder(registry, keyCoder, inputCoder);
-        }
-        @Override
-        public Coder<OutputT> getDefaultOutputCoder(CoderRegistry registry, Coder<K> keyCoder,
-            Coder<InputT> inputCoder) throws CannotProvideCoderException {
-          return keyedCombineFn.getDefaultOutputCoder(registry, keyCoder, inputCoder);
-        }
-      };
-    }
-  }
-
-  private static <OutputT> void checkUniqueness(
-      List<TupleTag<?>> registeredTags, TupleTag<OutputT> outputTag) {
-    checkArgument(
-        !registeredTags.contains(outputTag),
-        "Cannot compose with tuple tag %s because it is already present in the composition.",
-        outputTag);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineWithContext.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineWithContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineWithContext.java
deleted file mode 100644
index fdf56e3..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/CombineWithContext.java
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-
-/**
- * This class contains combine functions that have access to {@code PipelineOptions} and side inputs
- * through {@code CombineWithContext.Context}.
- *
- * <p>{@link CombineFnWithContext} and {@link KeyedCombineFnWithContext} are for users to extend.
- */
-public class CombineWithContext {
-
-  /**
-   * Information accessible to all methods in {@code CombineFnWithContext}
-   * and {@code KeyedCombineFnWithContext}.
-   */
-  public abstract static class Context {
-    /**
-     * Returns the {@code PipelineOptions} specified with the
-     * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner}
-     * invoking this {@code KeyedCombineFn}.
-     */
-    public abstract PipelineOptions getPipelineOptions();
-
-    /**
-     * Returns the value of the side input for the window corresponding to the
-     * window of the main input element.
-     */
-    public abstract <T> T sideInput(PCollectionView<T> view);
-  }
-
-  /**
-   * An internal interface for signaling that a {@code GlobalCombineFn}
-   * or a {@code PerKeyCombineFn} needs to access {@code CombineWithContext.Context}.
-   *
-   * <p>For internal use only.
-   */
-  public interface RequiresContextInternal {}
-
-  /**
-   * A combine function that has access to {@code PipelineOptions} and side inputs through
-   * {@code CombineWithContext.Context}.
-   *
-   * <p>See the equivalent {@link CombineFn} for details about combine functions.
-   */
-  public abstract static class CombineFnWithContext<InputT, AccumT, OutputT>
-      extends CombineFnBase.AbstractGlobalCombineFn<InputT, AccumT, OutputT>
-      implements RequiresContextInternal {
-    /**
-     * Returns a new, mutable accumulator value, representing the accumulation of zero input values.
-     *
-     * <p>It is equivalent to {@link CombineFn#createAccumulator}, but it has additional access to
-     * {@code CombineWithContext.Context}.
-     */
-    public abstract AccumT createAccumulator(Context c);
-
-    /**
-     * Adds the given input value to the given accumulator, returning the
-     * new accumulator value.
-     *
-     * <p>It is equivalent to {@link CombineFn#addInput}, but it has additional access to
-     * {@code CombineWithContext.Context}.
-     */
-    public abstract AccumT addInput(AccumT accumulator, InputT input, Context c);
-
-    /**
-     * Returns an accumulator representing the accumulation of all the
-     * input values accumulated in the merging accumulators.
-     *
-     * <p>It is equivalent to {@link CombineFn#mergeAccumulators}, but it has additional access to
-     * {@code CombineWithContext.Context}.
-     */
-    public abstract AccumT mergeAccumulators(Iterable<AccumT> accumulators, Context c);
-
-    /**
-     * Returns the output value that is the result of combining all
-     * the input values represented by the given accumulator.
-     *
-     * <p>It is equivalent to {@link CombineFn#extractOutput}, but it has additional access to
-     * {@code CombineWithContext.Context}.
-     */
-    public abstract OutputT extractOutput(AccumT accumulator, Context c);
-
-    /**
-     * Returns an accumulator that represents the same logical value as the
-     * input accumulator, but may have a more compact representation.
-     *
-     * <p>It is equivalent to {@link CombineFn#compact}, but it has additional access to
-     * {@code CombineWithContext.Context}.
-     */
-    public AccumT compact(AccumT accumulator, Context c) {
-      return accumulator;
-    }
-
-    @Override
-    public OutputT defaultValue() {
-      throw new UnsupportedOperationException(
-          "Override this function to provide the default value.");
-    }
-
-    @SuppressWarnings({"unchecked", "rawtypes"})
-    @Override
-    public <K> KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> asKeyedFn() {
-      // The key, an object, is never even looked at.
-      return new KeyedCombineFnWithContext<K, InputT, AccumT, OutputT>() {
-        @Override
-        public AccumT createAccumulator(K key, Context c) {
-          return CombineFnWithContext.this.createAccumulator(c);
-        }
-
-        @Override
-        public AccumT addInput(K key, AccumT accumulator, InputT input, Context c) {
-          return CombineFnWithContext.this.addInput(accumulator, input, c);
-        }
-
-        @Override
-        public AccumT mergeAccumulators(K key, Iterable<AccumT> accumulators, Context c) {
-          return CombineFnWithContext.this.mergeAccumulators(accumulators, c);
-        }
-
-        @Override
-        public OutputT extractOutput(K key, AccumT accumulator, Context c) {
-          return CombineFnWithContext.this.extractOutput(accumulator, c);
-        }
-
-        @Override
-        public AccumT compact(K key, AccumT accumulator, Context c) {
-          return CombineFnWithContext.this.compact(accumulator, c);
-        }
-
-        @Override
-        public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<K> keyCoder,
-            Coder<InputT> inputCoder) throws CannotProvideCoderException {
-          return CombineFnWithContext.this.getAccumulatorCoder(registry, inputCoder);
-        }
-
-        @Override
-        public Coder<OutputT> getDefaultOutputCoder(CoderRegistry registry, Coder<K> keyCoder,
-            Coder<InputT> inputCoder) throws CannotProvideCoderException {
-          return CombineFnWithContext.this.getDefaultOutputCoder(registry, inputCoder);
-        }
-
-        @Override
-        public CombineFnWithContext<InputT, AccumT, OutputT> forKey(K key, Coder<K> keyCoder) {
-          return CombineFnWithContext.this;
-        }
-      };
-    }
-  }
-
-  /**
-   * A keyed combine function that has access to {@code PipelineOptions} and side inputs through
-   * {@code CombineWithContext.Context}.
-   *
-   * <p>See the equivalent {@link KeyedCombineFn} for details about keyed combine functions.
-   */
-  public abstract static class KeyedCombineFnWithContext<K, InputT, AccumT, OutputT>
-      extends CombineFnBase.AbstractPerKeyCombineFn<K, InputT, AccumT, OutputT>
-      implements RequiresContextInternal {
-    /**
-     * Returns a new, mutable accumulator value representing the accumulation of zero input values.
-     *
-     * <p>It is equivalent to {@link KeyedCombineFn#createAccumulator},
-     * but it has additional access to {@code CombineWithContext.Context}.
-     */
-    public abstract AccumT createAccumulator(K key, Context c);
-
-    /**
-     * Adds the given input value to the given accumulator, returning the new accumulator value.
-     *
-     * <p>It is equivalent to {@link KeyedCombineFn#addInput}, but it has additional access to
-     * {@code CombineWithContext.Context}.
-     */
-    public abstract AccumT addInput(K key, AccumT accumulator, InputT value, Context c);
-
-    /**
-     * Returns an accumulator representing the accumulation of all the
-     * input values accumulated in the merging accumulators.
-     *
-     * <p>It is equivalent to {@link KeyedCombineFn#mergeAccumulators},
-     * but it has additional access to {@code CombineWithContext.Context}.
-     */
-    public abstract AccumT mergeAccumulators(K key, Iterable<AccumT> accumulators, Context c);
-
-    /**
-     * Returns the output value that is the result of combining all
-     * the input values represented by the given accumulator.
-     *
-     * <p>It is equivalent to {@link KeyedCombineFn#extractOutput}, but it has additional access to
-     * {@code CombineWithContext.Context}.
-     */
-    public abstract OutputT extractOutput(K key, AccumT accumulator, Context c);
-
-    /**
-     * Returns an accumulator that represents the same logical value as the
-     * input accumulator, but may have a more compact representation.
-     *
-     * <p>It is equivalent to {@link KeyedCombineFn#compact}, but it has additional access to
-     * {@code CombineWithContext.Context}.
-     */
-    public AccumT compact(K key, AccumT accumulator, Context c) {
-      return accumulator;
-    }
-
-    /**
-     * Applies this {@code KeyedCombineFnWithContext} to a key and a collection
-     * of input values to produce a combined output value.
-     */
-    public OutputT apply(K key, Iterable<? extends InputT> inputs, Context c) {
-      AccumT accum = createAccumulator(key, c);
-      for (InputT input : inputs) {
-        accum = addInput(key, accum, input, c);
-      }
-      return extractOutput(key, accum, c);
-    }
-
-    @Override
-    public CombineFnWithContext<InputT, AccumT, OutputT> forKey(
-        final K key, final Coder<K> keyCoder) {
-      return new CombineFnWithContext<InputT, AccumT, OutputT>() {
-        @Override
-        public AccumT createAccumulator(Context c) {
-          return KeyedCombineFnWithContext.this.createAccumulator(key, c);
-        }
-
-        @Override
-        public AccumT addInput(AccumT accumulator, InputT input, Context c) {
-          return KeyedCombineFnWithContext.this.addInput(key, accumulator, input, c);
-        }
-
-        @Override
-        public AccumT mergeAccumulators(Iterable<AccumT> accumulators, Context c) {
-          return KeyedCombineFnWithContext.this.mergeAccumulators(key, accumulators, c);
-        }
-
-        @Override
-        public OutputT extractOutput(AccumT accumulator, Context c) {
-          return KeyedCombineFnWithContext.this.extractOutput(key, accumulator, c);
-        }
-
-        @Override
-        public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<InputT> inputCoder)
-            throws CannotProvideCoderException {
-          return KeyedCombineFnWithContext.this.getAccumulatorCoder(registry, keyCoder, inputCoder);
-        }
-
-        @Override
-        public Coder<OutputT> getDefaultOutputCoder(
-            CoderRegistry registry, Coder<InputT> inputCoder) throws CannotProvideCoderException {
-          return KeyedCombineFnWithContext.this.getDefaultOutputCoder(
-              registry, keyCoder, inputCoder);
-        }
-      };
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java
deleted file mode 100644
index ffa11d1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Count.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-/**
- * {@code PTransform}s to count the elements in a {@link PCollection}.
- *
- * <p>{@link Count#perElement()} can be used to count the number of occurrences of each
- * distinct element in the PCollection, {@link Count#perKey()} can be used to count the
- * number of values per key, and {@link Count#globally()} can be used to count the total
- * number of elements in a PCollection.
- */
-public class Count {
-  private Count() {
-    // do not instantiate
-  }
-
-  /**
-   * Returns a {@link Combine.Globally} {@link PTransform} that counts the number of elements in
-   * its input {@link PCollection}.
-   */
-  public static <T> Combine.Globally<T, Long> globally() {
-    return Combine.globally(new CountFn<T>()).named("Count.Globally");
-  }
-
-  /**
-   * Returns a {@link Combine.PerKey} {@link PTransform} that counts the number of elements
-   * associated with each key of its input {@link PCollection}.
-   */
-  public static <K, V> Combine.PerKey<K, V, Long> perKey() {
-    return Combine.<K, V, Long>perKey(new CountFn<V>()).named("Count.PerKey");
-  }
-
-  /**
-   * Returns a {@link PerElement Count.PerElement} {@link PTransform} that counts the number of
-   * occurrences of each element in its input {@link PCollection}.
-   *
-   * <p>See {@link PerElement Count.PerElement} for more details.
-   */
-  public static <T> PerElement<T> perElement() {
-    return new PerElement<>();
-  }
-
-  /**
-   * {@code Count.PerElement<T>} takes a {@code PCollection<T>} and returns a
-   * {@code PCollection<KV<T, Long>>} representing a map from each distinct element of the input
-   * {@code PCollection} to the number of times that element occurs in the input. Each key in the
-   * output {@code PCollection} is unique.
-   *
-   * <p>This transform compares two values of type {@code T} by first encoding each element using
-   * the input {@code PCollection}'s {@code Coder}, then comparing the encoded bytes. Because of
-   * this, the input coder must be deterministic.
-   * (See {@link com.google.cloud.dataflow.sdk.coders.Coder#verifyDeterministic()} for more detail).
-   * Performing the comparison in this manner admits efficient parallel evaluation.
-   *
-   * <p>By default, the {@code Coder} of the keys of the output {@code PCollection} is the same as
-   * the {@code Coder} of the elements of the input {@code PCollection}.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<String> words = ...;
-   * PCollection<KV<String, Long>> wordCounts =
-   *     words.apply(Count.<String>perElement());
-   * } </pre>
-   *
-   * @param <T> the type of the elements of the input {@code PCollection}, and the type of the keys
-   * of the output {@code PCollection}
-   */
-  public static class PerElement<T>
-      extends PTransform<PCollection<T>, PCollection<KV<T, Long>>> {
-
-    public PerElement() { }
-
-    @Override
-    public PCollection<KV<T, Long>> apply(PCollection<T> input) {
-      return
-          input
-          .apply(ParDo.named("Init").of(new DoFn<T, KV<T, Void>>() {
-            @Override
-            public void processElement(ProcessContext c) {
-              c.output(KV.of(c.element(), (Void) null));
-            }
-          }))
-          .apply(Count.<T, Void>perKey());
-    }
-  }
-
-  /**
-   * A {@link CombineFn} that counts elements.
-   */
-  private static class CountFn<T> extends CombineFn<T, Long, Long> {
-
-    @Override
-    public Long createAccumulator() {
-      return 0L;
-    }
-
-    @Override
-    public Long addInput(Long accumulator, T input) {
-      return accumulator + 1;
-    }
-
-    @Override
-    public Long mergeAccumulators(Iterable<Long> accumulators) {
-      long result = 0L;
-      for (Long accum : accumulators) {
-        result += accum;
-      }
-      return result;
-    }
-
-    @Override
-    public Long extractOutput(Long accumulator) {
-      return accumulator;
-    }
-  }
-}

[16/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PaneInfo.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PaneInfo.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PaneInfo.java
deleted file mode 100644
index 18f7a97..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PaneInfo.java
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.util.VarInt;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableMap;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Objects;
-
-/**
- * Provides information about the pane an element belongs to. Every pane is implicitly associated
- * with a window. Panes are observable only via the
- * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.ProcessContext#pane} method of the context
- * passed to a {@link DoFn#processElement} overridden method.
- *
- * <p>Note: This does not uniquely identify a pane, and should not be used for comparisons.
- */
-public final class PaneInfo {
-  /**
-   * Enumerates the possibilities for the timing of this pane firing related to the
-   * input and output watermarks for its computation.
-   *
-   * <p>A window may fire multiple panes, and the timing of those panes generally follows the
-   * regular expression {@code EARLY* ON_TIME? LATE*}. Generally a pane is considered:
-   * <ol>
-   * <li>{@code EARLY} if the system cannot be sure it has seen all data which may contribute
-   * to the pane's window.
-   * <li>{@code ON_TIME} if the system predicts it has seen all the data which may contribute
-   * to the pane's window.
-   * <li>{@code LATE} if the system has encountered new data after predicting no more could arrive.
-   * It is possible an {@code ON_TIME} pane has already been emitted, in which case any
-   * following panes are considered {@code LATE}.
-   * </ol>
-   *
-   * <p>Only an
-   * {@link AfterWatermark#pastEndOfWindow} trigger may produce an {@code ON_TIME} pane.
-   * With merging {@link WindowFn}'s, windows may be merged to produce new windows that satisfy
-   * their own instance of the above regular expression. The only guarantee is that once a window
-   * produces a final pane, it will not be merged into any new windows.
-   *
-   * <p>The predictions above are made using the mechanism of watermarks.
-   * See {@link com.google.cloud.dataflow.sdk.util.TimerInternals} for more information
-   * about watermarks.
-   *
-   * <p>We can state some properties of {@code LATE} and {@code ON_TIME} panes, but first need some
-   * definitions:
-   * <ol>
-   * <li>We'll call a pipeline 'simple' if it does not use
-   * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#outputWithTimestamp} in
-   * any {@code DoFn}, and it uses the same
-   * {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window.Bound#withAllowedLateness}
-   * argument value on all windows (or uses the default of {@link org.joda.time.Duration#ZERO}).
-   * <li>We'll call an element 'locally late', from the point of view of a computation on a
-   * worker, if the element's timestamp is before the input watermark for that computation
-   * on that worker. The element is otherwise 'locally on-time'.
-   * <li>We'll say 'the pane's timestamp' to mean the timestamp of the element produced to
-   * represent the pane's contents.
-   * </ol>
-   *
-   * <p>Then in simple pipelines:
-   * <ol>
-   * <li> (Soundness) An {@code ON_TIME} pane can never cause a later computation to generate a
-   * {@code LATE} pane. (If it did, it would imply a later computation's input watermark progressed
-   * ahead of an earlier stage's output watermark, which by design is not possible.)
-   * <li> (Liveness) An {@code ON_TIME} pane is emitted as soon as possible after the input
-   * watermark passes the end of the pane's window.
-   * <li> (Consistency) A pane with only locally on-time elements will always be {@code ON_TIME}.
-   * And a {@code LATE} pane cannot contain locally on-time elements.
-   * </ol>
-   *
-   * However, note that:
-   * <ol>
-   * <li> An {@code ON_TIME} pane may contain locally late elements. It may even contain only
-   * locally late elements. Provided a locally late element finds its way into an {@code ON_TIME}
-   * pane its lateness becomes unobservable.
-   * <li> A {@code LATE} pane does not necessarily cause any following computation panes to be
-   * marked as {@code LATE}.
-   * </ol>
-   */
-  public enum Timing {
-    /**
-     * Pane was fired before the input watermark had progressed after the end of the window.
-     */
-    EARLY,
-    /**
-     * Pane was fired by a {@link AfterWatermark#pastEndOfWindow} trigger because the input
-     * watermark progressed after the end of the window. However the output watermark has not
-     * yet progressed after the end of the window. Thus it is still possible to assign a timestamp
-     * to the element representing this pane which cannot be considered locally late by any
-     * following computation.
-     */
-    ON_TIME,
-    /**
-     * Pane was fired after the output watermark had progressed past the end of the window.
-     */
-    LATE,
-    /**
-     * This element was not produced in a triggered pane and its relation to input and
-     * output watermarks is unknown.
-     */
-    UNKNOWN;
-
-    // NOTE: Do not add fields or re-order them. The ordinal is used as part of
-    // the encoding.
-  }
-
-  private static byte encodedByte(boolean isFirst, boolean isLast, Timing timing) {
-    byte result = 0x0;
-    if (isFirst) {
-      result |= 1;
-    }
-    if (isLast) {
-      result |= 2;
-    }
-    result |= timing.ordinal() << 2;
-    return result;
-  }
-
-  private static final ImmutableMap<Byte, PaneInfo> BYTE_TO_PANE_INFO;
-  static {
-    ImmutableMap.Builder<Byte, PaneInfo> decodingBuilder = ImmutableMap.builder();
-    for (Timing timing : Timing.values()) {
-      long onTimeIndex = timing == Timing.EARLY ? -1 : 0;
-      register(decodingBuilder, new PaneInfo(true, true, timing, 0, onTimeIndex));
-      register(decodingBuilder, new PaneInfo(true, false, timing, 0, onTimeIndex));
-      register(decodingBuilder, new PaneInfo(false, true, timing, -1, onTimeIndex));
-      register(decodingBuilder, new PaneInfo(false, false, timing, -1, onTimeIndex));
-    }
-    BYTE_TO_PANE_INFO = decodingBuilder.build();
-  }
-
-  private static void register(ImmutableMap.Builder<Byte, PaneInfo> builder, PaneInfo info) {
-    builder.put(info.encodedByte, info);
-  }
-
-  private final byte encodedByte;
-
-  private final boolean isFirst;
-  private final boolean isLast;
-  private final Timing timing;
-  private final long index;
-  private final long nonSpeculativeIndex;
-
-  /**
-   * {@code PaneInfo} to use for elements on (and before) initial window assignment (including
-   * elements read from sources) before they have passed through a {@link GroupByKey} and are
-   * associated with a particular trigger firing.
-   */
-  public static final PaneInfo NO_FIRING =
-      PaneInfo.createPane(true, true, Timing.UNKNOWN, 0, 0);
-
-  /**
-   * {@code PaneInfo} to use when there will be exactly one firing and it is on time.
-   */
-  public static final PaneInfo ON_TIME_AND_ONLY_FIRING =
-      PaneInfo.createPane(true, true, Timing.ON_TIME, 0, 0);
-
-  private PaneInfo(boolean isFirst, boolean isLast, Timing timing, long index, long onTimeIndex) {
-    this.encodedByte = encodedByte(isFirst, isLast, timing);
-    this.isFirst = isFirst;
-    this.isLast = isLast;
-    this.timing = timing;
-    this.index = index;
-    this.nonSpeculativeIndex = onTimeIndex;
-  }
-
-  public static PaneInfo createPane(boolean isFirst, boolean isLast, Timing timing) {
-    Preconditions.checkArgument(isFirst, "Indices must be provided for non-first pane info.");
-    return createPane(isFirst, isLast, timing, 0, timing == Timing.EARLY ? -1 : 0);
-  }
-
-  /**
-   * Factory method to create a {@link PaneInfo} with the specified parameters.
-   */
-  public static PaneInfo createPane(
-      boolean isFirst, boolean isLast, Timing timing, long index, long onTimeIndex) {
-    if (isFirst || timing == Timing.UNKNOWN) {
-      return Preconditions.checkNotNull(
-          BYTE_TO_PANE_INFO.get(encodedByte(isFirst, isLast, timing)));
-    } else {
-      return new PaneInfo(isFirst, isLast, timing, index, onTimeIndex);
-    }
-  }
-
-  public static PaneInfo decodePane(byte encodedPane) {
-    return Preconditions.checkNotNull(BYTE_TO_PANE_INFO.get(encodedPane));
-  }
-
-  /**
-   * Return true if there is no timing information for the current {@link PaneInfo}.
-   * This typically indicates that the current element has not been assigned to
-   * windows or passed through an operation that executes triggers yet.
-   */
-  public boolean isUnknown() {
-    return Timing.UNKNOWN.equals(timing);
-  }
-
-  /**
-   * Return true if this is the first pane produced for the associated window.
-   */
-  public boolean isFirst() {
-    return isFirst;
-  }
-
-  /**
-   * Return true if this is the last pane that will be produced in the associated window.
-   */
-  public boolean isLast() {
-    return isLast;
-  }
-
-  /**
-   * Return the timing of this pane relative to the input and output watermarks.
-   */
-  public Timing getTiming() {
-    return timing;
-  }
-
-  /**
-   * The zero-based index of the trigger firing that produced this pane.
-   *
-   * <p>This will return 0 for the first time the trigger fires, 1 for the next time, etc.
-   *
-   * <p>A given (key, window, pane-index) is guaranteed to be unique in the
-   * output of a group-by-key operation.
-   */
-  public long getIndex() {
-    return index;
-  }
-
-  /**
-   * The zero-based index of this trigger firing among non-speculative panes.
-   *
-   * <p>This will return 0 for the first non-{@link Timing#EARLY} trigger firing, 1 for the next
-   * one, etc.
-   *
-   * <p>Always -1 for speculative data.
-   */
-  public long getNonSpeculativeIndex() {
-    return nonSpeculativeIndex;
-  }
-
-  int getEncodedByte() {
-    return encodedByte;
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(encodedByte, index, nonSpeculativeIndex);
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj) {
-      // Simple PaneInfos are interned.
-      return true;
-    } else if (obj instanceof PaneInfo) {
-      PaneInfo that = (PaneInfo) obj;
-      return this.encodedByte == that.encodedByte
-          && this.index == that.index
-          && this.nonSpeculativeIndex == that.nonSpeculativeIndex;
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(getClass())
-        .omitNullValues()
-        .add("isFirst", isFirst ? true : null)
-        .add("isLast", isLast ? true : null)
-        .add("timing", timing)
-        .add("index", index)
-        .add("onTimeIndex", nonSpeculativeIndex != -1 ? nonSpeculativeIndex : null)
-        .toString();
-  }
-
-  /**
-   * A Coder for encoding PaneInfo instances.
-   */
-  public static class PaneInfoCoder extends AtomicCoder<PaneInfo> {
-    private static enum Encoding {
-      FIRST,
-      ONE_INDEX,
-      TWO_INDICES;
-
-      // NOTE: Do not reorder fields. The ordinal is used as part of
-      // the encoding.
-
-      public final byte tag;
-
-      private Encoding() {
-        assert ordinal() < 16;
-        tag = (byte) (ordinal() << 4);
-      }
-
-      public static Encoding fromTag(byte b) {
-        return Encoding.values()[b >> 4];
-      }
-    }
-
-    private Encoding chooseEncoding(PaneInfo value) {
-      if (value.index == 0 && value.nonSpeculativeIndex == 0 || value.timing == Timing.UNKNOWN) {
-        return Encoding.FIRST;
-      } else if (value.index == value.nonSpeculativeIndex || value.timing == Timing.EARLY) {
-        return Encoding.ONE_INDEX;
-      } else {
-        return Encoding.TWO_INDICES;
-      }
-    }
-
-    public static final PaneInfoCoder INSTANCE = new PaneInfoCoder();
-
-    @Override
-    public void encode(PaneInfo value, final OutputStream outStream, Coder.Context context)
-        throws CoderException, IOException {
-      Encoding encoding = chooseEncoding(value);
-      switch (chooseEncoding(value)) {
-        case FIRST:
-          outStream.write(value.encodedByte);
-          break;
-        case ONE_INDEX:
-          outStream.write(value.encodedByte | encoding.tag);
-          VarInt.encode(value.index, outStream);
-          break;
-        case TWO_INDICES:
-          outStream.write(value.encodedByte | encoding.tag);
-          VarInt.encode(value.index, outStream);
-          VarInt.encode(value.nonSpeculativeIndex, outStream);
-          break;
-        default:
-          throw new CoderException("Unknown encoding " + encoding);
-      }
-    }
-
-    @Override
-    public PaneInfo decode(final InputStream inStream, Coder.Context context)
-        throws CoderException, IOException {
-      byte keyAndTag = (byte) inStream.read();
-      PaneInfo base = BYTE_TO_PANE_INFO.get((byte) (keyAndTag & 0x0F));
-      long index, onTimeIndex;
-      switch (Encoding.fromTag(keyAndTag)) {
-        case FIRST:
-          return base;
-        case ONE_INDEX:
-          index = VarInt.decodeLong(inStream);
-          onTimeIndex = base.timing == Timing.EARLY ? -1 : index;
-          break;
-        case TWO_INDICES:
-          index = VarInt.decodeLong(inStream);
-          onTimeIndex = VarInt.decodeLong(inStream);
-          break;
-        default:
-          throw new CoderException("Unknown encoding " + (keyAndTag & 0xF0));
-      }
-      return new PaneInfo(base.isFirst, base.isLast, base.timing, index, onTimeIndex);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java
deleted file mode 100644
index bea0285..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/PartitioningWindowFn.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import org.joda.time.Instant;
-
-import java.util.Arrays;
-import java.util.Collection;
-
-/**
- * A {@link WindowFn} that places each value into exactly one window based on its timestamp and
- * never merges windows.
- *
- * @param <T> type of elements being windowed
- * @param <W> window type
- */
-public abstract class PartitioningWindowFn<T, W extends BoundedWindow>
-    extends NonMergingWindowFn<T, W> {
-  /**
-   * Returns the single window to which elements with this timestamp belong.
-   */
-  public abstract W assignWindow(Instant timestamp);
-
-  @Override
-  public final Collection<W> assignWindows(AssignContext c) {
-    return Arrays.asList(assignWindow(c.timestamp()));
-  }
-
-  @Override
-  public W getSideInputWindow(final BoundedWindow window) {
-    if (window instanceof GlobalWindow) {
-      throw new IllegalArgumentException(
-          "Attempted to get side input window for GlobalWindow from non-global WindowFn");
-    }
-    return assignWindow(window.maxTimestamp());
-  }
-
-  @Override
-  public boolean assignsToSingleWindow() {
-    return true;
-  }
-
-  @Override
-  public Instant getOutputTime(Instant inputTimestamp, W window) {
-    return inputTimestamp;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Repeatedly.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Repeatedly.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Repeatedly.java
deleted file mode 100644
index e77e2a1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Repeatedly.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.util.ExecutableTrigger;
-
-import org.joda.time.Instant;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Repeat a trigger, either until some condition is met or forever.
- *
- * <p>For example, to fire after the end of the window, and every time late data arrives:
- * <pre> {@code
- *     Repeatedly.forever(AfterWatermark.pastEndOfWindow());
- * } </pre>
- *
- * <p>{@code Repeatedly.forever(someTrigger)} behaves like an infinite
- * {@code AfterEach.inOrder(someTrigger, someTrigger, someTrigger, ...)}.
- *
- * @param <W> {@link BoundedWindow} subclass used to represent the windows used by this
- *            {@code Trigger}
- */
-public class Repeatedly<W extends BoundedWindow> extends Trigger<W> {
-
-  private static final int REPEATED = 0;
-
-  /**
-   * Create a composite trigger that repeatedly executes the trigger {@code repeated}, firing each
-   * time it fires and ignoring any indications to finish.
-   *
-   * <p>Unless used with {@link Trigger#orFinally} the composite trigger will never finish.
-   *
-   * @param repeated the trigger to execute repeatedly.
-   */
-  public static <W extends BoundedWindow> Repeatedly<W> forever(Trigger<W> repeated) {
-    return new Repeatedly<W>(repeated);
-  }
-
-  private Repeatedly(Trigger<W> repeated) {
-    super(Arrays.asList(repeated));
-  }
-
-
-  @Override
-  public void onElement(OnElementContext c) throws Exception {
-    getRepeated(c).invokeOnElement(c);
-  }
-
-  @Override
-  public void onMerge(OnMergeContext c) throws Exception {
-    getRepeated(c).invokeOnMerge(c);
-  }
-
-  @Override
-  public Instant getWatermarkThatGuaranteesFiring(W window) {
-    // This trigger fires once the repeated trigger fires.
-    return subTriggers.get(REPEATED).getWatermarkThatGuaranteesFiring(window);
-  }
-
-  @Override
-  public Trigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-    return new Repeatedly<W>(continuationTriggers.get(REPEATED));
-  }
-
-  @Override
-  public boolean shouldFire(Trigger<W>.TriggerContext context) throws Exception {
-    return getRepeated(context).invokeShouldFire(context);
-  }
-
-  @Override
-  public void onFire(TriggerContext context) throws Exception {
-    getRepeated(context).invokeOnFire(context);
-
-    if (context.trigger().isFinished(REPEATED)) {
-      context.trigger().setFinished(false, REPEATED);
-      getRepeated(context).invokeClear(context);
-    }
-  }
-
-  private ExecutableTrigger<W> getRepeated(TriggerContext context) {
-    return context.trigger().subTrigger(REPEATED);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java
deleted file mode 100644
index da137c1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Sessions.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-
-import org.joda.time.Duration;
-
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Objects;
-
-/**
- * A {@link WindowFn} windowing values into sessions separated by {@link #gapDuration}-long
- * periods with no elements.
- *
- * <p>For example, in order to window data into sessions with at least 10-minute
- * gaps in between them:
- * <pre> {@code
- * PCollection<Integer> pc = ...;
- * PCollection<Integer> windowed_pc = pc.apply(
- *   Window.<Integer>into(Sessions.withGapDuration(Duration.standardMinutes(10))));
- * } </pre>
- */
-public class Sessions extends WindowFn<Object, IntervalWindow> {
-  /**
-   * Duration of the gaps between sessions.
-   */
-  private final Duration gapDuration;
-
-  /**
-   * Creates a {@code Sessions} {@link WindowFn} with the specified gap duration.
-   */
-  public static Sessions withGapDuration(Duration gapDuration) {
-    return new Sessions(gapDuration);
-  }
-
-  /**
-   * Creates a {@code Sessions} {@link WindowFn} with the specified gap duration.
-   */
-  private Sessions(Duration gapDuration) {
-    this.gapDuration = gapDuration;
-  }
-
-  @Override
-  public Collection<IntervalWindow> assignWindows(AssignContext c) {
-    // Assign each element into a window from its timestamp until gapDuration in the
-    // future.  Overlapping windows (representing elements within gapDuration of
-    // each other) will be merged.
-    return Arrays.asList(new IntervalWindow(c.timestamp(), gapDuration));
-  }
-
-  @Override
-  public void mergeWindows(MergeContext c) throws Exception {
-    MergeOverlappingIntervalWindows.mergeWindows(c);
-  }
-
-  @Override
-  public Coder<IntervalWindow> windowCoder() {
-    return IntervalWindow.getCoder();
-  }
-
-  @Override
-  public boolean isCompatible(WindowFn<?, ?> other) {
-    return other instanceof Sessions;
-  }
-
-  @Override
-  public IntervalWindow getSideInputWindow(BoundedWindow window) {
-    throw new UnsupportedOperationException("Sessions is not allowed in side inputs");
-  }
-
-  @Experimental(Kind.OUTPUT_TIME)
-  @Override
-  public OutputTimeFn<? super IntervalWindow> getOutputTimeFn() {
-    return OutputTimeFns.outputAtEarliestInputTimestamp();
-  }
-
-  public Duration getGapDuration() {
-    return gapDuration;
-  }
-
-  @Override
-  public boolean equals(Object object) {
-    if (!(object instanceof Sessions)) {
-      return false;
-    }
-    Sessions other = (Sessions) object;
-    return getGapDuration().equals(other.getGapDuration());
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(gapDuration);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java
deleted file mode 100644
index b0066d6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/SlidingWindows.java
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Objects;
-
-/**
- * A {@link WindowFn} that windows values into possibly overlapping fixed-size
- * timestamp-based windows.
- *
- * <p>For example, in order to window data into 10 minute windows that
- * update every minute:
- * <pre> {@code
- * PCollection<Integer> items = ...;
- * PCollection<Integer> windowedItems = items.apply(
- *   Window.<Integer>into(SlidingWindows.of(Duration.standardMinutes(10))));
- * } </pre>
- */
-public class SlidingWindows extends NonMergingWindowFn<Object, IntervalWindow> {
-
-  /**
-   * Amount of time between generated windows.
-   */
-  private final Duration period;
-
-  /**
-   * Size of the generated windows.
-   */
-  private final Duration size;
-
-  /**
-   * Offset of the generated windows.
-   * Windows start at time N * period + offset, where 0 is the epoch.
-   */
-  private final Duration offset;
-
-  /**
-   * Assigns timestamps into half-open intervals of the form
-   * [N * period, N * period + size), where 0 is the epoch.
-   *
-   * <p>If {@link SlidingWindows#every} is not called, the period defaults
-   * to the largest time unit smaller than the given duration.  For example,
-   * specifying a size of 5 seconds will result in a default period of 1 second.
-   */
-  public static SlidingWindows of(Duration size) {
-    return new SlidingWindows(getDefaultPeriod(size), size, Duration.ZERO);
-  }
-
-  /**
-   * Returns a new {@code SlidingWindows} with the original size, that assigns
-   * timestamps into half-open intervals of the form
-   * [N * period, N * period + size), where 0 is the epoch.
-   */
-  public SlidingWindows every(Duration period) {
-    return new SlidingWindows(period, size, offset);
-  }
-
-  /**
-   * Assigns timestamps into half-open intervals of the form
-   * [N * period + offset, N * period + offset + size).
-   *
-   * @throws IllegalArgumentException if offset is not in [0, period)
-   */
-  public SlidingWindows withOffset(Duration offset) {
-    return new SlidingWindows(period, size, offset);
-  }
-
-  private SlidingWindows(Duration period, Duration size, Duration offset) {
-    if (offset.isShorterThan(Duration.ZERO)
-        || !offset.isShorterThan(period)
-        || !size.isLongerThan(Duration.ZERO)) {
-      throw new IllegalArgumentException(
-          "SlidingWindows WindowingStrategies must have 0 <= offset < period and 0 < size");
-    }
-    this.period = period;
-    this.size = size;
-    this.offset = offset;
-  }
-
-  @Override
-  public Coder<IntervalWindow> windowCoder() {
-    return IntervalWindow.getCoder();
-  }
-
-  @Override
-  public Collection<IntervalWindow> assignWindows(AssignContext c) {
-    List<IntervalWindow> windows =
-        new ArrayList<>((int) (size.getMillis() / period.getMillis()));
-    Instant timestamp = c.timestamp();
-    long lastStart = lastStartFor(timestamp);
-    for (long start = lastStart;
-         start > timestamp.minus(size).getMillis();
-         start -= period.getMillis()) {
-      windows.add(new IntervalWindow(new Instant(start), size));
-    }
-    return windows;
-  }
-
-  /**
-   * Return the earliest window that contains the end of the main-input window.
-   */
-  @Override
-  public IntervalWindow getSideInputWindow(final BoundedWindow window) {
-    if (window instanceof GlobalWindow) {
-      throw new IllegalArgumentException(
-          "Attempted to get side input window for GlobalWindow from non-global WindowFn");
-    }
-    long lastStart = lastStartFor(window.maxTimestamp().minus(size));
-    return new IntervalWindow(new Instant(lastStart + period.getMillis()), size);
-  }
-
-  @Override
-  public boolean isCompatible(WindowFn<?, ?> other) {
-    return equals(other);
-  }
-
-  /**
-   * Return the last start of a sliding window that contains the timestamp.
-   */
-  private long lastStartFor(Instant timestamp) {
-    return timestamp.getMillis()
-        - timestamp.plus(period).minus(offset).getMillis() % period.getMillis();
-  }
-
-  static Duration getDefaultPeriod(Duration size) {
-    if (size.isLongerThan(Duration.standardHours(1))) {
-      return Duration.standardHours(1);
-    }
-    if (size.isLongerThan(Duration.standardMinutes(1))) {
-      return Duration.standardMinutes(1);
-    }
-    if (size.isLongerThan(Duration.standardSeconds(1))) {
-      return Duration.standardSeconds(1);
-    }
-    return Duration.millis(1);
-  }
-
-  public Duration getPeriod() {
-    return period;
-  }
-
-  public Duration getSize() {
-    return size;
-  }
-
-  public Duration getOffset() {
-    return offset;
-  }
-
-  /**
-   * Ensures that later sliding windows have an output time that is past the end of earlier windows.
-   *
-   * <p>If this is the earliest sliding window containing {@code inputTimestamp}, that's fine.
-   * Otherwise, we pick the earliest time that doesn't overlap with earlier windows.
-   */
-  @Experimental(Kind.OUTPUT_TIME)
-  @Override
-  public OutputTimeFn<? super IntervalWindow> getOutputTimeFn() {
-    return new OutputTimeFn.Defaults<BoundedWindow>() {
-      @Override
-      public Instant assignOutputTime(Instant inputTimestamp, BoundedWindow window) {
-        Instant startOfLastSegment = window.maxTimestamp().minus(period);
-        return startOfLastSegment.isBefore(inputTimestamp)
-            ? inputTimestamp
-                : startOfLastSegment.plus(1);
-      }
-
-      @Override
-      public boolean dependsOnlyOnEarliestInputTimestamp() {
-        return true;
-      }
-    };
-  }
-
-  @Override
-  public boolean equals(Object object) {
-    if (!(object instanceof SlidingWindows)) {
-      return false;
-    }
-    SlidingWindows other = (SlidingWindows) object;
-    return getOffset().equals(other.getOffset())
-        && getSize().equals(other.getSize())
-        && getPeriod().equals(other.getPeriod());
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(size, offset, period);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Trigger.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Trigger.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Trigger.java
deleted file mode 100644
index 4471563..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Trigger.java
+++ /dev/null
@@ -1,544 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.util.ExecutableTrigger;
-import com.google.cloud.dataflow.sdk.util.TimeDomain;
-import com.google.cloud.dataflow.sdk.util.state.MergingStateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateAccessor;
-import com.google.common.base.Joiner;
-
-import org.joda.time.Instant;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Objects;
-
-import javax.annotation.Nullable;
-
-/**
- * {@code Trigger}s control when the elements for a specific key and window are output. As elements
- * arrive, they are put into one or more windows by a {@link Window} transform and its associated
- * {@link WindowFn}, and then passed to the associated {@code Trigger} to determine if the
- * {@code Window}s contents should be output.
- *
- * <p>See {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey} and {@link Window}
- * for more information about how grouping with windows works.
- *
- * <p>The elements that are assigned to a window since the last time it was fired (or since the
- * window was created) are placed into the current window pane. Triggers are evaluated against the
- * elements as they are added. When the root trigger fires, the elements in the current pane will be
- * output. When the root trigger finishes (indicating it will never fire again), the window is
- * closed and any new elements assigned to that window are discarded.
- *
- * <p>Several predefined {@code Trigger}s are provided:
- * <ul>
- *   <li> {@link AfterWatermark} for firing when the watermark passes a timestamp determined from
- *   either the end of the window or the arrival of the first element in a pane.
- *   <li> {@link AfterProcessingTime} for firing after some amount of processing time has elapsed
- *   (typically since the first element in a pane).
- *   <li> {@link AfterPane} for firing off a property of the elements in the current pane, such as
- *   the number of elements that have been assigned to the current pane.
- * </ul>
- *
- * <p>In addition, {@code Trigger}s can be combined in a variety of ways:
- * <ul>
- *   <li> {@link Repeatedly#forever} to create a trigger that executes forever. Any time its
- *   argument finishes it gets reset and starts over. Can be combined with
- *   {@link Trigger#orFinally} to specify a condition that causes the repetition to stop.
- *   <li> {@link AfterEach#inOrder} to execute each trigger in sequence, firing each (and every)
- *   time that a trigger fires, and advancing to the next trigger in the sequence when it finishes.
- *   <li> {@link AfterFirst#of} to create a trigger that fires after at least one of its arguments
- *   fires. An {@link AfterFirst} trigger finishes after it fires once.
- *   <li> {@link AfterAll#of} to create a trigger that fires after all of its arguments
- *   have fired at least once. An {@link AfterAll} trigger finishes after it fires once.
- * </ul>
- *
- * <p>Each trigger tree is instantiated per-key and per-window. Every trigger in the tree is in one
- * of the following states:
- * <ul>
- *   <li> Never Existed - before the trigger has started executing, there is no state associated
- *   with it anywhere in the system. A trigger moves to the executing state as soon as it
- *   processes an element in the current pane.
- *   <li> Executing - while the trigger is receiving items and may fire. While it is in this state,
- *   it may persist book-keeping information to persisted state, set timers, etc.
- *   <li> Finished - after a trigger finishes, all of its book-keeping data is cleaned up, and the
- *   system remembers only that it is finished. Entering this state causes us to discard any
- *   elements in the buffer for that window, as well.
- * </ul>
- *
- * <p>Once finished, a trigger cannot return itself back to an earlier state; however, a composite
- * trigger could reset its sub-triggers.
- *
- * <p>Triggers should not build up any state internally since they may be recreated
- * between invocations of the callbacks. All important values should be persisted using
- * state before the callback returns.
- *
- * @param <W> {@link BoundedWindow} subclass used to represent the windows used by this
- *            {@code Trigger}
- */
-@Experimental(Experimental.Kind.TRIGGER)
-public abstract class Trigger<W extends BoundedWindow> implements Serializable, TriggerBuilder<W> {
-
-  /**
-   * Interface for accessing information about the trigger being executed and other triggers in the
-   * same tree.
-   */
-  public interface TriggerInfo<W extends BoundedWindow> {
-
-    /**
-     * Returns true if the windowing strategy of the current {@code PCollection} is a merging
-     * WindowFn. If true, the trigger execution needs to keep enough information to support the
-     * possibility of {@link Trigger#onMerge} being called. If false, {@link Trigger#onMerge} will
-     * never be called.
-     */
-    boolean isMerging();
-
-    /**
-     * Access the executable versions of the sub-triggers of the current trigger.
-     */
-    Iterable<ExecutableTrigger<W>> subTriggers();
-
-    /**
-     * Access the executable version of the specified sub-trigger.
-     */
-    ExecutableTrigger<W> subTrigger(int subtriggerIndex);
-
-    /**
-     * Returns true if the current trigger is marked finished.
-     */
-    boolean isFinished();
-
-    /**
-     * Return true if the given subtrigger is marked finished.
-     */
-    boolean isFinished(int subtriggerIndex);
-
-    /**
-     * Returns true if all the sub-triggers of the current trigger are marked finished.
-     */
-    boolean areAllSubtriggersFinished();
-
-    /**
-     * Returns an iterable over the unfinished sub-triggers of the current trigger.
-     */
-    Iterable<ExecutableTrigger<W>> unfinishedSubTriggers();
-
-    /**
-     * Returns the first unfinished sub-trigger.
-     */
-    ExecutableTrigger<W> firstUnfinishedSubTrigger();
-
-    /**
-     * Clears all keyed state for triggers in the current sub-tree and unsets all the associated
-     * finished bits.
-     */
-    void resetTree() throws Exception;
-
-    /**
-     * Sets the finished bit for the current trigger.
-     */
-    void setFinished(boolean finished);
-
-    /**
-     * Sets the finished bit for the given sub-trigger.
-     */
-    void setFinished(boolean finished, int subTriggerIndex);
-  }
-
-  /**
-   * Interact with properties of the trigger being executed, with extensions to deal with the
-   * merging windows.
-   */
-  public interface MergingTriggerInfo<W extends BoundedWindow> extends TriggerInfo<W> {
-
-    /** Return true if the trigger is finished in any window being merged. */
-    public abstract boolean finishedInAnyMergingWindow();
-
-    /** Return true if the trigger is finished in all windows being merged. */
-    public abstract boolean finishedInAllMergingWindows();
-
-    /** Return the merging windows in which the trigger is finished. */
-    public abstract Iterable<W> getFinishedMergingWindows();
-  }
-
-  /**
-   * Information accessible to all operational hooks in this {@code Trigger}.
-   *
-   * <p>Used directly in {@link Trigger#shouldFire} and {@link Trigger#clear}, and
-   * extended with additional information in other methods.
-   */
-  public abstract class TriggerContext {
-
-    /** Returns the interface for accessing trigger info. */
-    public abstract TriggerInfo<W> trigger();
-
-    /** Returns the interface for accessing persistent state. */
-    public abstract StateAccessor<?> state();
-
-    /** The window that the current context is executing in. */
-    public abstract W window();
-
-    /** Create a sub-context for the given sub-trigger. */
-    public abstract TriggerContext forTrigger(ExecutableTrigger<W> trigger);
-
-    /**
-     * Removes the timer set in this trigger context for the given {@link Instant}
-     * and {@link TimeDomain}.
-     */
-    public abstract void deleteTimer(Instant timestamp, TimeDomain domain);
-
-    /** The current processing time. */
-    public abstract Instant currentProcessingTime();
-
-    /** The current synchronized upstream processing time or {@code null} if unknown. */
-    @Nullable
-    public abstract Instant currentSynchronizedProcessingTime();
-
-    /** The current event time for the input or {@code null} if unknown. */
-    @Nullable
-    public abstract Instant currentEventTime();
-  }
-
-  /**
-   * Extended {@link TriggerContext} containing information accessible to the {@link #onElement}
-   * operational hook.
-   */
-  public abstract class OnElementContext extends TriggerContext {
-    /** The event timestamp of the element currently being processed. */
-    public abstract Instant eventTimestamp();
-
-    /**
-     * Sets a timer to fire when the watermark or processing time is beyond the given timestamp.
-     * Timers are not guaranteed to fire immediately, but will be delivered at some time afterwards.
-     *
-     * <p>As with {@link #state}, timers are implicitly scoped to the current window. All
-     * timer firings for a window will be received, but the implementation should choose to ignore
-     * those that are not applicable.
-     *
-     * @param timestamp the time at which the trigger should be re-evaluated
-     * @param domain the domain that the {@code timestamp} applies to
-     */
-    public abstract void setTimer(Instant timestamp, TimeDomain domain);
-
-    /** Create an {@code OnElementContext} for executing the given trigger. */
-    @Override
-    public abstract OnElementContext forTrigger(ExecutableTrigger<W> trigger);
-  }
-
-  /**
-   * Extended {@link TriggerContext} containing information accessible to the {@link #onMerge}
-   * operational hook.
-   */
-  public abstract class OnMergeContext extends TriggerContext {
-    /**
-     * Sets a timer to fire when the watermark or processing time is beyond the given timestamp.
-     * Timers are not guaranteed to fire immediately, but will be delivered at some time afterwards.
-     *
-     * <p>As with {@link #state}, timers are implicitly scoped to the current window. All
-     * timer firings for a window will be received, but the implementation should choose to ignore
-     * those that are not applicable.
-     *
-     * @param timestamp the time at which the trigger should be re-evaluated
-     * @param domain the domain that the {@code timestamp} applies to
-     */
-    public abstract void setTimer(Instant timestamp, TimeDomain domain);
-
-    /** Create an {@code OnMergeContext} for executing the given trigger. */
-    @Override
-    public abstract OnMergeContext forTrigger(ExecutableTrigger<W> trigger);
-
-    @Override
-    public abstract MergingStateAccessor<?, W> state();
-
-    @Override
-    public abstract MergingTriggerInfo<W> trigger();
-  }
-
-  @Nullable
-  protected final List<Trigger<W>> subTriggers;
-
-  protected Trigger(@Nullable List<Trigger<W>> subTriggers) {
-    this.subTriggers = subTriggers;
-  }
-
-
-  /**
-   * Called immediately after an element is first incorporated into a window.
-   */
-  public abstract void onElement(OnElementContext c) throws Exception;
-
-  /**
-   * Called immediately after windows have been merged.
-   *
-   * <p>Leaf triggers should update their state by inspecting their status and any state
-   * in the merging windows. Composite triggers should update their state by calling
-   * {@link ExecutableTrigger#invokeOnMerge} on their sub-triggers, and applying appropriate logic.
-   *
-   * <p>A trigger such as {@link AfterWatermark#pastEndOfWindow} may no longer be finished;
-   * it is the responsibility of the trigger itself to record this fact. It is forbidden for
-   * a trigger to become finished due to {@link #onMerge}, as it has not yet fired the pending
-   * elements that led to it being ready to fire.
-   *
-   * <p>The implementation does not need to clear out any state associated with the old windows.
-   */
-  public abstract void onMerge(OnMergeContext c) throws Exception;
-
-  /**
-   * Returns {@code true} if the current state of the trigger indicates that its condition
-   * is satisfied and it is ready to fire.
-   */
-  public abstract boolean shouldFire(TriggerContext context) throws Exception;
-
-  /**
-   * Adjusts the state of the trigger to be ready for the next pane. For example, a
-   * {@link Repeatedly} trigger will reset its inner trigger, since it has fired.
-   *
-   * <p>If the trigger is finished, it is the responsibility of the trigger itself to
-   * record that fact via the {@code context}.
-   */
-  public abstract void onFire(TriggerContext context) throws Exception;
-
-  /**
-   * Called to allow the trigger to prefetch any state it will likely need to read from during
-   * an {@link #onElement} call.
-   */
-  public void prefetchOnElement(StateAccessor<?> state) {
-    if (subTriggers != null) {
-      for (Trigger<W> trigger : subTriggers) {
-        trigger.prefetchOnElement(state);
-      }
-    }
-  }
-
-  /**
-   * Called to allow the trigger to prefetch any state it will likely need to read from during
-   * an {@link #onMerge} call.
-   */
-  public void prefetchOnMerge(MergingStateAccessor<?, W> state) {
-    if (subTriggers != null) {
-      for (Trigger<W> trigger : subTriggers) {
-        trigger.prefetchOnMerge(state);
-      }
-    }
-  }
-
-  /**
-   * Called to allow the trigger to prefetch any state it will likely need to read from during
-   * a {@link #shouldFire} call.
-   */
-  public void prefetchShouldFire(StateAccessor<?> state) {
-    if (subTriggers != null) {
-      for (Trigger<W> trigger : subTriggers) {
-        trigger.prefetchShouldFire(state);
-      }
-    }
-  }
-
-  /**
-   * Called to allow the trigger to prefetch any state it will likely need to read from during
-   * an {@link #onFire} call.
-   */
-  public void prefetchOnFire(StateAccessor<?> state) {
-    if (subTriggers != null) {
-      for (Trigger<W> trigger : subTriggers) {
-        trigger.prefetchOnFire(state);
-      }
-    }
-  }
-
-  /**
-   * Clear any state associated with this trigger in the given window.
-   *
-   * <p>This is called after a trigger has indicated it will never fire again. The trigger system
-   * keeps enough information to know that the trigger is finished, so this trigger should clear all
-   * of its state.
-   */
-  public void clear(TriggerContext c) throws Exception {
-    if (subTriggers != null) {
-      for (ExecutableTrigger<W> trigger : c.trigger().subTriggers()) {
-        trigger.invokeClear(c);
-      }
-    }
-  }
-
-  public Iterable<Trigger<W>> subTriggers() {
-    return subTriggers;
-  }
-
-  /**
-   * Return a trigger to use after a {@code GroupByKey} to preserve the
-   * intention of this trigger. Specifically, triggers that are time based
-   * and intended to provide speculative results should continue providing
-   * speculative results. Triggers that fire once (or multiple times) should
-   * continue firing once (or multiple times).
-   */
-  public Trigger<W> getContinuationTrigger() {
-    if (subTriggers == null) {
-      return getContinuationTrigger(null);
-    }
-
-    List<Trigger<W>> subTriggerContinuations = new ArrayList<>();
-    for (Trigger<W> subTrigger : subTriggers) {
-      subTriggerContinuations.add(subTrigger.getContinuationTrigger());
-    }
-    return getContinuationTrigger(subTriggerContinuations);
-  }
-
-  /**
-   * Return the {@link #getContinuationTrigger} of this {@code Trigger}. For convenience, this
-   * is provided the continuation trigger of each of the sub-triggers.
-   */
-  protected abstract Trigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers);
-
-  /**
-   * Returns a bound in watermark time by which this trigger would have fired at least once
-   * for a given window had there been input data.  This is a static property of a trigger
-   * that does not depend on its state.
-   *
-   * <p>For triggers that do not fire based on the watermark advancing, returns
-   * {@link BoundedWindow#TIMESTAMP_MAX_VALUE}.
-   *
-   * <p>This estimate is used to determine that there are no elements in a side-input window, which
-   * causes the default value to be used instead.
-   */
-  public abstract Instant getWatermarkThatGuaranteesFiring(W window);
-
-  /**
-   * Returns whether this performs the same triggering as the given {@code Trigger}.
-   */
-  public boolean isCompatible(Trigger<?> other) {
-    if (!getClass().equals(other.getClass())) {
-      return false;
-    }
-
-    if (subTriggers == null) {
-      return other.subTriggers == null;
-    } else if (other.subTriggers == null) {
-      return false;
-    } else if (subTriggers.size() != other.subTriggers.size()) {
-      return false;
-    }
-
-    for (int i = 0; i < subTriggers.size(); i++) {
-      if (!subTriggers.get(i).isCompatible(other.subTriggers.get(i))) {
-        return false;
-      }
-    }
-
-    return true;
-  }
-
-  @Override
-  public String toString() {
-    String simpleName = getClass().getSimpleName();
-    if (getClass().getEnclosingClass() != null) {
-      simpleName = getClass().getEnclosingClass().getSimpleName() + "." + simpleName;
-    }
-    if (subTriggers == null || subTriggers.size() == 0) {
-      return simpleName;
-    } else {
-      return simpleName + "(" + Joiner.on(", ").join(subTriggers) + ")";
-    }
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj) {
-      return true;
-    }
-    if (!(obj instanceof Trigger)) {
-      return false;
-    }
-    @SuppressWarnings("unchecked")
-    Trigger<W> that = (Trigger<W>) obj;
-    return Objects.equals(getClass(), that.getClass())
-        && Objects.equals(subTriggers, that.subTriggers);
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(getClass(), subTriggers);
-  }
-
-  /**
-   * Specify an ending condition for this trigger. If the {@code until} fires then the combination
-   * fires.
-   *
-   * <p>The expression {@code t1.orFinally(t2)} fires every time {@code t1} fires, and finishes
-   * as soon as either {@code t1} finishes or {@code t2} fires, in which case it fires one last time
-   * for {@code t2}. Both {@code t1} and {@code t2} are executed in parallel. This means that
-   * {@code t1} may have fired since {@code t2} started, so not all of the elements that {@code t2}
-   * has seen are necessarily in the current pane.
-   *
-   * <p>For example the final firing of the following trigger may only have 1 element:
-   * <pre> {@code
-   * Repeatedly.forever(AfterPane.elementCountAtLeast(2))
-   *     .orFinally(AfterPane.elementCountAtLeast(5))
-   * } </pre>
-   *
-   * <p>Note that if {@code t1} is a {@link OnceTrigger}, then {@code t1.orFinally(t2)} is the same
-   * as {@code AfterFirst.of(t1, t2)}.
-   */
-  public Trigger<W> orFinally(OnceTrigger<W> until) {
-    return new OrFinallyTrigger<W>(this, until);
-  }
-
-  @Override
-  public Trigger<W> buildTrigger() {
-    return this;
-  }
-
-  /**
-   * {@link Trigger}s that are guaranteed to fire at most once should extend from this, rather
-   * than the general {@link Trigger} class to indicate that behavior.
-   *
-   * @param <W> {@link BoundedWindow} subclass used to represent the windows used by this
-   *            {@code OnceTrigger}
-   */
-  public abstract static class OnceTrigger<W extends BoundedWindow> extends Trigger<W> {
-    protected OnceTrigger(List<Trigger<W>> subTriggers) {
-      super(subTriggers);
-    }
-
-    @Override
-    public final OnceTrigger<W> getContinuationTrigger() {
-      Trigger<W> continuation = super.getContinuationTrigger();
-      if (!(continuation instanceof OnceTrigger)) {
-        throw new IllegalStateException("Continuation of a OnceTrigger must be a OnceTrigger");
-      }
-      return (OnceTrigger<W>) continuation;
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public final void onFire(TriggerContext context) throws Exception {
-      onOnlyFiring(context);
-      context.trigger().setFinished(true);
-    }
-
-    /**
-     * Called exactly once by {@link #onFire} when the trigger is fired. By default,
-     * invokes {@link #onFire} on all subtriggers for which {@link #shouldFire} is {@code true}.
-     */
-    protected abstract void onOnlyFiring(TriggerContext context) throws Exception;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/TriggerBuilder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/TriggerBuilder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/TriggerBuilder.java
deleted file mode 100644
index cc817ba..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/TriggerBuilder.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-/**
- * Anything that can be used to create an instance of a {@code Trigger} implements this interface.
- *
- * <p>This includes {@code Trigger}s (which can return themselves) and any "enhanced" syntax for
- * constructing a trigger.
- *
- * @param <W> The type of windows the built trigger will operate on.
- */
-public interface TriggerBuilder<W extends BoundedWindow> {
-  /** Return the {@code Trigger} built by this builder. */
-  Trigger<W> buildTrigger();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java
deleted file mode 100644
index 6793e76..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/Window.java
+++ /dev/null
@@ -1,662 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.util.AssignWindowsDoFn;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.joda.time.Duration;
-
-import javax.annotation.Nullable;
-
-/**
- * {@code Window} logically divides up or groups the elements of a
- * {@link PCollection} into finite windows according to a {@link WindowFn}.
- * The output of {@code Window} contains the same elements as input, but they
- * have been logically assigned to windows. The next
- * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey GroupByKeys},
- * including one within composite transforms, will group by the combination of
- * keys and windows.
- *
- * <p>See {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}
- * for more information about how grouping with windows works.
- *
- * <h2> Windowing </h2>
- *
- * <p>Windowing a {@code PCollection} divides the elements into windows based
- * on the associated event time for each element. This is especially useful
- * for {@code PCollection}s with unbounded size, since it allows operating on
- * a sub-group of the elements placed into a related window. For {@code PCollection}s
- * with a bounded size (aka. conventional batch mode), by default, all data is
- * implicitly in a single window, unless {@code Window} is applied.
- *
- * <p>For example, a simple form of windowing divides up the data into
- * fixed-width time intervals, using {@link FixedWindows}.
- * The following example demonstrates how to use {@code Window} in a pipeline
- * that counts the number of occurrences of strings each minute:
- *
- * <pre> {@code
- * PCollection<String> items = ...;
- * PCollection<String> windowed_items = items.apply(
- *   Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))));
- * PCollection<KV<String, Long>> windowed_counts = windowed_items.apply(
- *   Count.<String>perElement());
- * } </pre>
- *
- * <p>Let (data, timestamp) denote a data element along with its timestamp.
- * Then, if the input to this pipeline consists of
- * {("foo", 15s), ("bar", 30s), ("foo", 45s), ("foo", 1m30s)},
- * the output will be
- * {(KV("foo", 2), 1m), (KV("bar", 1), 1m), (KV("foo", 1), 2m)}
- *
- * <p>Several predefined {@link WindowFn}s are provided:
- * <ul>
- *  <li> {@link FixedWindows} partitions the timestamps into fixed-width intervals.
- *  <li> {@link SlidingWindows} places data into overlapping fixed-width intervals.
- *  <li> {@link Sessions} groups data into sessions where each item in a window
- *       is separated from the next by no more than a specified gap.
- * </ul>
- *
- * <p>Additionally, custom {@link WindowFn}s can be created, by creating new
- * subclasses of {@link WindowFn}.
- *
- * <h2> Triggers </h2>
- *
- * <p>{@link Window.Bound#triggering(TriggerBuilder)} allows specifying a trigger to control when
- * (in processing time) results for the given window can be produced. If unspecified, the default
- * behavior is to trigger first when the watermark passes the end of the window, and then trigger
- * again every time there is late arriving data.
- *
- * <p>Elements are added to the current window pane as they arrive. When the root trigger fires,
- * output is produced based on the elements in the current pane.
- *
- * <p>Depending on the trigger, this can be used both to output partial results
- * early during the processing of the whole window, and to deal with
- * late-arriving data in batches.
- *
- * <p>Continuing the earlier example, suppose we wanted to emit the values that were available
- * when the watermark passed the end of the window, and then output any late-arriving elements
- * once per hour (of processing time) until we have finished processing the next 24 hours of data.
- * (The use of watermark time to stop processing tends to be more robust if the data source is slow
- * for a few days, etc.) We could do the following:
- *
- * <pre> {@code
- * PCollection<String> items = ...;
- * PCollection<String> windowed_items = items.apply(
- *   Window.<String>into(FixedWindows.of(Duration.standardMinutes(1)))
- *      .triggering(
- *          AfterWatermark.pastEndOfWindow()
- *              .withLateFirings(AfterProcessingTime
- *                  .pastFirstElementInPane().plusDelayOf(Duration.standardHours(1))))
- *      .withAllowedLateness(Duration.standardDays(1)));
- * PCollection<KV<String, Long>> windowed_counts = windowed_items.apply(
- *   Count.<String>perElement());
- * } </pre>
- *
- * <p>On the other hand, if we wanted to get early results every minute of processing
- * time (for which there were new elements in the given window) we could do the following:
- *
- * <pre> {@code
- * PCollection<String> windowed_items = items.apply(
- *   Window.<String>into(FixedWindows.of(Duration.standardMinutes(1)))
- *      .triggering(
- *          AfterWatermark.pastEndOfWindow()
- *              .withEarlyFirings(AfterProcessingTime
- *                  .pastFirstElementInPane().plusDelayOf(Duration.standardMinutes(1))))
- *      .withAllowedLateness(Duration.ZERO));
- * } </pre>
- *
- * <p>After a {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey} the trigger is set to
- * a trigger that will preserve the intent of the upstream trigger.  See
- * {@link Trigger#getContinuationTrigger} for more information.
- *
- * <p>See {@link Trigger} for details on the available triggers.
- */
-public class Window {
-
-  /**
-   * Specifies the conditions under which a final pane will be created when a window is permanently
-   * closed.
-   */
-  public enum ClosingBehavior {
-    /**
-     * Always fire the last pane. Even if there is no new data since the previous firing, an element
-     * with {@link PaneInfo#isLast()} {@code true} will be produced.
-     */
-    FIRE_ALWAYS,
-    /**
-     * Only fire the last pane if there is new data since the previous firing.
-     *
-     * <p>This is the default behavior.
-     */
-    FIRE_IF_NON_EMPTY;
-  }
-
-  /**
-   * Creates a {@code Window} {@code PTransform} with the given name.
-   *
-   * <p>See the discussion of Naming in
-   * {@link com.google.cloud.dataflow.sdk.transforms.ParDo} for more explanation.
-   *
-   * <p>The resulting {@code PTransform} is incomplete, and its input/output
-   * type is not yet bound.  Use {@link Window.Unbound#into} to specify the
-   * {@link WindowFn} to use, which will also bind the input/output type of this
-   * {@code PTransform}.
-   */
-  public static Unbound named(String name) {
-    return new Unbound().named(name);
-  }
-
-  /**
-   * Creates a {@code Window} {@code PTransform} that uses the given
-   * {@link WindowFn} to window the data.
-   *
-   * <p>The resulting {@code PTransform}'s types have been bound, with both the
-   * input and output being a {@code PCollection<T>}, inferred from the types of
-   * the argument {@code WindowFn}.  It is ready to be applied, or further
-   * properties can be set on it first.
-   */
-  public static <T> Bound<T> into(WindowFn<? super T, ?> fn) {
-    return new Unbound().into(fn);
-  }
-
-  /**
-   * Sets a non-default trigger for this {@code Window} {@code PTransform}.
-   * Elements that are assigned to a specific window will be output when
-   * the trigger fires.
-   *
-   * <p>Must also specify allowed lateness using {@link #withAllowedLateness} and accumulation
-   * mode using either {@link #discardingFiredPanes()} or {@link #accumulatingFiredPanes()}.
-   */
-  @Experimental(Kind.TRIGGER)
-  public static <T> Bound<T> triggering(TriggerBuilder<?> trigger) {
-    return new Unbound().triggering(trigger);
-  }
-
-  /**
-   * Returns a new {@code Window} {@code PTransform} that uses the registered WindowFn and
-   * Triggering behavior, and that discards elements in a pane after they are triggered.
-   *
-   * <p>Does not modify this transform.  The resulting {@code PTransform} is sufficiently
-   * specified to be applied, but more properties can still be specified.
-   */
-  @Experimental(Kind.TRIGGER)
-  public static <T> Bound<T> discardingFiredPanes() {
-    return new Unbound().discardingFiredPanes();
-  }
-
-  /**
-   * Returns a new {@code Window} {@code PTransform} that uses the registered WindowFn and
-   * Triggering behavior, and that accumulates elements in a pane after they are triggered.
-   *
-   * <p>Does not modify this transform.  The resulting {@code PTransform} is sufficiently
-   * specified to be applied, but more properties can still be specified.
-   */
-  @Experimental(Kind.TRIGGER)
-  public static <T> Bound<T> accumulatingFiredPanes() {
-    return new Unbound().accumulatingFiredPanes();
-  }
-
-  /**
-   * Override the amount of lateness allowed for data elements in the pipeline. Like
-   * the other properties on this {@link Window} operation, this will be applied at
-   * the next {@link GroupByKey}. Any elements that are later than this as decided by
-   * the system-maintained watermark will be dropped.
-   *
-   * <p>This value also determines how long state will be kept around for old windows.
-   * Once no elements will be added to a window (because this duration has passed) any state
-   * associated with the window will be cleaned up.
-   */
-  @Experimental(Kind.TRIGGER)
-  public static <T> Bound<T> withAllowedLateness(Duration allowedLateness) {
-    return new Unbound().withAllowedLateness(allowedLateness);
-  }
-
-  /**
-   * An incomplete {@code Window} transform, with unbound input/output type.
-   *
-   * <p>Before being applied, {@link Window.Unbound#into} must be
-   * invoked to specify the {@link WindowFn} to invoke, which will also
-   * bind the input/output type of this {@code PTransform}.
-   */
-  public static class Unbound {
-    String name;
-
-    Unbound() {}
-
-    Unbound(String name) {
-      this.name = name;
-    }
-
-    /**
-     * Returns a new {@code Window} transform that's like this
-     * transform but with the specified name.  Does not modify this
-     * transform.  The resulting transform is still incomplete.
-     *
-     * <p>See the discussion of Naming in
-     * {@link com.google.cloud.dataflow.sdk.transforms.ParDo} for more
-     * explanation.
-     */
-    public Unbound named(String name) {
-      return new Unbound(name);
-    }
-
-    /**
-     * Returns a new {@code Window} {@code PTransform} that's like this
-     * transform but that will use the given {@link WindowFn}, and that has
-     * its input and output types bound.  Does not modify this transform.  The
-     * resulting {@code PTransform} is sufficiently specified to be applied,
-     * but more properties can still be specified.
-     */
-    public <T> Bound<T> into(WindowFn<? super T, ?> fn) {
-      return new Bound<T>(name).into(fn);
-    }
-
-    /**
-     * Sets a non-default trigger for this {@code Window} {@code PTransform}.
-     * Elements that are assigned to a specific window will be output when
-     * the trigger fires.
-     *
-     * <p>{@link com.google.cloud.dataflow.sdk.transforms.windowing.Trigger}
-     * has more details on the available triggers.
-     *
-     * <p>Must also specify allowed lateness using {@link #withAllowedLateness} and accumulation
-     * mode using either {@link #discardingFiredPanes()} or {@link #accumulatingFiredPanes()}.
-     */
-    @Experimental(Kind.TRIGGER)
-    public <T> Bound<T> triggering(TriggerBuilder<?> trigger) {
-      return new Bound<T>(name).triggering(trigger);
-    }
-
-    /**
-     * Returns a new {@code Window} {@code PTransform} that uses the registered WindowFn and
-     * Triggering behavior, and that discards elements in a pane after they are triggered.
-     *
-     * <p>Does not modify this transform.  The resulting {@code PTransform} is sufficiently
-     * specified to be applied, but more properties can still be specified.
-     */
-    @Experimental(Kind.TRIGGER)
-    public <T> Bound<T> discardingFiredPanes() {
-      return new Bound<T>(name).discardingFiredPanes();
-    }
-
-    /**
-     * Returns a new {@code Window} {@code PTransform} that uses the registered WindowFn and
-     * Triggering behavior, and that accumulates elements in a pane after they are triggered.
-     *
-     * <p>Does not modify this transform.  The resulting {@code PTransform} is sufficiently
-     * specified to be applied, but more properties can still be specified.
-     */
-    @Experimental(Kind.TRIGGER)
-    public <T> Bound<T> accumulatingFiredPanes() {
-      return new Bound<T>(name).accumulatingFiredPanes();
-    }
-
-    /**
-     * Override the amount of lateness allowed for data elements in the pipeline. Like
-     * the other properties on this {@link Window} operation, this will be applied at
-     * the next {@link GroupByKey}. Any elements that are later than this as decided by
-     * the system-maintained watermark will be dropped.
-     *
-     * <p>This value also determines how long state will be kept around for old windows.
-     * Once no elements will be added to a window (because this duration has passed) any state
-     * associated with the window will be cleaned up.
-     *
-     * <p>Depending on the trigger this may not produce a pane with {@link PaneInfo#isLast}. See
-     * {@link ClosingBehavior#FIRE_IF_NON_EMPTY} for more details.
-     */
-    @Experimental(Kind.TRIGGER)
-    public <T> Bound<T> withAllowedLateness(Duration allowedLateness) {
-      return new Bound<T>(name).withAllowedLateness(allowedLateness);
-    }
-
-    /**
-     * Override the amount of lateness allowed for data elements in the pipeline. Like
-     * the other properties on this {@link Window} operation, this will be applied at
-     * the next {@link GroupByKey}. Any elements that are later than this as decided by
-     * the system-maintained watermark will be dropped.
-     *
-     * <p>This value also determines how long state will be kept around for old windows.
-     * Once no elements will be added to a window (because this duration has passed) any state
-     * associated with the window will be cleaned up.
-     */
-    @Experimental(Kind.TRIGGER)
-    public <T> Bound<T> withAllowedLateness(Duration allowedLateness, ClosingBehavior behavior) {
-      return new Bound<T>(name).withAllowedLateness(allowedLateness, behavior);
-    }
-  }
-
-  /**
-   * A {@code PTransform} that windows the elements of a {@code PCollection<T>},
-   * into finite windows according to a user-specified {@code WindowFn}.
-   *
-   * @param <T> The type of elements this {@code Window} is applied to
-   */
-  public static class Bound<T> extends PTransform<PCollection<T>, PCollection<T>> {
-
-    @Nullable private final WindowFn<? super T, ?> windowFn;
-    @Nullable private final Trigger<?> trigger;
-    @Nullable private final AccumulationMode mode;
-    @Nullable private final Duration allowedLateness;
-    @Nullable private final ClosingBehavior closingBehavior;
-    @Nullable private final OutputTimeFn<?> outputTimeFn;
-
-    private Bound(String name,
-        @Nullable WindowFn<? super T, ?> windowFn, @Nullable Trigger<?> trigger,
-        @Nullable AccumulationMode mode, @Nullable Duration allowedLateness,
-        ClosingBehavior behavior, @Nullable OutputTimeFn<?> outputTimeFn) {
-      super(name);
-      this.windowFn = windowFn;
-      this.trigger = trigger;
-      this.mode = mode;
-      this.allowedLateness = allowedLateness;
-      this.closingBehavior = behavior;
-      this.outputTimeFn = outputTimeFn;
-    }
-
-    private Bound(String name) {
-      this(name, null, null, null, null, null, null);
-    }
-
-    /**
-     * Returns a new {@code Window} {@code PTransform} that's like this
-     * transform but that will use the given {@link WindowFn}, and that has
-     * its input and output types bound.  Does not modify this transform.  The
-     * resulting {@code PTransform} is sufficiently specified to be applied,
-     * but more properties can still be specified.
-     */
-    private Bound<T> into(WindowFn<? super T, ?> windowFn) {
-      try {
-        windowFn.windowCoder().verifyDeterministic();
-      } catch (NonDeterministicException e) {
-        throw new IllegalArgumentException("Window coders must be deterministic.", e);
-      }
-
-      return new Bound<>(
-          name, windowFn, trigger, mode, allowedLateness, closingBehavior, outputTimeFn);
-    }
-
-    /**
-     * Returns a new {@code Window} {@code PTransform} that's like this
-     * {@code PTransform} but with the specified name.  Does not
-     * modify this {@code PTransform}.
-     *
-     * <p>See the discussion of Naming in
-     * {@link com.google.cloud.dataflow.sdk.transforms.ParDo} for more
-     * explanation.
-     */
-    public Bound<T> named(String name) {
-      return new Bound<>(
-          name, windowFn, trigger, mode, allowedLateness, closingBehavior, outputTimeFn);
-    }
-
-    /**
-     * Sets a non-default trigger for this {@code Window} {@code PTransform}.
-     * Elements that are assigned to a specific window will be output when
-     * the trigger fires.
-     *
-     * <p>{@link com.google.cloud.dataflow.sdk.transforms.windowing.Trigger}
-     * has more details on the available triggers.
-     *
-     * <p>Must also specify allowed lateness using {@link #withAllowedLateness} and accumulation
-     * mode using either {@link #discardingFiredPanes()} or {@link #accumulatingFiredPanes()}.
-     */
-    @Experimental(Kind.TRIGGER)
-    public Bound<T> triggering(TriggerBuilder<?> trigger) {
-      return new Bound<T>(
-          name,
-          windowFn,
-          trigger.buildTrigger(),
-          mode,
-          allowedLateness,
-          closingBehavior,
-          outputTimeFn);
-    }
-
-   /**
-    * Returns a new {@code Window} {@code PTransform} that uses the registered WindowFn and
-    * Triggering behavior, and that discards elements in a pane after they are triggered.
-    *
-    * <p>Does not modify this transform.  The resulting {@code PTransform} is sufficiently
-    * specified to be applied, but more properties can still be specified.
-    */
-    @Experimental(Kind.TRIGGER)
-   public Bound<T> discardingFiredPanes() {
-     return new Bound<T>(
-         name,
-         windowFn,
-         trigger,
-         AccumulationMode.DISCARDING_FIRED_PANES,
-         allowedLateness,
-         closingBehavior,
-         outputTimeFn);
-   }
-
-   /**
-    * Returns a new {@code Window} {@code PTransform} that uses the registered WindowFn and
-    * Triggering behavior, and that accumulates elements in a pane after they are triggered.
-    *
-    * <p>Does not modify this transform.  The resulting {@code PTransform} is sufficiently
-    * specified to be applied, but more properties can still be specified.
-    */
-   @Experimental(Kind.TRIGGER)
-   public Bound<T> accumulatingFiredPanes() {
-     return new Bound<T>(
-         name,
-         windowFn,
-         trigger,
-         AccumulationMode.ACCUMULATING_FIRED_PANES,
-         allowedLateness,
-         closingBehavior,
-         outputTimeFn);
-   }
-
-    /**
-     * Override the amount of lateness allowed for data elements in the pipeline. Like
-     * the other properties on this {@link Window} operation, this will be applied at
-     * the next {@link GroupByKey}. Any elements that are later than this as decided by
-     * the system-maintained watermark will be dropped.
-     *
-     * <p>This value also determines how long state will be kept around for old windows.
-     * Once no elements will be added to a window (because this duration has passed) any state
-     * associated with the window will be cleaned up.
-     *
-     * <p>Depending on the trigger this may not produce a pane with {@link PaneInfo#isLast}. See
-     * {@link ClosingBehavior#FIRE_IF_NON_EMPTY} for more details.
-     */
-    @Experimental(Kind.TRIGGER)
-    public Bound<T> withAllowedLateness(Duration allowedLateness) {
-      return new Bound<T>(
-          name, windowFn, trigger, mode, allowedLateness, closingBehavior, outputTimeFn);
-    }
-
-    /**
-     * <b><i>(Experimental)</i></b> Override the default {@link OutputTimeFn}, to control
-     * the output timestamp of values output from a {@link GroupByKey} operation.
-     */
-    @Experimental(Kind.OUTPUT_TIME)
-    public Bound<T> withOutputTimeFn(OutputTimeFn<?> outputTimeFn) {
-      return new Bound<T>(
-          name, windowFn, trigger, mode, allowedLateness, closingBehavior, outputTimeFn);
-    }
-
-    /**
-     * Override the amount of lateness allowed for data elements in the pipeline. Like
-     * the other properties on this {@link Window} operation, this will be applied at
-     * the next {@link GroupByKey}. Any elements that are later than this as decided by
-     * the system-maintained watermark will be dropped.
-     *
-     * <p>This value also determines how long state will be kept around for old windows.
-     * Once no elements will be added to a window (because this duration has passed) any state
-     * associated with the window will be cleaned up.
-     */
-    @Experimental(Kind.TRIGGER)
-    public Bound<T> withAllowedLateness(Duration allowedLateness, ClosingBehavior behavior) {
-      return new Bound<T>(name, windowFn, trigger, mode, allowedLateness, behavior, outputTimeFn);
-    }
-
-    /**
-     * Get the output strategy of this {@link Window.Bound Window PTransform}. For internal use
-     * only.
-     */
-    // Rawtype cast of OutputTimeFn cannot be eliminated with intermediate variable, as it is
-    // casting between wildcards
-    public WindowingStrategy<?, ?> getOutputStrategyInternal(
-        WindowingStrategy<?, ?> inputStrategy) {
-      WindowingStrategy<?, ?> result = inputStrategy;
-      if (windowFn != null) {
-        result = result.withWindowFn(windowFn);
-      }
-      if (trigger != null) {
-        result = result.withTrigger(trigger);
-      }
-      if (mode != null) {
-        result = result.withMode(mode);
-      }
-      if (allowedLateness != null) {
-        result = result.withAllowedLateness(allowedLateness);
-      }
-      if (closingBehavior != null) {
-        result = result.withClosingBehavior(closingBehavior);
-      }
-      if (outputTimeFn != null) {
-        result = result.withOutputTimeFn(outputTimeFn);
-      }
-      return result;
-    }
-
-    /**
-     * Get the {@link WindowFn} of this {@link Window.Bound Window PTransform}.
-     */
-    public WindowFn<? super T, ?> getWindowFn() {
-      return windowFn;
-    }
-
-    @Override
-    public void validate(PCollection<T> input) {
-      WindowingStrategy<?, ?> outputStrategy =
-          getOutputStrategyInternal(input.getWindowingStrategy());
-
-      // Make sure that the windowing strategy is complete & valid.
-      if (outputStrategy.isTriggerSpecified()
-          && !(outputStrategy.getTrigger().getSpec() instanceof DefaultTrigger)) {
-        if (!(outputStrategy.getWindowFn() instanceof GlobalWindows)
-            && !outputStrategy.isAllowedLatenessSpecified()) {
-          throw new IllegalArgumentException("Except when using GlobalWindows,"
-              + " calling .triggering() to specify a trigger requires that the allowed lateness be"
-              + " specified using .withAllowedLateness() to set the upper bound on how late data"
-              + " can arrive before being dropped. See Javadoc for more details.");
-        }
-
-        if (!outputStrategy.isModeSpecified()) {
-          throw new IllegalArgumentException(
-              "Calling .triggering() to specify a trigger requires that the accumulation mode be"
-              + " specified using .discardingFiredPanes() or .accumulatingFiredPanes()."
-              + " See Javadoc for more details.");
-        }
-      }
-    }
-
-    @Override
-    public PCollection<T> apply(PCollection<T> input) {
-      WindowingStrategy<?, ?> outputStrategy =
-          getOutputStrategyInternal(input.getWindowingStrategy());
-      PCollection<T> output;
-      if (windowFn != null) {
-        // If the windowFn changed, we create a primitive, and run the AssignWindows operation here.
-        output = assignWindows(input, windowFn);
-      } else {
-        // If the windowFn didn't change, we just run a pass-through transform and then set the
-        // new windowing strategy.
-        output = input.apply(Window.<T>identity());
-      }
-      return output.setWindowingStrategyInternal(outputStrategy);
-    }
-
-    private <T, W extends BoundedWindow> PCollection<T> assignWindows(
-        PCollection<T> input, WindowFn<? super T, W> windowFn) {
-      return input.apply("AssignWindows", ParDo.of(new AssignWindowsDoFn<T, W>(windowFn)));
-    }
-
-    @Override
-    protected Coder<?> getDefaultOutputCoder(PCollection<T> input) {
-      return input.getCoder();
-    }
-
-    @Override
-    protected String getKindString() {
-      return "Window.Into()";
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static <T> PTransform<PCollection<? extends T>, PCollection<T>> identity() {
-    return ParDo.named("Identity").of(new DoFn<T, T>() {
-      @Override public void processElement(ProcessContext c) {
-        c.output(c.element());
-      }
-    });
-  }
-
-  /**
-   * Creates a {@code Window} {@code PTransform} that does not change assigned
-   * windows, but will cause windows to be merged again as part of the next
-   * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}.
-   */
-  public static <T> Remerge<T> remerge() {
-    return new Remerge<T>();
-  }
-
-  /**
-   * {@code PTransform} that does not change assigned windows, but will cause
-   *  windows to be merged again as part of the next
-   * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}.
-   */
-  public static class Remerge<T> extends PTransform<PCollection<T>, PCollection<T>> {
-    @Override
-    public PCollection<T> apply(PCollection<T> input) {
-      WindowingStrategy<?, ?> outputWindowingStrategy = getOutputWindowing(
-          input.getWindowingStrategy());
-
-      return input.apply(Window.<T>identity())
-          .setWindowingStrategyInternal(outputWindowingStrategy);
-    }
-
-    private <W extends BoundedWindow> WindowingStrategy<?, W> getOutputWindowing(
-        WindowingStrategy<?, W> inputStrategy) {
-      if (inputStrategy.getWindowFn() instanceof InvalidWindows) {
-        @SuppressWarnings("unchecked")
-        InvalidWindows<W> invalidWindows = (InvalidWindows<W>) inputStrategy.getWindowFn();
-        return inputStrategy.withWindowFn(invalidWindows.getOriginalWindowFn());
-      } else {
-        return inputStrategy;
-      }
-    }
-  }
-}

[46/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java
deleted file mode 100644
index 0de606b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VoidCoder.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * A {@link Coder} for {@link Void}. Uses zero bytes per {@link Void}.
- */
-public class VoidCoder extends AtomicCoder<Void> {
-
-  @JsonCreator
-  public static VoidCoder of() {
-    return INSTANCE;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static final VoidCoder INSTANCE = new VoidCoder();
-
-  private VoidCoder() {}
-
-  @Override
-  public void encode(Void value, OutputStream outStream, Context context) {
-    // Nothing to write!
-  }
-
-  @Override
-  public Void decode(InputStream inStream, Context context) {
-    // Nothing to read!
-    return null;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return  {@code true}. {@link VoidCoder} is (vacuously) injective.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. {@link VoidCoder#getEncodedElementByteSize} runs in constant time.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(Void value, Context context) {
-    return true;
-  }
-
-  @Override
-  protected long getEncodedElementByteSize(Void value, Context context)
-      throws Exception {
-    return 0;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java
deleted file mode 100644
index fdf931f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/package-info.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines {@link com.google.cloud.dataflow.sdk.coders.Coder Coders}
- * to specify how data is encoded to and decoded from byte strings.
- *
- * <p>During execution of a Pipeline, elements in a
- * {@link com.google.cloud.dataflow.sdk.values.PCollection}
- * may need to be encoded into byte strings.
- * This happens both at the beginning and end of a pipeline when data is read from and written to
- * persistent storage and also during execution of a pipeline when elements are communicated between
- * machines.
- *
- * <p>Exactly when PCollection elements are encoded during execution depends on which
- * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} is being used and how that runner
- * chooses to execute the pipeline. As such, Dataflow requires that all PCollections have an
- * appropriate Coder in case it becomes necessary. In many cases, the Coder can be inferred from
- * the available Java type
- * information and the Pipeline's {@link com.google.cloud.dataflow.sdk.coders.CoderRegistry}. It
- * can be specified per PCollection via
- * {@link com.google.cloud.dataflow.sdk.values.PCollection#setCoder(Coder)} or per type using the
- * {@link com.google.cloud.dataflow.sdk.coders.DefaultCoder} annotation.
- *
- * <p>This package provides a number of coders for common types like {@code Integer},
- * {@code String}, and {@code List}, as well as coders like
- * {@link com.google.cloud.dataflow.sdk.coders.AvroCoder} that can be used to encode many custom
- * types.
- *
- */
-package com.google.cloud.dataflow.sdk.coders;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/ProtoCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/ProtoCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/ProtoCoder.java
deleted file mode 100644
index 111c24d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/ProtoCoder.java
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.coders.protobuf;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.CoderProvider;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.Structs;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-import com.google.protobuf.ExtensionRegistry;
-import com.google.protobuf.Message;
-import com.google.protobuf.Parser;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.lang.reflect.Modifier;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Objects;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TreeSet;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link Coder} using Google Protocol Buffers binary format. {@link ProtoCoder} supports both
- * Protocol Buffers syntax versions 2 and 3.
- *
- * <p>To learn more about Protocol Buffers, visit:
- * <a href="https://developers.google.com/protocol-buffers">https://developers.google.com/protocol-buffers</a>
- *
- * <p>{@link ProtoCoder} is registered in the global {@link CoderRegistry} as the default
- * {@link Coder} for any {@link Message} object. Custom message extensions are also supported, but
- * these extensions must be registered for a particular {@link ProtoCoder} instance and that
- * instance must be registered on the {@link PCollection} that needs the extensions:
- *
- * <pre>{@code
- * import MyProtoFile;
- * import MyProtoFile.MyMessage;
- *
- * Coder<MyMessage> coder = ProtoCoder.of(MyMessage.class).withExtensionsFrom(MyProtoFile.class);
- * PCollection<MyMessage> records =  input.apply(...).setCoder(coder);
- * }</pre>
- *
- * <h3>Versioning</h3>
- *
- * <p>{@link ProtoCoder} supports both versions 2 and 3 of the Protocol Buffers syntax. However,
- * the Java runtime version of the <code>com.google.protobuf</code> library must match exactly the
- * version of <code>protoc</code> that was used to produce the JAR files containing the compiled
- * <code>.proto</code> messages.
- *
- * <p>For more information, see the
- * <a href="https://developers.google.com/protocol-buffers/docs/proto3#using-proto2-message-types">Protocol Buffers documentation</a>.
- *
- * <h3>{@link ProtoCoder} and Determinism</h3>
- *
- * <p>In general, Protocol Buffers messages can be encoded deterministically within a single
- * pipeline as long as:
- *
- * <ul>
- * <li>The encoded messages (and any transitively linked messages) do not use <code>map</code>
- *     fields.</li>
- * <li>Every Java VM that encodes or decodes the messages uses the same runtime version of the
- *     Protocol Buffers library and the same compiled <code>.proto</code> file JAR.</li>
- * </ul>
- *
- * <h3>{@link ProtoCoder} and Encoding Stability</h3>
- *
- * <p>When changing Protocol Buffers messages, follow the rules in the Protocol Buffers language
- * guides for
- * <a href="https://developers.google.com/protocol-buffers/docs/proto#updating">{@code proto2}</a>
- * and
- * <a href="https://developers.google.com/protocol-buffers/docs/proto3#updating">{@code proto3}</a>
- * syntaxes, depending on your message type. Following these guidelines will ensure that the
- * old encoded data can be read by new versions of the code.
- *
- * <p>Generally, any change to the message type, registered extensions, runtime library, or
- * compiled proto JARs may change the encoding. Thus even if both the original and updated messages
- * can be encoded deterministically within a single job, these deterministic encodings may not be
- * the same across jobs.
- *
- * @param <T> the Protocol Buffers {@link Message} handled by this {@link Coder}.
- */
-public class ProtoCoder<T extends Message> extends AtomicCoder<T> {
-
-  /**
-   * A {@link CoderProvider} that returns a {@link ProtoCoder} with an empty
-   * {@link ExtensionRegistry}.
-   */
-  public static CoderProvider coderProvider() {
-    return PROVIDER;
-  }
-
-  /**
-   * Returns a {@link ProtoCoder} for the given Protocol Buffers {@link Message}.
-   */
-  public static <T extends Message> ProtoCoder<T> of(Class<T> protoMessageClass) {
-    return new ProtoCoder<T>(protoMessageClass, ImmutableSet.<Class<?>>of());
-  }
-
-  /**
-   * Returns a {@link ProtoCoder} for the Protocol Buffers {@link Message} indicated by the given
-   * {@link TypeDescriptor}.
-   */
-  public static <T extends Message> ProtoCoder<T> of(TypeDescriptor<T> protoMessageType) {
-    @SuppressWarnings("unchecked")
-    Class<T> protoMessageClass = (Class<T>) protoMessageType.getRawType();
-    return of(protoMessageClass);
-  }
-
-  /**
-   * Returns a {@link ProtoCoder} like this one, but with the extensions from the given classes
-   * registered.
-   *
-   * <p>Each of the extension host classes must be a class automatically generated by the
-   * Protocol Buffers compiler, {@code protoc}, that contains messages.
-   *
-   * <p>Does not modify this object.
-   */
-  public ProtoCoder<T> withExtensionsFrom(Iterable<Class<?>> moreExtensionHosts) {
-    for (Class<?> extensionHost : moreExtensionHosts) {
-      // Attempt to access the required method, to make sure it's present.
-      try {
-        Method registerAllExtensions =
-            extensionHost.getDeclaredMethod("registerAllExtensions", ExtensionRegistry.class);
-        checkArgument(
-            Modifier.isStatic(registerAllExtensions.getModifiers()),
-            "Method registerAllExtensions() must be static");
-      } catch (NoSuchMethodException | SecurityException e) {
-        throw new IllegalArgumentException(
-            String.format("Unable to register extensions for %s", extensionHost.getCanonicalName()),
-            e);
-      }
-    }
-
-    return new ProtoCoder<T>(
-        protoMessageClass,
-        new ImmutableSet.Builder<Class<?>>()
-            .addAll(extensionHostClasses)
-            .addAll(moreExtensionHosts)
-            .build());
-  }
-
-  /**
-   * See {@link #withExtensionsFrom(Iterable)}.
-   *
-   * <p>Does not modify this object.
-   */
-  public ProtoCoder<T> withExtensionsFrom(Class<?>... moreExtensionHosts) {
-    return withExtensionsFrom(Arrays.asList(moreExtensionHosts));
-  }
-
-  @Override
-  public void encode(T value, OutputStream outStream, Context context) throws IOException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null " + protoMessageClass.getSimpleName());
-    }
-    if (context.isWholeStream) {
-      value.writeTo(outStream);
-    } else {
-      value.writeDelimitedTo(outStream);
-    }
-  }
-
-  @Override
-  public T decode(InputStream inStream, Context context) throws IOException {
-    if (context.isWholeStream) {
-      return getParser().parseFrom(inStream, getExtensionRegistry());
-    } else {
-      return getParser().parseDelimitedFrom(inStream, getExtensionRegistry());
-    }
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (this == other) {
-      return true;
-    }
-    if (!(other instanceof ProtoCoder)) {
-      return false;
-    }
-    ProtoCoder<?> otherCoder = (ProtoCoder<?>) other;
-    return protoMessageClass.equals(otherCoder.protoMessageClass)
-        && Sets.newHashSet(extensionHostClasses)
-            .equals(Sets.newHashSet(otherCoder.extensionHostClasses));
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(protoMessageClass, extensionHostClasses);
-  }
-
-  /**
-   * The encoding identifier is designed to support evolution as per the design of Protocol
-   * Buffers. In order to use this class effectively, carefully follow the advice in the Protocol
-   * Buffers documentation at
-   * <a href="https://developers.google.com/protocol-buffers/docs/proto#updating">Updating
-   * A Message Type</a>.
-   *
-   * <p>In particular, the encoding identifier is guaranteed to be the same for {@link ProtoCoder}
-   * instances of the same principal message class, with the same registered extension host classes,
-   * and otherwise distinct. Note that the encoding ID does not encode any version of the message
-   * or extensions, nor does it include the message schema.
-   *
-   * <p>When modifying a message class, here are the broadest guidelines; see the above link
-   * for greater detail.
-   *
-   * <ul>
-   * <li>Do not change the numeric tags for any fields.
-   * <li>Never remove a <code>required</code> field.
-   * <li>Only add <code>optional</code> or <code>repeated</code> fields, with sensible defaults.
-   * <li>When changing the type of a field, consult the Protocol Buffers documentation to ensure
-   * the new and old types are interchangeable.
-   * </ul>
-   *
-   * <p>Code consuming this message class should be prepared to support <i>all</i> versions of
-   * the class until it is certain that no remaining serialized instances exist.
-   *
-   * <p>If backwards incompatible changes must be made, the best recourse is to change the name
-   * of your Protocol Buffers message class.
-   */
-  @Override
-  public String getEncodingId() {
-    return protoMessageClass.getName() + getSortedExtensionClasses().toString();
-  }
-
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    ProtobufUtil.verifyDeterministic(this);
-  }
-
-  /**
-   * Returns the Protocol Buffers {@link Message} type this {@link ProtoCoder} supports.
-   */
-  public Class<T> getMessageType() {
-    return protoMessageClass;
-  }
-
-  /**
-   * Returns the {@link ExtensionRegistry} listing all known Protocol Buffers extension messages
-   * to {@code T} registered with this {@link ProtoCoder}.
-   */
-  public ExtensionRegistry getExtensionRegistry() {
-    if (memoizedExtensionRegistry == null) {
-      ExtensionRegistry registry = ExtensionRegistry.newInstance();
-      for (Class<?> extensionHost : extensionHostClasses) {
-        try {
-          extensionHost
-              .getDeclaredMethod("registerAllExtensions", ExtensionRegistry.class)
-              .invoke(null, registry);
-        } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
-          throw new IllegalStateException(e);
-        }
-      }
-      memoizedExtensionRegistry = registry.getUnmodifiable();
-    }
-    return memoizedExtensionRegistry;
-  }
-
-  ////////////////////////////////////////////////////////////////////////////////////
-  // Private implementation details below.
-
-  /** The {@link Message} type to be coded. */
-  private final Class<T> protoMessageClass;
-
-  /**
-   * All extension host classes included in this {@link ProtoCoder}. The extensions from these
-   * classes will be included in the {@link ExtensionRegistry} used during encoding and decoding.
-   */
-  private final Set<Class<?>> extensionHostClasses;
-
-  // Constants used to serialize and deserialize
-  private static final String PROTO_MESSAGE_CLASS = "proto_message_class";
-  private static final String PROTO_EXTENSION_HOSTS = "proto_extension_hosts";
-
-  // Transient fields that are lazy initialized and then memoized.
-  private transient ExtensionRegistry memoizedExtensionRegistry;
-  private transient Parser<T> memoizedParser;
-
-  /** Private constructor. */
-  private ProtoCoder(Class<T> protoMessageClass, Set<Class<?>> extensionHostClasses) {
-    this.protoMessageClass = protoMessageClass;
-    this.extensionHostClasses = extensionHostClasses;
-  }
-
-  /**
-   * @deprecated For JSON deserialization only.
-   */
-  @JsonCreator
-  @Deprecated
-  public static <T extends Message> ProtoCoder<T> of(
-      @JsonProperty(PROTO_MESSAGE_CLASS) String protoMessageClassName,
-      @Nullable @JsonProperty(PROTO_EXTENSION_HOSTS) List<String> extensionHostClassNames) {
-
-    try {
-      @SuppressWarnings("unchecked")
-      Class<T> protoMessageClass = (Class<T>) Class.forName(protoMessageClassName);
-      List<Class<?>> extensionHostClasses = Lists.newArrayList();
-      if (extensionHostClassNames != null) {
-        for (String extensionHostClassName : extensionHostClassNames) {
-          extensionHostClasses.add(Class.forName(extensionHostClassName));
-        }
-      }
-      return of(protoMessageClass).withExtensionsFrom(extensionHostClasses);
-    } catch (ClassNotFoundException e) {
-      throw new IllegalArgumentException(e);
-    }
-  }
-
-  @Override
-  public CloudObject asCloudObject() {
-    CloudObject result = super.asCloudObject();
-    Structs.addString(result, PROTO_MESSAGE_CLASS, protoMessageClass.getName());
-    List<CloudObject> extensionHostClassNames = Lists.newArrayList();
-    for (String className : getSortedExtensionClasses()) {
-      extensionHostClassNames.add(CloudObject.forString(className));
-    }
-    Structs.addList(result, PROTO_EXTENSION_HOSTS, extensionHostClassNames);
-    return result;
-  }
-
-  /** Get the memoized {@link Parser}, possibly initializing it lazily. */
-  private Parser<T> getParser() {
-    if (memoizedParser == null) {
-      try {
-        @SuppressWarnings("unchecked")
-        T protoMessageInstance = (T) protoMessageClass.getMethod("getDefaultInstance").invoke(null);
-        @SuppressWarnings("unchecked")
-        Parser<T> tParser = (Parser<T>) protoMessageInstance.getParserForType();
-        memoizedParser = tParser;
-      } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
-        throw new IllegalArgumentException(e);
-      }
-    }
-    return memoizedParser;
-  }
-
-  /**
-   * The implementation of the {@link CoderProvider} for this {@link ProtoCoder} returned by
-   * {@link #coderProvider()}.
-   */
-  private static final CoderProvider PROVIDER =
-      new CoderProvider() {
-        @Override
-        public <T> Coder<T> getCoder(TypeDescriptor<T> type) throws CannotProvideCoderException {
-          if (!type.isSubtypeOf(new TypeDescriptor<Message>() {})) {
-            throw new CannotProvideCoderException(
-                String.format(
-                    "Cannot provide %s because %s is not a subclass of %s",
-                    ProtoCoder.class.getSimpleName(),
-                    type,
-                    Message.class.getName()));
-          }
-
-          @SuppressWarnings("unchecked")
-          TypeDescriptor<? extends Message> messageType = (TypeDescriptor<? extends Message>) type;
-          try {
-            @SuppressWarnings("unchecked")
-            Coder<T> coder = (Coder<T>) ProtoCoder.of(messageType);
-            return coder;
-          } catch (IllegalArgumentException e) {
-            throw new CannotProvideCoderException(e);
-          }
-        }
-      };
-
-  private SortedSet<String> getSortedExtensionClasses() {
-    SortedSet<String> ret = new TreeSet<>();
-    for (Class<?> clazz : extensionHostClasses) {
-      ret.add(clazz.getName());
-    }
-    return ret;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/ProtobufUtil.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/ProtobufUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/ProtobufUtil.java
deleted file mode 100644
index 597b1de..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/ProtobufUtil.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders.protobuf;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;
-import com.google.protobuf.Descriptors.Descriptor;
-import com.google.protobuf.Descriptors.FieldDescriptor;
-import com.google.protobuf.Descriptors.FileDescriptor.Syntax;
-import com.google.protobuf.Descriptors.GenericDescriptor;
-import com.google.protobuf.ExtensionRegistry;
-import com.google.protobuf.ExtensionRegistry.ExtensionInfo;
-import com.google.protobuf.Message;
-
-import java.lang.reflect.InvocationTargetException;
-import java.util.HashSet;
-import java.util.Set;
-
-/**
- * Utility functions for reflecting and analyzing Protocol Buffers classes.
- *
- * <p>Used by {@link ProtoCoder}, but in a separate file for testing and isolation.
- */
-class ProtobufUtil {
-  /**
-   * Returns the {@link Descriptor} for the given Protocol Buffers {@link Message}.
-   *
-   * @throws IllegalArgumentException if there is an error in Java reflection.
-   */
-  static Descriptor getDescriptorForClass(Class<? extends Message> clazz) {
-    try {
-      return (Descriptor) clazz.getMethod("getDescriptor").invoke(null);
-    } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
-      throw new IllegalArgumentException(e);
-    }
-  }
-
-  /**
-   * Returns the {@link Descriptor} for the given Protocol Buffers {@link Message} as well as
-   * every class it can include transitively.
-   *
-   * @throws IllegalArgumentException if there is an error in Java reflection.
-   */
-  static Set<Descriptor> getRecursiveDescriptorsForClass(
-      Class<? extends Message> clazz, ExtensionRegistry registry) {
-    Descriptor root = getDescriptorForClass(clazz);
-    Set<Descriptor> descriptors = new HashSet<>();
-    recursivelyAddDescriptors(root, descriptors, registry);
-    return descriptors;
-  }
-
-  /**
-   * Recursively walks the given {@link Message} class and verifies that every field or message
-   * linked in uses the Protocol Buffers proto2 syntax.
-   */
-  static void checkProto2Syntax(Class<? extends Message> clazz, ExtensionRegistry registry) {
-    for (GenericDescriptor d : getRecursiveDescriptorsForClass(clazz, registry)) {
-      Syntax s = d.getFile().getSyntax();
-      checkArgument(
-          s == Syntax.PROTO2,
-          "Message %s or one of its dependencies does not use proto2 syntax: %s in file %s",
-          clazz.getName(),
-          d.getFullName(),
-          d.getFile().getName());
-    }
-  }
-
-  /**
-   * Recursively checks whether the specified class uses any Protocol Buffers fields that cannot
-   * be deterministically encoded.
-   *
-   * @throws NonDeterministicException if the object cannot be encoded deterministically.
-   */
-  static void verifyDeterministic(ProtoCoder<?> coder) throws NonDeterministicException {
-    Class<? extends Message> message = coder.getMessageType();
-    ExtensionRegistry registry = coder.getExtensionRegistry();
-    Set<Descriptor> descriptors = getRecursiveDescriptorsForClass(message, registry);
-    for (Descriptor d : descriptors) {
-      for (FieldDescriptor fd : d.getFields()) {
-        // If there is a transitively reachable Protocol Buffers map field, then this object cannot
-        // be encoded deterministically.
-        if (fd.isMapField()) {
-          String reason =
-              String.format(
-                  "Protocol Buffers message %s transitively includes Map field %s (from file %s)."
-                      + " Maps cannot be deterministically encoded.",
-                  message.getName(),
-                  fd.getFullName(),
-                  fd.getFile().getFullName());
-          throw new NonDeterministicException(coder, reason);
-        }
-      }
-    }
-  }
-
-  ////////////////////////////////////////////////////////////////////////////////////////////////
-  // Disable construction of utility class
-  private ProtobufUtil() {}
-
-  private static void recursivelyAddDescriptors(
-      Descriptor message, Set<Descriptor> descriptors, ExtensionRegistry registry) {
-    if (descriptors.contains(message)) {
-      return;
-    }
-    descriptors.add(message);
-
-    for (FieldDescriptor f : message.getFields()) {
-      recursivelyAddDescriptors(f, descriptors, registry);
-    }
-    for (FieldDescriptor f : message.getExtensions()) {
-      recursivelyAddDescriptors(f, descriptors, registry);
-    }
-    for (ExtensionInfo info :
-        registry.getAllImmutableExtensionsByExtendedType(message.getFullName())) {
-      recursivelyAddDescriptors(info.descriptor, descriptors, registry);
-    }
-    for (ExtensionInfo info :
-        registry.getAllMutableExtensionsByExtendedType(message.getFullName())) {
-      recursivelyAddDescriptors(info.descriptor, descriptors, registry);
-    }
-  }
-
-  private static void recursivelyAddDescriptors(
-      FieldDescriptor field, Set<Descriptor> descriptors, ExtensionRegistry registry) {
-    switch (field.getType()) {
-      case BOOL:
-      case BYTES:
-      case DOUBLE:
-      case ENUM:
-      case FIXED32:
-      case FIXED64:
-      case FLOAT:
-      case INT32:
-      case INT64:
-      case SFIXED32:
-      case SFIXED64:
-      case SINT32:
-      case SINT64:
-      case STRING:
-      case UINT32:
-      case UINT64:
-        // Primitive types do not transitively access anything else.
-        break;
-
-      case GROUP:
-      case MESSAGE:
-        // Recursively adds all the fields from this nested Message.
-        recursivelyAddDescriptors(field.getMessageType(), descriptors, registry);
-        break;
-
-      default:
-        throw new UnsupportedOperationException(
-            "Unexpected Protocol Buffers field type: " + field.getType());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/package-info.java
deleted file mode 100644
index b5bcf18..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/protobuf/package-info.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines a {@link com.google.cloud.dataflow.sdk.coders.Coder}
- * for Protocol Buffers messages, {@code ProtoCoder}.
- *
- * @see com.google.cloud.dataflow.sdk.coders.protobuf.ProtoCoder
- */
-package com.google.cloud.dataflow.sdk.coders.protobuf;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java
deleted file mode 100644
index f016b5b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroIO.java
+++ /dev/null
@@ -1,810 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.coders.AvroCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.io.Read.Bounded;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
-import com.google.cloud.dataflow.sdk.util.MimeTypes;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.reflect.ReflectData;
-
-import java.io.IOException;
-import java.nio.channels.Channels;
-import java.nio.channels.WritableByteChannel;
-import java.util.regex.Pattern;
-
-import javax.annotation.Nullable;
-
-/**
- * {@link PTransform}s for reading and writing Avro files.
- *
- * <p>To read a {@link PCollection} from one or more Avro files, use
- * {@link AvroIO.Read}, specifying {@link AvroIO.Read#from} to specify
- * the path of the file(s) to read from (e.g., a local filename or
- * filename pattern if running locally, or a Google Cloud Storage
- * filename or filename pattern of the form
- * {@code "gs://<bucket>/<filepath>"}), and optionally
- * {@link AvroIO.Read#named} to specify the name of the pipeline step.
- *
- * <p>It is required to specify {@link AvroIO.Read#withSchema}. To
- * read specific records, such as Avro-generated classes, provide an
- * Avro-generated class type. To read {@link GenericRecord GenericRecords}, provide either
- * a {@link Schema} object or an Avro schema in a JSON-encoded string form.
- * An exception will be thrown if a record doesn't match the specified
- * schema.
- *
- * <p>For example:
- * <pre> {@code
- * Pipeline p = ...;
- *
- * // A simple Read of a local file (only runs locally):
- * PCollection<AvroAutoGenClass> records =
- *     p.apply(AvroIO.Read.from("/path/to/file.avro")
- *                        .withSchema(AvroAutoGenClass.class));
- *
- * // A Read from a GCS file (runs locally and via the Google Cloud
- * // Dataflow service):
- * Schema schema = new Schema.Parser().parse(new File("schema.avsc"));
- * PCollection<GenericRecord> records =
- *     p.apply(AvroIO.Read.named("ReadFromAvro")
- *                        .from("gs://my_bucket/path/to/records-*.avro")
- *                        .withSchema(schema));
- * } </pre>
- *
- * <p>To write a {@link PCollection} to one or more Avro files, use
- * {@link AvroIO.Write}, specifying {@link AvroIO.Write#to} to specify
- * the path of the file to write to (e.g., a local filename or sharded
- * filename pattern if running locally, or a Google Cloud Storage
- * filename or sharded filename pattern of the form
- * {@code "gs://<bucket>/<filepath>"}), and optionally
- * {@link AvroIO.Write#named} to specify the name of the pipeline step.
- *
- * <p>It is required to specify {@link AvroIO.Write#withSchema}. To
- * write specific records, such as Avro-generated classes, provide an
- * Avro-generated class type. To write {@link GenericRecord GenericRecords}, provide either
- * a {@link Schema} object or a schema in a JSON-encoded string form.
- * An exception will be thrown if a record doesn't match the specified
- * schema.
- *
- * <p>For example:
- * <pre> {@code
- * // A simple Write to a local file (only runs locally):
- * PCollection<AvroAutoGenClass> records = ...;
- * records.apply(AvroIO.Write.to("/path/to/file.avro")
- *                           .withSchema(AvroAutoGenClass.class));
- *
- * // A Write to a sharded GCS file (runs locally and via the Google Cloud
- * // Dataflow service):
- * Schema schema = new Schema.Parser().parse(new File("schema.avsc"));
- * PCollection<GenericRecord> records = ...;
- * records.apply(AvroIO.Write.named("WriteToAvro")
- *                           .to("gs://my_bucket/path/to/numbers")
- *                           .withSchema(schema)
- *                           .withSuffix(".avro"));
- * } </pre>
- *
- * <h3>Permissions</h3>
- * Permission requirements depend on the {@link PipelineRunner} that is used to execute the
- * Dataflow job. Please refer to the documentation of corresponding {@link PipelineRunner}s for
- * more details.
- */
-public class AvroIO {
-  /**
-   * A root {@link PTransform} that reads from an Avro file (or multiple Avro
-   * files matching a pattern) and returns a {@link PCollection} containing
-   * the decoding of each record.
-   */
-  public static class Read {
-    /**
-     * Returns a {@link PTransform} with the given step name.
-     */
-    public static Bound<GenericRecord> named(String name) {
-      return new Bound<>(GenericRecord.class).named(name);
-    }
-
-    /**
-     * Returns a {@link PTransform} that reads from the file(s)
-     * with the given name or pattern. This can be a local filename
-     * or filename pattern (if running locally), or a Google Cloud
-     * Storage filename or filename pattern of the form
-     * {@code "gs://<bucket>/<filepath>"} (if running locally or via
-     * the Google Cloud Dataflow service). Standard
-     * <a href="http://docs.oracle.com/javase/tutorial/essential/io/find.html">Java
-     * Filesystem glob patterns</a> ("*", "?", "[..]") are supported.
-     */
-    public static Bound<GenericRecord> from(String filepattern) {
-      return new Bound<>(GenericRecord.class).from(filepattern);
-    }
-
-    /**
-     * Returns a {@link PTransform} that reads Avro file(s)
-     * containing records whose type is the specified Avro-generated class.
-     *
-     * @param <T> the type of the decoded elements, and the elements
-     * of the resulting {@link PCollection}
-     */
-    public static <T> Bound<T> withSchema(Class<T> type) {
-      return new Bound<>(type).withSchema(type);
-    }
-
-    /**
-     * Returns a {@link PTransform} that reads Avro file(s)
-     * containing records of the specified schema.
-     */
-    public static Bound<GenericRecord> withSchema(Schema schema) {
-      return new Bound<>(GenericRecord.class).withSchema(schema);
-    }
-
-    /**
-     * Returns a {@link PTransform} that reads Avro file(s)
-     * containing records of the specified schema in a JSON-encoded
-     * string form.
-     */
-    public static Bound<GenericRecord> withSchema(String schema) {
-      return withSchema((new Schema.Parser()).parse(schema));
-    }
-
-    /**
-     * Returns a {@link PTransform} that reads Avro file(s)
-     * and has GCS path validation on pipeline creation disabled.
-     *
-     * <p>This can be useful in the case where the GCS input location does
-     * not exist at the pipeline creation time, but is expected to be available
-     * at execution time.
-     */
-    public static Bound<GenericRecord> withoutValidation() {
-      return new Bound<>(GenericRecord.class).withoutValidation();
-    }
-
-    /**
-     * A {@link PTransform} that reads from an Avro file (or multiple Avro
-     * files matching a pattern) and returns a bounded {@link PCollection} containing
-     * the decoding of each record.
-     *
-     * @param <T> the type of each of the elements of the resulting
-     * PCollection
-     */
-    public static class Bound<T> extends PTransform<PInput, PCollection<T>> {
-      /** The filepattern to read from. */
-      @Nullable
-      final String filepattern;
-      /** The class type of the records. */
-      final Class<T> type;
-      /** The schema of the input file. */
-      @Nullable
-      final Schema schema;
-      /** An option to indicate if input validation is desired. Default is true. */
-      final boolean validate;
-
-      Bound(Class<T> type) {
-        this(null, null, type, null, true);
-      }
-
-      Bound(String name, String filepattern, Class<T> type, Schema schema, boolean validate) {
-        super(name);
-        this.filepattern = filepattern;
-        this.type = type;
-        this.schema = schema;
-        this.validate = validate;
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * with the given step name.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> named(String name) {
-        return new Bound<>(name, filepattern, type, schema, validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that reads from the file(s) with the given name or pattern.
-       * (See {@link AvroIO.Read#from} for a description of
-       * filepatterns.)
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> from(String filepattern) {
-        return new Bound<>(name, filepattern, type, schema, validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that reads Avro file(s) containing records whose type is the
-       * specified Avro-generated class.
-       *
-       * <p>Does not modify this object.
-       *
-       * @param <X> the type of the decoded elements and the elements of
-       * the resulting PCollection
-       */
-      public <X> Bound<X> withSchema(Class<X> type) {
-        return new Bound<>(name, filepattern, type, ReflectData.get().getSchema(type), validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that reads Avro file(s) containing records of the specified schema.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<GenericRecord> withSchema(Schema schema) {
-        return new Bound<>(name, filepattern, GenericRecord.class, schema, validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that reads Avro file(s) containing records of the specified schema
-       * in a JSON-encoded string form.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<GenericRecord> withSchema(String schema) {
-        return withSchema((new Schema.Parser()).parse(schema));
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that has GCS input path validation on pipeline creation disabled.
-       *
-       * <p>Does not modify this object.
-       *
-       * <p>This can be useful in the case where the GCS input location does
-       * not exist at the pipeline creation time, but is expected to be
-       * available at execution time.
-       */
-      public Bound<T> withoutValidation() {
-        return new Bound<>(name, filepattern, type, schema, false);
-      }
-
-      @Override
-      public PCollection<T> apply(PInput input) {
-        if (filepattern == null) {
-          throw new IllegalStateException(
-              "need to set the filepattern of an AvroIO.Read transform");
-        }
-        if (schema == null) {
-          throw new IllegalStateException("need to set the schema of an AvroIO.Read transform");
-        }
-        if (validate) {
-          try {
-            checkState(
-                !IOChannelUtils.getFactory(filepattern).match(filepattern).isEmpty(),
-                "Unable to find any files matching %s",
-                filepattern);
-          } catch (IOException e) {
-            throw new IllegalStateException(
-                String.format("Failed to validate %s", filepattern), e);
-          }
-        }
-
-        @SuppressWarnings("unchecked")
-        Bounded<T> read =
-            type == GenericRecord.class
-                ? (Bounded<T>) com.google.cloud.dataflow.sdk.io.Read.from(
-                    AvroSource.from(filepattern).withSchema(schema))
-                : com.google.cloud.dataflow.sdk.io.Read.from(
-                    AvroSource.from(filepattern).withSchema(type));
-
-        PCollection<T> pcol = input.getPipeline().apply("Read", read);
-        // Honor the default output coder that would have been used by this PTransform.
-        pcol.setCoder(getDefaultOutputCoder());
-        return pcol;
-      }
-
-      @Override
-      protected Coder<T> getDefaultOutputCoder() {
-        return AvroCoder.of(type, schema);
-      }
-
-      public String getFilepattern() {
-        return filepattern;
-      }
-
-      public Schema getSchema() {
-        return schema;
-      }
-
-      public boolean needsValidation() {
-        return validate;
-      }
-    }
-
-    /** Disallow construction of utility class. */
-    private Read() {}
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A root {@link PTransform} that writes a {@link PCollection} to an Avro file (or
-   * multiple Avro files matching a sharding pattern).
-   */
-  public static class Write {
-    /**
-     * Returns a {@link PTransform} with the given step name.
-     */
-    public static Bound<GenericRecord> named(String name) {
-      return new Bound<>(GenericRecord.class).named(name);
-    }
-
-    /**
-     * Returns a {@link PTransform} that writes to the file(s)
-     * with the given prefix. This can be a local filename
-     * (if running locally), or a Google Cloud Storage filename of
-     * the form {@code "gs://<bucket>/<filepath>"}
-     * (if running locally or via the Google Cloud Dataflow service).
-     *
-     * <p>The files written will begin with this prefix, followed by
-     * a shard identifier (see {@link Bound#withNumShards}), and end
-     * in a common extension, if given by {@link Bound#withSuffix}.
-     */
-    public static Bound<GenericRecord> to(String prefix) {
-      return new Bound<>(GenericRecord.class).to(prefix);
-    }
-
-    /**
-     * Returns a {@link PTransform} that writes to the file(s) with the
-     * given filename suffix.
-     */
-    public static Bound<GenericRecord> withSuffix(String filenameSuffix) {
-      return new Bound<>(GenericRecord.class).withSuffix(filenameSuffix);
-    }
-
-    /**
-     * Returns a {@link PTransform} that uses the provided shard count.
-     *
-     * <p>Constraining the number of shards is likely to reduce
-     * the performance of a pipeline. Setting this value is not recommended
-     * unless you require a specific number of output files.
-     *
-     * @param numShards the number of shards to use, or 0 to let the system
-     *                  decide.
-     */
-    public static Bound<GenericRecord> withNumShards(int numShards) {
-      return new Bound<>(GenericRecord.class).withNumShards(numShards);
-    }
-
-    /**
-     * Returns a {@link PTransform} that uses the given shard name
-     * template.
-     *
-     * <p>See {@link ShardNameTemplate} for a description of shard templates.
-     */
-    public static Bound<GenericRecord> withShardNameTemplate(String shardTemplate) {
-      return new Bound<>(GenericRecord.class).withShardNameTemplate(shardTemplate);
-    }
-
-    /**
-     * Returns a {@link PTransform} that forces a single file as
-     * output.
-     *
-     * <p>Constraining the number of shards is likely to reduce
-     * the performance of a pipeline. Setting this value is not recommended
-     * unless you require a specific number of output files.
-     */
-    public static Bound<GenericRecord> withoutSharding() {
-      return new Bound<>(GenericRecord.class).withoutSharding();
-    }
-
-    /**
-     * Returns a {@link PTransform} that writes Avro file(s)
-     * containing records whose type is the specified Avro-generated class.
-     *
-     * @param <T> the type of the elements of the input PCollection
-     */
-    public static <T> Bound<T> withSchema(Class<T> type) {
-      return new Bound<>(type).withSchema(type);
-    }
-
-    /**
-     * Returns a {@link PTransform} that writes Avro file(s)
-     * containing records of the specified schema.
-     */
-    public static Bound<GenericRecord> withSchema(Schema schema) {
-      return new Bound<>(GenericRecord.class).withSchema(schema);
-    }
-
-    /**
-     * Returns a {@link PTransform} that writes Avro file(s)
-     * containing records of the specified schema in a JSON-encoded
-     * string form.
-     */
-    public static Bound<GenericRecord> withSchema(String schema) {
-      return withSchema((new Schema.Parser()).parse(schema));
-    }
-
-    /**
-     * Returns a {@link PTransform} that writes Avro file(s) and has GCS path validation on
-     * pipeline creation disabled.
-     *
-     * <p>This can be useful in the case where the GCS output location does
-     * not exist at the pipeline creation time, but is expected to be available
-     * at execution time.
-     */
-    public static Bound<GenericRecord> withoutValidation() {
-      return new Bound<>(GenericRecord.class).withoutValidation();
-    }
-
-    /**
-     * A {@link PTransform} that writes a bounded {@link PCollection} to an Avro file (or
-     * multiple Avro files matching a sharding pattern).
-     *
-     * @param <T> the type of each of the elements of the input PCollection
-     */
-    public static class Bound<T> extends PTransform<PCollection<T>, PDone> {
-      /** The filename to write to. */
-      @Nullable
-      final String filenamePrefix;
-      /** Suffix to use for each filename. */
-      final String filenameSuffix;
-      /** Requested number of shards. 0 for automatic. */
-      final int numShards;
-      /** Shard template string. */
-      final String shardTemplate;
-      /** The class type of the records. */
-      final Class<T> type;
-      /** The schema of the output file. */
-      @Nullable
-      final Schema schema;
-      /** An option to indicate if output validation is desired. Default is true. */
-      final boolean validate;
-
-      Bound(Class<T> type) {
-        this(null, null, "", 0, ShardNameTemplate.INDEX_OF_MAX, type, null, true);
-      }
-
-      Bound(
-          String name,
-          String filenamePrefix,
-          String filenameSuffix,
-          int numShards,
-          String shardTemplate,
-          Class<T> type,
-          Schema schema,
-          boolean validate) {
-        super(name);
-        this.filenamePrefix = filenamePrefix;
-        this.filenameSuffix = filenameSuffix;
-        this.numShards = numShards;
-        this.shardTemplate = shardTemplate;
-        this.type = type;
-        this.schema = schema;
-        this.validate = validate;
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * with the given step name.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> named(String name) {
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that writes to the file(s) with the given filename prefix.
-       *
-       * <p>See {@link AvroIO.Write#to(String)} for more information
-       * about filenames.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> to(String filenamePrefix) {
-        validateOutputComponent(filenamePrefix);
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that writes to the file(s) with the given filename suffix.
-       *
-       * <p>See {@link ShardNameTemplate} for a description of shard templates.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> withSuffix(String filenameSuffix) {
-        validateOutputComponent(filenameSuffix);
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that uses the provided shard count.
-       *
-       * <p>Constraining the number of shards is likely to reduce
-       * the performance of a pipeline. Setting this value is not recommended
-       * unless you require a specific number of output files.
-       *
-       * <p>Does not modify this object.
-       *
-       * @param numShards the number of shards to use, or 0 to let the system
-       *                  decide.
-       * @see ShardNameTemplate
-       */
-      public Bound<T> withNumShards(int numShards) {
-        Preconditions.checkArgument(numShards >= 0);
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that uses the given shard name template.
-       *
-       * <p>Does not modify this object.
-       *
-       * @see ShardNameTemplate
-       */
-      public Bound<T> withShardNameTemplate(String shardTemplate) {
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that forces a single file as output.
-       *
-       * <p>This is a shortcut for
-       * {@code .withNumShards(1).withShardNameTemplate("")}
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> withoutSharding() {
-        return new Bound<>(name, filenamePrefix, filenameSuffix, 1, "", type, schema, validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that writes to Avro file(s) containing records whose type is the
-       * specified Avro-generated class.
-       *
-       * <p>Does not modify this object.
-       *
-       * @param <X> the type of the elements of the input PCollection
-       */
-      public <X> Bound<X> withSchema(Class<X> type) {
-        return new Bound<>(
-            name,
-            filenamePrefix,
-            filenameSuffix,
-            numShards,
-            shardTemplate,
-            type,
-            ReflectData.get().getSchema(type),
-            validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that writes to Avro file(s) containing records of the specified
-       * schema.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<GenericRecord> withSchema(Schema schema) {
-        return new Bound<>(
-            name,
-            filenamePrefix,
-            filenameSuffix,
-            numShards,
-            shardTemplate,
-            GenericRecord.class,
-            schema,
-            validate);
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that writes to Avro file(s) containing records of the specified
-       * schema in a JSON-encoded string form.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<GenericRecord> withSchema(String schema) {
-        return withSchema((new Schema.Parser()).parse(schema));
-      }
-
-      /**
-       * Returns a new {@link PTransform} that's like this one but
-       * that has GCS output path validation on pipeline creation disabled.
-       *
-       * <p>Does not modify this object.
-       *
-       * <p>This can be useful in the case where the GCS output location does
-       * not exist at the pipeline creation time, but is expected to be
-       * available at execution time.
-       */
-      public Bound<T> withoutValidation() {
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, numShards, shardTemplate, type, schema, false);
-      }
-
-      @Override
-      public PDone apply(PCollection<T> input) {
-        if (filenamePrefix == null) {
-          throw new IllegalStateException(
-              "need to set the filename prefix of an AvroIO.Write transform");
-        }
-        if (schema == null) {
-          throw new IllegalStateException("need to set the schema of an AvroIO.Write transform");
-        }
-
-        // Note that custom sinks currently do not expose sharding controls.
-        // Thus pipeline runner writers need to individually add support internally to
-        // apply user requested sharding limits.
-        return input.apply(
-            "Write",
-            com.google.cloud.dataflow.sdk.io.Write.to(
-                new AvroSink<>(
-                    filenamePrefix, filenameSuffix, shardTemplate, AvroCoder.of(type, schema))));
-      }
-
-      /**
-       * Returns the current shard name template string.
-       */
-      public String getShardNameTemplate() {
-        return shardTemplate;
-      }
-
-      @Override
-      protected Coder<Void> getDefaultOutputCoder() {
-        return VoidCoder.of();
-      }
-
-      public String getFilenamePrefix() {
-        return filenamePrefix;
-      }
-
-      public String getShardTemplate() {
-        return shardTemplate;
-      }
-
-      public int getNumShards() {
-        return numShards;
-      }
-
-      public String getFilenameSuffix() {
-        return filenameSuffix;
-      }
-
-      public Class<T> getType() {
-        return type;
-      }
-
-      public Schema getSchema() {
-        return schema;
-      }
-
-      public boolean needsValidation() {
-        return validate;
-      }
-    }
-
-    /** Disallow construction of utility class. */
-    private Write() {}
-  }
-
-  // Pattern which matches old-style shard output patterns, which are now
-  // disallowed.
-  private static final Pattern SHARD_OUTPUT_PATTERN = Pattern.compile("@([0-9]+|\\*)");
-
-  private static void validateOutputComponent(String partialFilePattern) {
-    Preconditions.checkArgument(
-        !SHARD_OUTPUT_PATTERN.matcher(partialFilePattern).find(),
-        "Output name components are not allowed to contain @* or @N patterns: "
-        + partialFilePattern);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /** Disallow construction of utility class. */
-  private AvroIO() {}
-
-  /**
-   * A {@link FileBasedSink} for Avro files.
-   */
-  @VisibleForTesting
-  static class AvroSink<T> extends FileBasedSink<T> {
-    private final AvroCoder<T> coder;
-
-    @VisibleForTesting
-    AvroSink(
-        String baseOutputFilename, String extension, String fileNameTemplate, AvroCoder<T> coder) {
-      super(baseOutputFilename, extension, fileNameTemplate);
-      this.coder = coder;
-    }
-
-    @Override
-    public FileBasedSink.FileBasedWriteOperation<T> createWriteOperation(PipelineOptions options) {
-      return new AvroWriteOperation<>(this, coder);
-    }
-
-    /**
-     * A {@link com.google.cloud.dataflow.sdk.io.FileBasedSink.FileBasedWriteOperation
-     * FileBasedWriteOperation} for Avro files.
-     */
-    private static class AvroWriteOperation<T> extends FileBasedWriteOperation<T> {
-      private final AvroCoder<T> coder;
-
-      private AvroWriteOperation(AvroSink<T> sink, AvroCoder<T> coder) {
-        super(sink);
-        this.coder = coder;
-      }
-
-      @Override
-      public FileBasedWriter<T> createWriter(PipelineOptions options) throws Exception {
-        return new AvroWriter<>(this, coder);
-      }
-    }
-
-    /**
-     * A {@link com.google.cloud.dataflow.sdk.io.FileBasedSink.FileBasedWriter FileBasedWriter}
-     * for Avro files.
-     */
-    private static class AvroWriter<T> extends FileBasedWriter<T> {
-      private final AvroCoder<T> coder;
-      private DataFileWriter<T> dataFileWriter;
-
-      public AvroWriter(FileBasedWriteOperation<T> writeOperation, AvroCoder<T> coder) {
-        super(writeOperation);
-        this.mimeType = MimeTypes.BINARY;
-        this.coder = coder;
-      }
-
-      @SuppressWarnings("deprecation") // uses internal test functionality.
-      @Override
-      protected void prepareWrite(WritableByteChannel channel) throws Exception {
-        dataFileWriter = new DataFileWriter<>(coder.createDatumWriter());
-        dataFileWriter.create(coder.getSchema(), Channels.newOutputStream(channel));
-      }
-
-      @Override
-      public void write(T value) throws Exception {
-        dataFileWriter.append(value);
-      }
-
-      @Override
-      protected void writeFooter() throws Exception {
-        dataFileWriter.flush();
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroSource.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroSource.java
deleted file mode 100644
index 297663e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/AvroSource.java
+++ /dev/null
@@ -1,647 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.AvroCoder;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.util.AvroUtils;
-import com.google.cloud.dataflow.sdk.util.AvroUtils.AvroMetadata;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.Preconditions;
-
-import org.apache.avro.Schema;
-import org.apache.avro.file.CodecFactory;
-import org.apache.avro.file.DataFileConstants;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.io.BinaryDecoder;
-import org.apache.avro.io.DatumReader;
-import org.apache.avro.io.DecoderFactory;
-import org.apache.avro.reflect.ReflectData;
-import org.apache.avro.reflect.ReflectDatumReader;
-import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
-import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;
-import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PushbackInputStream;
-import java.nio.ByteBuffer;
-import java.nio.channels.Channels;
-import java.nio.channels.ReadableByteChannel;
-import java.util.Collection;
-import java.util.zip.Inflater;
-import java.util.zip.InflaterInputStream;
-
-// CHECKSTYLE.OFF: JavadocStyle
-/**
- * A {@link FileBasedSource} for reading Avro files.
- *
- * <p>To read a {@link PCollection} of objects from one or more Avro files, use
- * {@link AvroSource#from} to specify the path(s) of the files to read. The {@link AvroSource} that
- * is returned will read objects of type {@link GenericRecord} with the schema(s) that were written
- * at file creation. To further configure the {@link AvroSource} to read with a user-defined schema,
- * or to return records of a type other than {@link GenericRecord}, use
- * {@link AvroSource#withSchema(Schema)} (using an Avro {@link Schema}),
- * {@link AvroSource#withSchema(String)} (using a JSON schema), or
- * {@link AvroSource#withSchema(Class)} (to return objects of the Avro-generated class specified).
- *
- * <p>An {@link AvroSource} can be read from using the {@link Read} transform. For example:
- *
- * <pre>
- * {@code
- * AvroSource<MyType> source = AvroSource.from(file.toPath()).withSchema(MyType.class);
- * PCollection<MyType> records = pipeline.apply(Read.from(source));
- * }
- * </pre>
- *
- * <p>The {@link AvroSource#readFromFileWithClass(String, Class)} method is a convenience method
- * that returns a read transform. For example:
- *
- * <pre>
- * {@code
- * PCollection<MyType> records =
- *     pipeline.apply(AvroSource.readFromFileWithClass(file.toPath(), MyType.class));
- * }
- * </pre>
- *
- * <p>This class's implementation is based on the <a
- * href="https://avro.apache.org/docs/1.7.7/spec.html">Avro 1.7.7</a> specification and implements
- * parsing of some parts of Avro Object Container Files. The rationale for doing so is that the Avro
- * API does not provide efficient ways of computing the precise offsets of blocks within a file,
- * which is necessary to support dynamic work rebalancing. However, whenever it is possible to use
- * the Avro API in a way that supports maintaining precise offsets, this class uses the Avro API.
- *
- * <p>Avro Object Container files store records in blocks. Each block contains a collection of
- * records. Blocks may be encoded (e.g., with bzip2, deflate, snappy, etc.). Blocks are delineated
- * from one another by a 16-byte sync marker.
- *
- * <p>An {@link AvroSource} for a subrange of a single file contains records in the blocks such that
- * the start offset of the block is greater than or equal to the start offset of the source and less
- * than the end offset of the source.
- *
- * <p>To use XZ-encoded Avro files, please include an explicit dependency on {@code xz-1.5.jar},
- * which has been marked as optional in the Maven {@code sdk/pom.xml} for Google Cloud Dataflow:
- *
- * <pre>{@code
- * <dependency>
- *   <groupId>org.tukaani</groupId>
- *   <artifactId>xz</artifactId>
- *   <version>1.5</version>
- * </dependency>
- * }</pre>
- *
- * <h3>Permissions</h3>
- * <p>Permission requirements depend on the {@link PipelineRunner} that is used to execute the
- * Dataflow job. Please refer to the documentation of corresponding {@link PipelineRunner}s for
- * more details.
- *
- * @param <T> The type of records to be read from the source.
- */
-// CHECKSTYLE.ON: JavadocStyle
-@Experimental(Experimental.Kind.SOURCE_SINK)
-public class AvroSource<T> extends BlockBasedSource<T> {
-  // Default minimum bundle size (chosen as two default-size Avro blocks to attempt to
-  // ensure that every source has at least one block of records).
-  // The default sync interval is 64k.
-  static final long DEFAULT_MIN_BUNDLE_SIZE = 2 * DataFileConstants.DEFAULT_SYNC_INTERVAL;
-
-  // The JSON schema used to encode records.
-  private final String readSchemaString;
-
-  // The JSON schema that was used to write the source Avro file (may differ from the schema we will
-  // use to read from it).
-  private final String fileSchemaString;
-
-  // The type of the records contained in the file.
-  private final Class<T> type;
-
-  // The following metadata fields are not user-configurable. They are extracted from the object
-  // container file header upon subsource creation.
-
-  // The codec used to encode the blocks in the Avro file. String value drawn from those in
-  // https://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/file/CodecFactory.html
-  private final String codec;
-
-  // The object container file's 16-byte sync marker.
-  private final byte[] syncMarker;
-
-  // Default output coder, lazily initialized.
-  private transient AvroCoder<T> coder = null;
-
-  // Schema of the file, lazily initialized.
-  private transient Schema fileSchema;
-
-  // Schema used to encode records, lazily initialized.
-  private transient Schema readSchema;
-
-  /**
-   * Creates a {@link Read} transform that will read from an {@link AvroSource} that is configured
-   * to read records of the given type from a file pattern.
-   */
-  public static <T> Read.Bounded<T> readFromFileWithClass(String filePattern, Class<T> clazz) {
-    return Read.from(new AvroSource<T>(filePattern, DEFAULT_MIN_BUNDLE_SIZE,
-        ReflectData.get().getSchema(clazz).toString(), clazz, null, null));
-  }
-
-  /**
-   * Creates an {@link AvroSource} that reads from the given file name or pattern ("glob"). The
-   * returned source can be further configured by calling {@link #withSchema} to return a type other
-   * than {@link GenericRecord}.
-   */
-  public static AvroSource<GenericRecord> from(String fileNameOrPattern) {
-    return new AvroSource<>(
-        fileNameOrPattern, DEFAULT_MIN_BUNDLE_SIZE, null, GenericRecord.class, null, null);
-  }
-
-  /**
-   * Returns an {@link AvroSource} that's like this one but reads files containing records that
-   * conform to the given schema.
-   *
-   * <p>Does not modify this object.
-   */
-  public AvroSource<GenericRecord> withSchema(String schema) {
-    return new AvroSource<>(
-        getFileOrPatternSpec(), getMinBundleSize(), schema, GenericRecord.class, codec, syncMarker);
-  }
-
-  /**
-   * Returns an {@link AvroSource} that's like this one but reads files containing records that
-   * conform to the given schema.
-   *
-   * <p>Does not modify this object.
-   */
-  public AvroSource<GenericRecord> withSchema(Schema schema) {
-    return new AvroSource<>(getFileOrPatternSpec(), getMinBundleSize(), schema.toString(),
-        GenericRecord.class, codec, syncMarker);
-  }
-
-  /**
-   * Returns an {@link AvroSource} that's like this one but reads files containing records of the
-   * type of the given class.
-   *
-   * <p>Does not modify this object.
-   */
-  public <X> AvroSource<X> withSchema(Class<X> clazz) {
-    return new AvroSource<X>(getFileOrPatternSpec(), getMinBundleSize(),
-        ReflectData.get().getSchema(clazz).toString(), clazz, codec, syncMarker);
-  }
-
-  /**
-   * Returns an {@link AvroSource} that's like this one but uses the supplied minimum bundle size.
-   * Refer to {@link OffsetBasedSource} for a description of {@code minBundleSize} and its use.
-   *
-   * <p>Does not modify this object.
-   */
-  public AvroSource<T> withMinBundleSize(long minBundleSize) {
-    return new AvroSource<T>(
-        getFileOrPatternSpec(), minBundleSize, readSchemaString, type, codec, syncMarker);
-  }
-
-  private AvroSource(String fileNameOrPattern, long minBundleSize, String schema, Class<T> type,
-      String codec, byte[] syncMarker) {
-    super(fileNameOrPattern, minBundleSize);
-    this.readSchemaString = schema;
-    this.codec = codec;
-    this.syncMarker = syncMarker;
-    this.type = type;
-    this.fileSchemaString = null;
-  }
-
-  private AvroSource(String fileName, long minBundleSize, long startOffset, long endOffset,
-      String schema, Class<T> type, String codec, byte[] syncMarker, String fileSchema) {
-    super(fileName, minBundleSize, startOffset, endOffset);
-    this.readSchemaString = schema;
-    this.codec = codec;
-    this.syncMarker = syncMarker;
-    this.type = type;
-    this.fileSchemaString = fileSchema;
-  }
-
-  @Override
-  public void validate() {
-    // AvroSource objects do not need to be configured with more than a file pattern. Overridden to
-    // make this explicit.
-    super.validate();
-  }
-
-  /**
-   * Creates an {@link AvroSource} for the subrange of {@code fileName} delimited by
-   * {@code start} and {@code end}.
-   *
-   * <p>The codec, sync marker and file schema are not known when a source is first created from a
-   * file pattern, because they differ across input files. If any of them is still missing, this
-   * method reads them from the file's metadata so that the returned source (and any source it is
-   * later split into) does not need to read them again. If this source was created without a
-   * read schema, the schema found in the file's metadata is used as the read schema.
-   *
-   * @param fileName the single file the returned source reads from
-   * @param start the start offset of the subrange
-   * @param end the end offset of the subrange
-   * @return a source for the given subrange with its file metadata populated
-   * @throws IllegalArgumentException if {@code fileName} matches more than one file
-   * @throws RuntimeException if the file's metadata cannot be read
-   */
-  @Override
-  public BlockBasedSource<T> createForSubrangeOfFile(String fileName, long start, long end) {
-    byte[] syncMarker = this.syncMarker;
-    String codec = this.codec;
-    String readSchemaString = this.readSchemaString;
-    String fileSchemaString = this.fileSchemaString;
-    // codec and syncMarker are initially null when the source is created, as they differ
-    // across input files and must be read from the file. Here, when we are creating a source
-    // for a subrange of a file, we can initialize these values. When the resulting AvroSource
-    // is further split, they do not need to be read again.
-    if (codec == null || syncMarker == null || fileSchemaString == null) {
-      AvroMetadata metadata;
-      try {
-        Collection<String> files = FileBasedSource.expandFilePattern(fileName);
-        // Pass fileName so that the %s placeholder in the message is actually filled in.
-        Preconditions.checkArgument(
-            files.size() <= 1, "More than 1 file matched %s", fileName);
-        metadata = AvroUtils.readMetadataFromFile(fileName);
-      } catch (IOException e) {
-        throw new RuntimeException("Error reading metadata from file " + fileName, e);
-      }
-      codec = metadata.getCodec();
-      syncMarker = metadata.getSyncMarker();
-      fileSchemaString = metadata.getSchemaString();
-      // If the source was created with a null schema, use the schema that we read from the file's
-      // metadata.
-      if (readSchemaString == null) {
-        readSchemaString = metadata.getSchemaString();
-      }
-    }
-    return new AvroSource<T>(fileName, getMinBundleSize(), start, end, readSchemaString, type,
-        codec, syncMarker, fileSchemaString);
-  }
-
-  @Override
-  protected BlockBasedReader<T> createSingleFileReader(PipelineOptions options) {
-    return new AvroReader<T>(this);
-  }
-
-  @Override
-  public boolean producesSortedKeys(PipelineOptions options) throws Exception {
-    return false;
-  }
-
-  @Override
-  public AvroCoder<T> getDefaultOutputCoder() {
-    if (coder == null) {
-      Schema.Parser parser = new Schema.Parser();
-      coder = AvroCoder.of(type, parser.parse(readSchemaString));
-    }
-    return coder;
-  }
-
-  public String getSchema() {
-    return readSchemaString;
-  }
-
-  private Schema getReadSchema() {
-    if (readSchemaString == null) {
-      return null;
-    }
-
-    // If the schema has not been parsed, parse it.
-    if (readSchema == null) {
-      Schema.Parser parser = new Schema.Parser();
-      readSchema = parser.parse(readSchemaString);
-    }
-    return readSchema;
-  }
-
-  private Schema getFileSchema() {
-    if (fileSchemaString == null) {
-      return null;
-    }
-
-    // If the schema has not been parsed, parse it.
-    if (fileSchema == null) {
-      Schema.Parser parser = new Schema.Parser();
-      fileSchema = parser.parse(fileSchemaString);
-    }
-    return fileSchema;
-  }
-
-  private byte[] getSyncMarker() {
-    return syncMarker;
-  }
-
-  private String getCodec() {
-    return codec;
-  }
-
-  private DatumReader<T> createDatumReader() {
-    Schema readSchema = getReadSchema();
-    Schema fileSchema = getFileSchema();
-    Preconditions.checkNotNull(
-        readSchema, "No read schema has been initialized for source %s", this);
-    Preconditions.checkNotNull(
-        fileSchema, "No file schema has been initialized for source %s", this);
-    if (type == GenericRecord.class) {
-      return new GenericDatumReader<>(fileSchema, readSchema);
-    } else {
-      return new ReflectDatumReader<>(fileSchema, readSchema);
-    }
-  }
-
-  /**
-   * A {@link BlockBasedSource.Block} of Avro records.
-   *
-   * @param <T> The type of records stored in the block.
-   */
-  @Experimental(Experimental.Kind.SOURCE_SINK)
-  static class AvroBlock<T> extends Block<T> {
-    // The number of records in the block.
-    private final long numRecords;
-
-    // The current record in the block.
-    private T currentRecord;
-
-    // The index of the current record in the block.
-    private long currentRecordIndex = 0;
-
-    // A DatumReader to read records from the block.
-    private final DatumReader<T> reader;
-
-    // A BinaryDecoder used by the reader to decode records.
-    private final BinaryDecoder decoder;
-
-    /**
-     * Decodes a byte array as an InputStream. The byte array may be compressed using some
-     * codec. Reads from the returned stream will result in decompressed bytes.
-     *
-     * <p>This supports the same codecs as Avro's {@link CodecFactory}, namely those defined in
-     * {@link DataFileConstants}.
-     *
-     * <ul>
-     * <li>"snappy" : Google's Snappy compression
-     * <li>"deflate" : deflate compression
-     * <li>"bzip2" : Bzip2 compression
-     * <li>"xz" : xz compression
-     * <li>"null" (the string, not the value): Uncompressed data
-     * </ul>
-     */
-    private static InputStream decodeAsInputStream(byte[] data, String codec) throws IOException {
-      ByteArrayInputStream byteStream = new ByteArrayInputStream(data);
-      switch (codec) {
-        case DataFileConstants.SNAPPY_CODEC:
-          return new SnappyCompressorInputStream(byteStream);
-        case DataFileConstants.DEFLATE_CODEC:
-          // nowrap == true: Do not expect ZLIB header or checksum, as Avro does not write them.
-          Inflater inflater = new Inflater(true);
-          return new InflaterInputStream(byteStream, inflater);
-        case DataFileConstants.XZ_CODEC:
-          return new XZCompressorInputStream(byteStream);
-        case DataFileConstants.BZIP2_CODEC:
-          return new BZip2CompressorInputStream(byteStream);
-        case DataFileConstants.NULL_CODEC:
-          return byteStream;
-        default:
-          throw new IllegalArgumentException("Unsupported codec: " + codec);
-      }
-    }
-
-    AvroBlock(byte[] data, long numRecords, AvroSource<T> source) throws IOException {
-      this.numRecords = numRecords;
-      this.reader = source.createDatumReader();
-      this.decoder =
-          DecoderFactory.get().binaryDecoder(decodeAsInputStream(data, source.getCodec()), null);
-    }
-
-    @Override
-    public T getCurrentRecord() {
-      return currentRecord;
-    }
-
-    @Override
-    public boolean readNextRecord() throws IOException {
-      if (currentRecordIndex >= numRecords) {
-        return false;
-      }
-      currentRecord = reader.read(null, decoder);
-      currentRecordIndex++;
-      return true;
-    }
-
-    @Override
-    public double getFractionOfBlockConsumed() {
-      return ((double) currentRecordIndex) / numRecords;
-    }
-  }
-
-  /**
-   * A {@link BlockBasedSource.BlockBasedReader} for reading blocks from Avro files.
-   *
-   * <p>An Avro Object Container File consists of a header followed by a 16-byte sync marker
-   * and then a sequence of blocks, where each block begins with two encoded longs representing
-   * the total number of records in the block and the block's size in bytes, followed by the
-   * block's (optionally-encoded) records. Each block is terminated by a 16-byte sync marker.
-   *
-   * <p>Here, we consider the sync marker that precedes a block to be its offset, as this allows
-   * a reader that begins reading at that offset to detect the sync marker and the beginning of
-   * the block.
-   *
-   * @param <T> The type of records contained in the block.
-   */
-  @Experimental(Experimental.Kind.SOURCE_SINK)
-  public static class AvroReader<T> extends BlockBasedReader<T> {
-    // The current block.
-    private AvroBlock<T> currentBlock;
-
-    // Offset of the block.
-    private long currentBlockOffset = 0;
-
-    // Size of the current block.
-    private long currentBlockSizeBytes = 0;
-
-    // Current offset within the stream.
-    private long currentOffset = 0;
-
-    // Stream used to read from the underlying file.
-    // A pushback stream is used to restore bytes buffered during seeking/decoding.
-    private PushbackInputStream stream;
-
-    // Small buffer for reading encoded values from the stream.
-    // The maximum size of an encoded long is 10 bytes, and this buffer will be used to read two.
-    private final byte[] readBuffer = new byte[20];
-
-    // Decoder to decode binary-encoded values from the buffer.
-    private BinaryDecoder decoder;
-
-    /**
-     * Reads Avro records of type {@code T} from the specified source.
-     */
-    public AvroReader(AvroSource<T> source) {
-      super(source);
-    }
-
-    @Override
-    public synchronized AvroSource<T> getCurrentSource() {
-      return (AvroSource<T>) super.getCurrentSource();
-    }
-
-    /**
-     * Advances past the next sync marker in the stream and reads the block that follows it.
-     *
-     * <p>On success, {@code currentBlock} holds the newly read block, {@code currentBlockOffset}
-     * is the offset of the sync marker preceding that block, and {@code currentOffset} is the
-     * offset of the first byte after the block's data.
-     *
-     * @return {@code true} if a block was read, or {@code false} if no bytes follow the last sync
-     *     marker that could be found
-     * @throws IOException if the stream cannot be read, or if it ends in the middle of a block
-     *     header or of a block's data
-     */
-    @Override
-    public boolean readNextBlock() throws IOException {
-      // The next block in the file is after the first sync marker that can be read starting from
-      // the current offset. First, we seek past the next sync marker, if it exists. After a sync
-      // marker is the start of a block. A block begins with the number of records contained in
-      // the block, encoded as a long, followed by the size of the block in bytes, encoded as a
-      // long. The currentOffset after this method should be the first byte after this block, and
-      // the currentBlockOffset should be the start of the sync marker before this block.
-      byte[] syncMarker = getCurrentSource().getSyncMarker();
-
-      // Seek to the next sync marker, if one exists.
-      currentOffset += advancePastNextSyncMarker(stream, syncMarker);
-
-      // The offset of the current block includes its preceding sync marker.
-      currentBlockOffset = currentOffset - syncMarker.length;
-
-      // Read a small buffer to parse the block header.
-      // We cannot use a BinaryDecoder to do this directly from the stream because a BinaryDecoder
-      // internally buffers data and we only want to read as many bytes from the stream as the size
-      // of the header. Though BinaryDecoder#InputStream returns an input stream that is aware of
-      // its internal buffering, we would have to re-wrap this input stream to seek for the next
-      // block in the file.
-      // A single InputStream#read call may return fewer bytes than requested even when more are
-      // available, so keep reading until the buffer is full or the stream ends.
-      int read = readFully(stream, readBuffer);
-      // We reached the last sync marker in the file.
-      if (read <= 0) {
-        return false;
-      }
-      decoder = DecoderFactory.get().binaryDecoder(readBuffer, decoder);
-      long numRecords = decoder.readLong();
-      long blockSize = decoder.readLong();
-
-      // The decoder buffers data internally, but since we know the size of the stream the
-      // decoder has constructed from the readBuffer, the number of bytes available in the
-      // input stream is equal to the number of unconsumed bytes.
-      int headerSize = readBuffer.length - decoder.inputStream().available();
-      if (headerSize > read) {
-        // The decoder consumed bytes of readBuffer that were never filled from the stream.
-        throw new IOException("Unexpected end of stream while reading an Avro block header at"
-            + " offset " + currentOffset);
-      }
-      if (blockSize < 0 || blockSize > Integer.MAX_VALUE) {
-        throw new IOException("Invalid Avro block size " + blockSize + " at offset "
-            + currentOffset);
-      }
-      stream.unread(readBuffer, headerSize, read - headerSize);
-
-      // Create the current block by reading blockSize bytes. Block sizes permitted by the Avro
-      // specification are [32, 2^30], so this narrowing is ok.
-      byte[] data = new byte[(int) blockSize];
-      int dataRead = readFully(stream, data);
-      if (dataRead != data.length) {
-        throw new IOException("Unexpected end of stream while reading an Avro block: expected "
-            + data.length + " bytes but only " + dataRead + " were available");
-      }
-      currentBlock = new AvroBlock<>(data, numRecords, getCurrentSource());
-      currentBlockSizeBytes = blockSize;
-
-      // Update current offset with the number of bytes we read to get the next block.
-      currentOffset += headerSize + blockSize;
-      return true;
-    }
-
-    /**
-     * Reads from {@code in} until {@code buffer} is full or the end of the stream is reached.
-     *
-     * @return the number of bytes stored in {@code buffer}; less than {@code buffer.length} only
-     *     if the stream ended first
-     */
-    private static int readFully(InputStream in, byte[] buffer) throws IOException {
-      int total = 0;
-      while (total < buffer.length) {
-        int n = in.read(buffer, total, buffer.length - total);
-        if (n < 0) {
-          break;
-        }
-        total += n;
-      }
-      return total;
-    }
-
-    @Override
-    public AvroBlock<T> getCurrentBlock() {
-      return currentBlock;
-    }
-
-    @Override
-    public long getCurrentBlockOffset() {
-      return currentBlockOffset;
-    }
-
-    @Override
-    public long getCurrentBlockSize() {
-      return currentBlockSizeBytes;
-    }
-
-    /**
-     * Creates a {@link PushbackInputStream} that has a large enough pushback buffer to be able
-     * to push back the syncBuffer and the readBuffer.
-     */
-    private PushbackInputStream createStream(ReadableByteChannel channel) {
-      return new PushbackInputStream(
-          Channels.newInputStream(channel),
-          getCurrentSource().getSyncMarker().length + readBuffer.length);
-    }
-
-    /**
-     * Starts reading from the provided channel. Assumes that the channel is already seeked to
-     * the source's start offset.
-     */
-    @Override
-    protected void startReading(ReadableByteChannel channel) throws IOException {
-      stream = createStream(channel);
-      currentOffset = getCurrentSource().getStartOffset();
-    }
-
-    /**
-     * Advances to the first byte after the next occurrence of the sync marker in the
-     * stream when reading from the current offset. Returns the number of bytes consumed
-     * from the stream. Note that this method requires a PushbackInputStream with a buffer
-     * at least as big as the marker it is seeking for.
-     */
-    static long advancePastNextSyncMarker(PushbackInputStream stream, byte[] syncMarker)
-        throws IOException {
-      Seeker seeker = new Seeker(syncMarker);
-      byte[] syncBuffer = new byte[syncMarker.length];
-      long totalBytesConsumed = 0;
-      // Seek until either a sync marker is found or we reach the end of the file.
-      int mark = -1; // Position of the last byte in the sync marker.
-      int read; // Number of bytes read.
-      do {
-        read = stream.read(syncBuffer);
-        if (read >= 0) {
-          mark = seeker.find(syncBuffer, read);
-          // Update the currentOffset with the number of bytes read.
-          totalBytesConsumed += read;
-        }
-      } while (mark < 0 && read > 0);
-
-      // If the sync marker was found, unread block data and update the current offsets.
-      if (mark >= 0) {
-        // The current offset after this call should be just past the sync marker, so we should
-        // unread the remaining buffer contents and update the currentOffset accordingly.
-        stream.unread(syncBuffer, mark + 1, read - (mark + 1));
-        totalBytesConsumed = totalBytesConsumed - (read - (mark + 1));
-      }
-      return totalBytesConsumed;
-    }
-
-    /**
-     * A {@link Seeker} looks for a given marker within a byte buffer. Uses naive string matching
-     * with a sliding window, as sync markers are small and random.
-     */
-    static class Seeker {
-      // The marker to search for.
-      private byte[] marker;
-
-      // Buffer used for the sliding window.
-      private byte[] searchBuffer;
-
-      // Number of bytes available to be matched in the buffer.
-      private int available = 0;
-
-      /**
-       * Create a {@link Seeker} that looks for the given marker.
-       */
-      public Seeker(byte[] marker) {
-        this.marker = marker;
-        this.searchBuffer = new byte[marker.length];
-      }
-
-      /**
-       * Find the marker in the byte buffer. Returns the index of the end of the marker in the
-       * buffer. If the marker is not found, returns -1.
-       *
-       * <p>State is maintained between calls. If the marker was partially matched, a subsequent
-       * call to find will resume matching the marker.
-       *
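-       * @param buffer the bytes to search
-       * @param length the number of bytes of {@code buffer}, starting at index 0, to search
-       * @return the index of the end of the marker within the buffer, or -1 if the marker was not
-       * found.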
-       */
-      public int find(byte[] buffer, int length) {
-        for (int i = 0; i < length; i++) {
-          System.arraycopy(searchBuffer, 1, searchBuffer, 0, searchBuffer.length - 1);
-          searchBuffer[searchBuffer.length - 1] = buffer[i];
-          available = Math.min(available + 1, searchBuffer.length);
-          if (ByteBuffer.wrap(searchBuffer, searchBuffer.length - available, available)
-                  .equals(ByteBuffer.wrap(marker))) {
-            available = 0;
-            return i;
-          }
-        }
-        return -1;
-      }
-    }
-  }
-}

[19/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java
deleted file mode 100644
index 5b30475..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Sum.java
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.util.common.Counter;
-import com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind;
-import com.google.cloud.dataflow.sdk.util.common.CounterProvider;
-
-/**
- * {@code PTransform}s for computing the sum of the elements in a
- * {@code PCollection}, or the sum of the values associated with
- * each key in a {@code PCollection} of {@code KV}s.
- *
- * <p>Example 1: get the sum of a {@code PCollection} of {@code Double}s.
- * <pre> {@code
- * PCollection<Double> input = ...;
- * PCollection<Double> sum = input.apply(Sum.doublesGlobally());
- * } </pre>
- *
- * <p>Example 2: calculate the sum of the {@code Integer}s
- * associated with each unique key (which is of type {@code String}).
- * <pre> {@code
- * PCollection<KV<String, Integer>> input = ...;
- * PCollection<KV<String, Integer>> sumPerKey = input
- *     .apply(Sum.<String>integersPerKey());
- * } </pre>
- */
-public class Sum {
-
-  private Sum() {
-    // do not instantiate
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<Integer>} and returns a
-   * {@code PCollection<Integer>} whose contents is the sum of the
-   * input {@code PCollection}'s elements, or
-   * {@code 0} if there are no elements.
-   */
-  public static Combine.Globally<Integer, Integer> integersGlobally() {
-    return Combine.globally(new SumIntegerFn()).named("Sum.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<KV<K, Integer>>} and returns a
-   * {@code PCollection<KV<K, Integer>>} that contains an output
-   * element mapping each distinct key in the input
-   * {@code PCollection} to the sum of the values associated with
-   * that key in the input {@code PCollection}.
-   */
-  public static <K> Combine.PerKey<K, Integer, Integer> integersPerKey() {
-    return Combine.<K, Integer, Integer>perKey(new SumIntegerFn()).named("Sum.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<Long>} and returns a
-   * {@code PCollection<Long>} whose contents is the sum of the
-   * input {@code PCollection}'s elements, or
-   * {@code 0} if there are no elements.
-   */
-  public static Combine.Globally<Long, Long> longsGlobally() {
-    return Combine.globally(new SumLongFn()).named("Sum.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<KV<K, Long>>} and returns a
-   * {@code PCollection<KV<K, Long>>} that contains an output
-   * element mapping each distinct key in the input
-   * {@code PCollection} to the sum of the values associated with
-   * that key in the input {@code PCollection}.
-   */
-  public static <K> Combine.PerKey<K, Long, Long> longsPerKey() {
-    return Combine.<K, Long, Long>perKey(new SumLongFn()).named("Sum.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<Double>} and returns a
-   * {@code PCollection<Double>} whose contents is the sum of the
-   * input {@code PCollection}'s elements, or
-   * {@code 0} if there are no elements.
-   */
-  public static Combine.Globally<Double, Double> doublesGlobally() {
-    return Combine.globally(new SumDoubleFn()).named("Sum.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<KV<K, Double>>} and returns a
-   * {@code PCollection<KV<K, Double>>} that contains an output
-   * element mapping each distinct key in the input
-   * {@code PCollection} to the sum of the values associated with
-   * that key in the input {@code PCollection}.
-   */
-  public static <K> Combine.PerKey<K, Double, Double> doublesPerKey() {
-    return Combine.<K, Double, Double>perKey(new SumDoubleFn()).named("Sum.PerKey");
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@code SerializableFunction} that computes the sum of an
-   * {@code Iterable} of {@code Integer}s, useful as an argument to
-   * {@link Combine#globally} or {@link Combine#perKey}.
-   */
-  public static class SumIntegerFn
-      extends Combine.BinaryCombineIntegerFn implements CounterProvider<Integer> {
-    @Override
-    public int apply(int a, int b) {
-      return a + b;
-    }
-
-    @Override
-    public int identity() {
-      return 0;
-    }
-
-    @Override
-    public Counter<Integer> getCounter(String name) {
-      return Counter.ints(name, AggregationKind.SUM);
-    }
-  }
-
-  /**
-   * A {@code SerializableFunction} that computes the sum of an
-   * {@code Iterable} of {@code Long}s, useful as an argument to
-   * {@link Combine#globally} or {@link Combine#perKey}.
-   */
-  public static class SumLongFn
-      extends Combine.BinaryCombineLongFn implements CounterProvider<Long> {
-    @Override
-    public long apply(long a, long b) {
-      return a + b;
-    }
-
-    @Override
-    public long identity() {
-      return 0;
-    }
-
-    @Override
-    public Counter<Long> getCounter(String name) {
-      return Counter.longs(name, AggregationKind.SUM);
-    }
-  }
-
-  /**
-   * A {@code SerializableFunction} that computes the sum of an
-   * {@code Iterable} of {@code Double}s, useful as an argument to
-   * {@link Combine#globally} or {@link Combine#perKey}.
-   */
-  public static class SumDoubleFn
-      extends Combine.BinaryCombineDoubleFn implements CounterProvider<Double> {
-    @Override
-    public double apply(double a, double b) {
-      return a + b;
-    }
-
-    @Override
-    public double identity() {
-      return 0;
-    }
-
-    @Override
-    public Counter<Double> getCounter(String name) {
-      return Counter.doubles(name, AggregationKind.SUM);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java
deleted file mode 100644
index 98fe53c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Top.java
+++ /dev/null
@@ -1,559 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.coders.CustomCoder;
-import com.google.cloud.dataflow.sdk.coders.ListCoder;
-import com.google.cloud.dataflow.sdk.transforms.Combine.AccumulatingCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Combine.AccumulatingCombineFn.Accumulator;
-import com.google.cloud.dataflow.sdk.transforms.Combine.PerKey;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.List;
-import java.util.PriorityQueue;
-
-/**
- * {@code PTransform}s for finding the largest (or smallest) set
- * of elements in a {@code PCollection}, or the largest (or smallest)
- * set of values associated with each key in a {@code PCollection} of
- * {@code KV}s.
- */
-public class Top {
-
-  private Top() {
-    // do not instantiate
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<T>} and returns a {@code PCollection<List<T>>} with a
-   * single element containing the largest {@code count} elements of the input
-   * {@code PCollection<T>}, in decreasing order, sorted using the
-   * given {@code Comparator<T>}.  The {@code Comparator<T>} must also
-   * be {@code Serializable}.
-   *
-   * <p>If {@code count} {@code >} the number of elements in the
-   * input {@code PCollection}, then all the elements of the input
-   * {@code PCollection} will be in the resulting
-   * {@code List}, albeit in sorted order.
-   *
-   * <p>All the elements of the result's {@code List}
-   * must fit into the memory of a single machine.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<Student> students = ...;
-   * PCollection<List<Student>> top10Students =
-   *     students.apply(Top.of(10, new CompareStudentsByAvgGrade()));
-   * } </pre>
-   *
-   * <p>By default, the {@code Coder} of the output {@code PCollection}
-   * is a {@code ListCoder} of the {@code Coder} of the elements of
-   * the input {@code PCollection}.
-   *
-   * <p>If the input {@code PCollection} is windowed into {@link GlobalWindows},
-   * an empty {@code List<T>} in the {@link GlobalWindow} will be output if the input
-   * {@code PCollection} is empty.  To use this with inputs with other windowing,
-   * either {@link Combine.Globally#withoutDefaults withoutDefaults} or
-   * {@link Combine.Globally#asSingletonView asSingletonView} must be called.
-   *
-   * <p>See also {@link #smallest} and {@link #largest}, which sort
-   * {@code Comparable} elements using their natural ordering.
-   *
-   * <p>See also {@link #perKey}, {@link #smallestPerKey}, and
-   * {@link #largestPerKey}, which take a {@code PCollection} of
-   * {@code KV}s and return the top values associated with each key.
-   */
-  public static <T, ComparatorT extends Comparator<T> & Serializable>
-      Combine.Globally<T, List<T>> of(int count, ComparatorT compareFn) {
-    return Combine.globally(new TopCombineFn<>(count, compareFn)).named("Top.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<T>} and returns a {@code PCollection<List<T>>} with a
-   * single element containing the smallest {@code count} elements of the input
-   * {@code PCollection<T>}, in increasing order, sorted according to
-   * their natural order.
-   *
-   * <p>If {@code count} {@code >} the number of elements in the
-   * input {@code PCollection}, then all the elements of the input
-   * {@code PCollection} will be in the resulting {@code PCollection}'s
-   * {@code List}, albeit in sorted order.
-   *
-   * <p>All the elements of the result {@code List}
-   * must fit into the memory of a single machine.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<Integer> values = ...;
-   * PCollection<List<Integer>> smallest10Values = values.apply(Top.smallest(10));
-   * } </pre>
-   *
-   * <p>By default, the {@code Coder} of the output {@code PCollection}
-   * is a {@code ListCoder} of the {@code Coder} of the elements of
-   * the input {@code PCollection}.
-   *
-   * <p>If the input {@code PCollection} is windowed into {@link GlobalWindows},
-   * an empty {@code List<T>} in the {@link GlobalWindow} will be output if the input
-   * {@code PCollection} is empty.  To use this with inputs with other windowing,
-   * either {@link Combine.Globally#withoutDefaults withoutDefaults} or
-   * {@link Combine.Globally#asSingletonView asSingletonView} must be called.
-   *
-   * <p>See also {@link #largest}.
-   *
-   * <p>See also {@link #of}, which sorts using a user-specified
-   * {@code Comparator} function.
-   *
-   * <p>See also {@link #perKey}, {@link #smallestPerKey}, and
-   * {@link #largestPerKey}, which take a {@code PCollection} of
-   * {@code KV}s and return the top values associated with each key.
-   */
-  public static <T extends Comparable<T>> Combine.Globally<T, List<T>> smallest(int count) {
-    return Combine.globally(new TopCombineFn<>(count, new Smallest<T>()))
-        .named("Smallest.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<T>} and returns a {@code PCollection<List<T>>} with a
-   * single element containing the largest {@code count} elements of the input
-   * {@code PCollection<T>}, in decreasing order, sorted according to
-   * their natural order.
-   *
-   * <p>If {@code count} {@code >} the number of elements in the
-   * input {@code PCollection}, then all the elements of the input
-   * {@code PCollection} will be in the resulting {@code PCollection}'s
-   * {@code List}, albeit in sorted order.
-   *
-   * <p>All the elements of the result's {@code List}
-   * must fit into the memory of a single machine.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<Integer> values = ...;
-   * PCollection<List<Integer>> largest10Values = values.apply(Top.largest(10));
-   * } </pre>
-   *
-   * <p>By default, the {@code Coder} of the output {@code PCollection}
-   * is a {@code ListCoder} of the {@code Coder} of the elements of
-   * the input {@code PCollection}.
-   *
-   * <p>If the input {@code PCollection} is windowed into {@link GlobalWindows},
-   * an empty {@code List<T>} in the {@link GlobalWindow} will be output if the input
-   * {@code PCollection} is empty.  To use this with inputs with other windowing,
-   * either {@link Combine.Globally#withoutDefaults withoutDefaults} or
-   * {@link Combine.Globally#asSingletonView asSingletonView} must be called.
-   *
-   * <p>See also {@link #smallest}.
-   *
-   * <p>See also {@link #of}, which sorts using a user-specified
-   * {@code Comparator} function.
-   *
-   * <p>See also {@link #perKey}, {@link #smallestPerKey}, and
-   * {@link #largestPerKey}, which take a {@code PCollection} of
-   * {@code KV}s and return the top values associated with each key.
-   */
-  public static <T extends Comparable<T>> Combine.Globally<T, List<T>> largest(int count) {
-    return Combine.globally(new TopCombineFn<>(count, new Largest<T>())).named("Largest.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<KV<K, V>>} and returns a
-   * {@code PCollection<KV<K, List<V>>>} that contains an output
-   * element mapping each distinct key in the input
-   * {@code PCollection} to the largest {@code count} values
-   * associated with that key in the input
-   * {@code PCollection<KV<K, V>>}, in decreasing order, sorted using
-   * the given {@code Comparator<V>}.  The
-   * {@code Comparator<V>} must also be {@code Serializable}.
-   *
-   * <p>If there are fewer than {@code count} values associated with
-   * a particular key, then all those values will be in the result
-   * mapping for that key, albeit in sorted order.
-   *
-   * <p>All the values associated with a single key must fit into the
-   * memory of a single machine, but there can be many more
-   * {@code KV}s in the resulting {@code PCollection} than can fit
-   * into the memory of a single machine.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<KV<School, Student>> studentsBySchool = ...;
-   * PCollection<KV<School, List<Student>>> top10StudentsBySchool =
-   *     studentsBySchool.apply(
-   *         Top.perKey(10, new CompareStudentsByAvgGrade()));
-   * } </pre>
-   *
-   * <p>By default, the {@code Coder} of the keys of the output
-   * {@code PCollection} is the same as that of the keys of the input
-   * {@code PCollection}, and the {@code Coder} of the values of the
-   * output {@code PCollection} is a {@code ListCoder} of the
-   * {@code Coder} of the values of the input {@code PCollection}.
-   *
-   * <p>See also {@link #smallestPerKey} and {@link #largestPerKey}, which
-   * sort {@code Comparable<V>} values using their natural
-   * ordering.
-   *
-   * <p>See also {@link #of}, {@link #smallest}, and {@link #largest}, which
-   * take a {@code PCollection} and return the top elements.
-   */
-  public static <K, V, ComparatorT extends Comparator<V> & Serializable>
-      PTransform<PCollection<KV<K, V>>, PCollection<KV<K, List<V>>>>
-      perKey(int count, ComparatorT compareFn) {
-    return Combine.perKey(
-        new TopCombineFn<>(count, compareFn).<K>asKeyedFn()).named("Top.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<KV<K, V>>} and returns a
-   * {@code PCollection<KV<K, List<V>>>} that contains an output
-   * element mapping each distinct key in the input
-   * {@code PCollection} to the smallest {@code count} values
-   * associated with that key in the input
-   * {@code PCollection<KV<K, V>>}, in increasing order, sorted
-   * according to their natural order.
-   *
-   * <p>If there are fewer than {@code count} values associated with
-   * a particular key, then all those values will be in the result
-   * mapping for that key, albeit in sorted order.
-   *
-   * <p>All the values associated with a single key must fit into the
-   * memory of a single machine, but there can be many more
-   * {@code KV}s in the resulting {@code PCollection} than can fit
-   * into the memory of a single machine.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<KV<String, Integer>> keyedValues = ...;
-   * PCollection<KV<String, List<Integer>>> smallest10ValuesPerKey =
-   *     keyedValues.apply(Top.smallestPerKey(10));
-   * } </pre>
-   *
-   * <p>By default, the {@code Coder} of the keys of the output
-   * {@code PCollection} is the same as that of the keys of the input
-   * {@code PCollection}, and the {@code Coder} of the values of the
-   * output {@code PCollection} is a {@code ListCoder} of the
-   * {@code Coder} of the values of the input {@code PCollection}.
-   *
-   * <p>See also {@link #largestPerKey}.
-   *
-   * <p>See also {@link #perKey}, which sorts values using a user-specified
-   * {@code Comparator} function.
-   *
-   * <p>See also {@link #of}, {@link #smallest}, and {@link #largest}, which
-   * take a {@code PCollection} and return the top elements.
-   */
-  public static <K, V extends Comparable<V>>
-      PTransform<PCollection<KV<K, V>>, PCollection<KV<K, List<V>>>>
-      smallestPerKey(int count) {
-    return Combine.perKey(new TopCombineFn<>(count, new Smallest<V>()).<K>asKeyedFn())
-        .named("Smallest.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<KV<K, V>>} and returns a
-   * {@code PCollection<KV<K, List<V>>>} that contains an output
-   * element mapping each distinct key in the input
-   * {@code PCollection} to the largest {@code count} values
-   * associated with that key in the input
-   * {@code PCollection<KV<K, V>>}, in decreasing order, sorted
-   * according to their natural order.
-   *
-   * <p>If there are fewer than {@code count} values associated with
-   * a particular key, then all those values will be in the result
-   * mapping for that key, albeit in sorted order.
-   *
-   * <p>All the values associated with a single key must fit into the
-   * memory of a single machine, but there can be many more
-   * {@code KV}s in the resulting {@code PCollection} than can fit
-   * into the memory of a single machine.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<KV<String, Integer>> keyedValues = ...;
-   * PCollection<KV<String, List<Integer>>> largest10ValuesPerKey =
-   *     keyedValues.apply(Top.largestPerKey(10));
-   * } </pre>
-   *
-   * <p>By default, the {@code Coder} of the keys of the output
-   * {@code PCollection} is the same as that of the keys of the input
-   * {@code PCollection}, and the {@code Coder} of the values of the
-   * output {@code PCollection} is a {@code ListCoder} of the
-   * {@code Coder} of the values of the input {@code PCollection}.
-   *
-   * <p>See also {@link #smallestPerKey}.
-   *
-   * <p>See also {@link #perKey}, which sorts values using a user-specified
-   * {@code Comparator} function.
-   *
-   * <p>See also {@link #of}, {@link #smallest}, and {@link #largest}, which
-   * take a {@code PCollection} and return the top elements.
-   */
-  public static <K, V extends Comparable<V>>
-      PerKey<K, V, List<V>>
-      largestPerKey(int count) {
-    return Combine.perKey(
-new TopCombineFn<>(count, new Largest<V>()).<K>asKeyedFn())
-        .named("Largest.PerKey");
-  }
-
-  /**
-   * A {@code Serializable} {@code Comparator} that uses the compared elements' natural
-   * ordering.
-   */
-  public static class Largest<T extends Comparable<? super T>>
-      implements Comparator<T>, Serializable {
-    @Override
-    public int compare(T a, T b) {
-      return a.compareTo(b);
-    }
-  }
-
-  /**
-   * A {@code Serializable} {@code Comparator} that uses the reverse of the compared elements'
-   * natural ordering.
-   */
-  public static class Smallest<T extends Comparable<? super T>>
-      implements Comparator<T>, Serializable {
-    @Override
-    public int compare(T a, T b) {
-      return b.compareTo(a);
-    }
-  }
-
-
-  ////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * {@code CombineFn} for {@code Top} transforms that combines a
-   * bunch of {@code T}s into a single {@code count}-long
-   * {@code List<T>}, using {@code compareFn} to choose the largest
-   * {@code T}s.
-   *
-   * @param <T> type of element being compared
-   */
-  public static class TopCombineFn<T, ComparatorT extends Comparator<T> & Serializable>
-      extends AccumulatingCombineFn<T, BoundedHeap<T, ComparatorT>, List<T>> {
-
-    private final int count;
-    private final ComparatorT compareFn;
-
-    public TopCombineFn(int count, ComparatorT compareFn) {
-      Preconditions.checkArgument(
-          count >= 0,
-          "count must be >= 0");
-      this.count = count;
-      this.compareFn = compareFn;
-    }
-
-    @Override
-    public BoundedHeap<T, ComparatorT> createAccumulator() {
-      return new BoundedHeap<>(count, compareFn, new ArrayList<T>());
-    }
-
-    @Override
-    public Coder<BoundedHeap<T, ComparatorT>> getAccumulatorCoder(
-        CoderRegistry registry, Coder<T> inputCoder) {
-      return new BoundedHeapCoder<>(count, compareFn, inputCoder);
-    }
-
-    @Override
-    public String getIncompatibleGlobalWindowErrorMessage() {
-      return "Default values are not supported in Top.[of, smallest, largest]() if the output "
-          + "PCollection is not windowed by GlobalWindows. Instead, use "
-          + "Top.[of, smallest, largest]().withoutDefaults() to output an empty PCollection if the"
-          + " input PCollection is empty, or Top.[of, smallest, largest]().asSingletonView() to "
-          + "get a PCollection containing the empty list if the input PCollection is empty.";
-    }
-  }
-
-  /**
-   * A heap that stores only a finite number of top elements according to its provided
-   * {@code Comparator}. Implemented as an {@link Accumulator} to facilitate implementation of
-   * {@link Top}.
-   *
-   * <p>This class is <i>not</i> safe for multithreaded use, except read-only.
-   */
-  static class BoundedHeap<T, ComparatorT extends Comparator<T> & Serializable>
-      implements Accumulator<T, BoundedHeap<T, ComparatorT>, List<T>> {
-
-    /**
-     * A queue with smallest at the head, for quick adds.
-     *
-     * <p>Only one of asList and asQueue may be non-null.
-     */
-    private PriorityQueue<T> asQueue;
-
-    /**
-     * A list with the largest element first, the form returned by extractOutput().
-     *
-     * <p>Only one of asList and asQueue may be non-null.
-     */
-    private List<T> asList;
-
-    /** The user-provided Comparator. */
-    private final ComparatorT compareFn;
-
-    /** The maximum size of the heap. */
-    private final int maximumSize;
-
-    /**
-     * Creates a new heap with the provided size, comparator, and initial elements.
-     */
-    private BoundedHeap(int maximumSize, ComparatorT compareFn, List<T> asList) {
-      this.maximumSize = maximumSize;
-      this.asList = asList;
-      this.compareFn = compareFn;
-    }
-
-    @Override
-    public void addInput(T value) {
-      maybeAddInput(value);
-    }
-
-    /**
-     * Adds {@code value} to this heap if the heap is not full or it is larger than the smallest
-     * current element. Returns {@code true} if {@code value} was added.
-     */
-    private boolean maybeAddInput(T value) {
-      if (maximumSize == 0) {
-        // Don't add anything.
-        return false;
-      }
-
-      // If asQueue == null, then this is the first add after the latest call to the
-      // constructor or asList().
-      if (asQueue == null) {
-        asQueue = new PriorityQueue<>(maximumSize, compareFn);
-        for (T item : asList) {
-          asQueue.add(item);
-        }
-        asList = null;
-      }
-
-      if (asQueue.size() < maximumSize) {
-        asQueue.add(value);
-        return true;
-      } else if (compareFn.compare(value, asQueue.peek()) > 0) {
-        asQueue.poll();
-        asQueue.add(value);
-        return true;
-      } else {
-        return false;
-      }
-    }
-
-    @Override
-    public void mergeAccumulator(BoundedHeap<T, ComparatorT> accumulator) {
-      for (T value : accumulator.asList()) {
-        if (!maybeAddInput(value)) {
-          // If this element of accumulator does not make the top N, neither
-          // will the rest, which are all smaller.
-          break;
-        }
-      }
-    }
-
-    @Override
-    public List<T> extractOutput() {
-      return asList();
-    }
-
-    /**
-     * Returns the contents of this Heap as a List sorted largest-to-smallest.
-     */
-    private List<T> asList() {
-      if (asList == null) {
-        List<T> smallestFirstList = Lists.newArrayListWithCapacity(asQueue.size());
-        while (!asQueue.isEmpty()) {
-          smallestFirstList.add(asQueue.poll());
-        }
-        asList = Lists.reverse(smallestFirstList);
-        asQueue = null;
-      }
-      return asList;
-    }
-  }
-
-  /**
-   * A {@link Coder} for {@link BoundedHeap}, using Java serialization via {@link CustomCoder}.
-   */
-  private static class BoundedHeapCoder<T, ComparatorT extends Comparator<T> & Serializable>
-      extends CustomCoder<BoundedHeap<T, ComparatorT>> {
-    private final Coder<List<T>> listCoder;
-    private final ComparatorT compareFn;
-    private final int maximumSize;
-
-    public BoundedHeapCoder(int maximumSize, ComparatorT compareFn, Coder<T> elementCoder) {
-      listCoder = ListCoder.of(elementCoder);
-      this.compareFn = compareFn;
-      this.maximumSize = maximumSize;
-    }
-
-    @Override
-    public void encode(
-        BoundedHeap<T, ComparatorT> value, OutputStream outStream, Context context)
-        throws CoderException, IOException {
-      listCoder.encode(value.asList(), outStream, context);
-    }
-
-    @Override
-    public BoundedHeap<T, ComparatorT> decode(InputStream inStream, Coder.Context context)
-        throws CoderException, IOException {
-      return new BoundedHeap<>(maximumSize, compareFn, listCoder.decode(inStream, context));
-    }
-
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      verifyDeterministic(
-          "HeapCoder requires a deterministic list coder", listCoder);
-    }
-
-    @Override
-    public boolean isRegisterByteSizeObserverCheap(
-        BoundedHeap<T, ComparatorT> value, Context context) {
-      return listCoder.isRegisterByteSizeObserverCheap(
-          value.asList(), context);
-    }
-
-    @Override
-    public void registerByteSizeObserver(
-        BoundedHeap<T, ComparatorT> value, ElementByteSizeObserver observer, Context context)
-            throws Exception {
-      listCoder.registerByteSizeObserver(value.asList(), observer, context);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java
deleted file mode 100644
index d84bc77..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Values.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-/**
- * {@code Values<V>} takes a {@code PCollection} of {@code KV<K, V>}s and
- * returns a {@code PCollection<V>} of the values.
- *
- * <p>Example of use:
- * <pre> {@code
- * PCollection<KV<String, Long>> wordCounts = ...;
- * PCollection<Long> counts = wordCounts.apply(Values.<Long>create());
- * } </pre>
- *
- * <p>Each output element has the same timestamp and is in the same windows
- * as its corresponding input element, and the output {@code PCollection}
- * has the same
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
- * associated with it as the input.
- *
- * <p>See also {@link Keys}.
- *
- * @param <V> the type of the values in the input {@code PCollection},
- * and the type of the elements in the output {@code PCollection}
- */
-public class Values<V> extends PTransform<PCollection<? extends KV<?, V>>,
-                                          PCollection<V>> {
-  /**
-   * Returns a {@code Values<V>} {@code PTransform}.
-   *
-   * @param <V> the type of the values in the input {@code PCollection},
-   * and the type of the elements in the output {@code PCollection}
-   */
-  public static <V> Values<V> create() {
-    return new Values<>();
-  }
-
-  private Values() { }
-
-  @Override
-  public PCollection<V> apply(PCollection<? extends KV<?, V>> in) {
-    return
-        in.apply(ParDo.named("Values")
-                 .of(new DoFn<KV<?, V>, V>() {
-                     @Override
-                     public void processElement(ProcessContext c) {
-                       c.output(c.element().getValue());
-                     }
-                    }));
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java
deleted file mode 100644
index e2c4487..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/View.java
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.util.PCollectionViews;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * Transforms for creating {@link PCollectionView PCollectionViews} from
- * {@link PCollection PCollections} (to read them as side inputs).
- *
- * <p>While a {@link PCollection PCollection&lt;ElemT&gt;} has many values of type {@code ElemT} per
- * window, a {@link PCollectionView PCollectionView&lt;ViewT&gt;} has a single value of type
- * {@code ViewT} for each window. It can be thought of as a mapping from windows to values of
- * type {@code ViewT}. The transforms here represent ways of converting the {@code ElemT} values
- * in a window into a {@code ViewT} for that window.
- *
- * <p>When a {@link ParDo} transform is processing a main input
- * element in a window {@code w} and a {@link PCollectionView} is read via
- * {@link DoFn.ProcessContext#sideInput}, the value of the view for {@code w} is
- * returned.
- *
- * <p>The SDK supports viewing a {@link PCollection}, per window, as a single value,
- * a {@link List}, an {@link Iterable}, a {@link Map}, or a multimap (iterable-valued {@link Map}).
- *
- * <p>For a {@link PCollection} that contains a single value of type {@code T}
- * per window, such as the output of {@link Combine#globally},
- * use {@link View#asSingleton()} to prepare it for use as a side input:
- *
- * <pre>
- * {@code
- * PCollectionView<T> output = someOtherPCollection
- *     .apply(Combine.globally(...))
- *     .apply(View.<T>asSingleton());
- * }
- * </pre>
- *
- * <p>For a small {@link PCollection} with windows that can fit entirely in memory,
- * use {@link View#asList()} to prepare it for use as a {@code List}.
- * When read as a side input, the entire list for a window will be cached in memory.
- *
- * <pre>
- * {@code
- * PCollectionView<List<T>> output =
- *    smallPCollection.apply(View.<T>asList());
- * }
- * </pre>
- *
- * <p>If a {@link PCollection} of {@code KV<K, V>} is known to
- * have a single value per window for each key, then use {@link View#asMap()}
- * to view it as a {@code Map<K, V>}:
- *
- * <pre>
- * {@code
- * PCollectionView<Map<K, V>> output =
- *     somePCollection.apply(View.<K, V>asMap());
- * }
- * </pre>
- *
- * <p>Otherwise, to access a {@link PCollection} of {@code KV<K, V>} as a
- * {@code Map<K, Iterable<V>>} side input, use {@link View#asMultimap()}:
- *
- * <pre>
- * {@code
- * PCollectionView<Map<K, Iterable<V>>> output =
- *     somePCollection.apply(View.<K, V>asMultimap());
- * }
- * </pre>
- *
- * <p>To iterate over an entire window of a {@link PCollection} via
- * side input, use {@link View#asIterable()}:
- *
- * <pre>
- * {@code
- * PCollectionView<Iterable<T>> output =
- *     somePCollection.apply(View.<T>asIterable());
- * }
- * </pre>
- *
- *
- * <p>Both {@link View#asMultimap()} and {@link View#asMap()} are useful
- * for implementing lookup based "joins" with the main input, when the
- * side input is small enough to fit into memory.
- *
- * <p>For example, if you represent a page on a website via some {@code Page} object and
- * have some type {@code UrlVisit} logging that a URL was visited, you could convert these
- * to more fully structured {@code PageVisit} objects using a side input, something like the
- * following:
- *
- * <pre>
- * {@code
- * PCollection<Page> pages = ... // pages fit into memory
- * PCollection<UrlVisit> urlVisits = ... // very large collection
- * final PCollectionView<Map<URL, Page>> urlToPage = pages
- *     .apply(WithKeys.of( ... )) // extract the URL from the page
- *     .apply(View.<URL, Page>asMap());
- *
- * PCollection<PageVisit> pageVisits = urlVisits
- *     .apply(ParDo.withSideInputs(urlToPage)
- *         .of(new DoFn<UrlVisit, PageVisit>() {
- *             {@literal @}Override
- *             void processElement(ProcessContext context) {
- *               UrlVisit urlVisit = context.element();
- *               Page page = context.sideInput(urlToPage).get(urlVisit.getUrl());
- *               context.output(new PageVisit(page, urlVisit.getVisitData()));
- *             }
- *         }));
- * }
- * </pre>
- *
- * <p>See {@link ParDo#withSideInputs} for details on how to access
- * this variable inside a {@link ParDo} over another {@link PCollection}.
- */
-public class View {
-
-  // Do not instantiate
-  private View() { }
-
-  /**
-   * Returns a {@link AsSingleton} transform that takes a
-   * {@link PCollection} with a single value per window
-   * as input and produces a {@link PCollectionView} that returns
-   * the value in the main input window when read as a side input.
-   *
-   * <pre>
-   * {@code
-   * PCollection<InputT> input = ...
-   * CombineFn<InputT, OutputT> yourCombineFn = ...
-   * PCollectionView<OutputT> output = input
-   *     .apply(Combine.globally(yourCombineFn))
-   *     .apply(View.<OutputT>asSingleton());
-   * }</pre>
-   *
-   * <p>If the input {@link PCollection} is empty,
-   * throws {@link java.util.NoSuchElementException} in the consuming
-   * {@link DoFn}.
-   *
-   * <p>If the input {@link PCollection} contains more than one
-   * element, throws {@link IllegalArgumentException} in the
-   * consuming {@link DoFn}.
-   */
-  public static <T> AsSingleton<T> asSingleton() {
-    return new AsSingleton<>();
-  }
-
-  /**
-   * Creates a {@link View.AsList} transform. Applied to a {@link PCollection}, it yields a
-   * {@link PCollectionView} that maps each window to a {@link List} of all the elements
-   * in that window.
-   *
-   * <p>The resulting list must fit in memory.
-   */
-  public static <T> AsList<T> asList() {
-    AsList<T> listTransform = new AsList<>();
-    return listTransform;
-  }
-
-  /**
-   * Creates a {@link View.AsIterable} transform. Applied to a {@link PCollection}, it yields
-   * a {@link PCollectionView} that maps each window to an {@link Iterable} over the values
-   * in that window.
-   *
-   * <p>A window's {@link Iterable} need not fit in memory, but its values may also not be
-   * effectively cached. When every window is known to fit in memory and stronger caching
-   * is wanted, use {@link #asList} instead.
-   */
-  public static <T> AsIterable<T> asIterable() {
-    AsIterable<T> iterableTransform = new AsIterable<>();
-    return iterableTransform;
-  }
-
-  /**
-   * Creates a {@link View.AsMap} transform. Applied to a
-   * {@link PCollection PCollection&lt;KV&lt;K, V&gt;&gt;}, it yields a
-   * {@link PCollectionView} that maps each window to a {@link Map Map&lt;K, V&gt;}.
-   * Every key of the input must be associated with a single value, per window. When that
-   * does not hold, either precede this view with {@code Combine.perKey}, as in the example
-   * below, or use {@link View#asMultimap()}.
-   *
-   * <pre>
-   * {@code
-   * PCollection<KV<K, V>> input = ...
-   * CombineFn<V, OutputT> yourCombineFn = ...
-   * PCollectionView<Map<K, OutputT>> output = input
-   *     .apply(Combine.perKey(yourCombineFn.<K>asKeyedFn()))
-   *     .apply(View.<K, OutputT>asMap());
-   * }</pre>
-   *
-   * <p>Currently, the resulting map must fit into memory.
-   */
-  public static <K, V> AsMap<K, V> asMap() {
-    return new AsMap<>();
-  }
-
-  /**
-   * Creates a {@link View.AsMultimap} transform. Applied to a
-   * {@link PCollection PCollection&lt;KV&lt;K, V&gt;&gt;}, it yields a
-   * {@link PCollectionView} that maps each window to its contents as a
-   * {@link Map Map&lt;K, Iterable&lt;V&gt;&gt;}, for use as a side input.
-   * Unlike {@link View#asMap()}, the keys of the input collection need not be unique.
-   *
-   * <pre>
-   * {@code
-   * PCollection<KV<K, V>> input = ... // maybe more than one occurrence of some keys
-   * PCollectionView<Map<K, Iterable<V>>> output = input.apply(View.<K, V>asMultimap());
-   * }</pre>
-   *
-   * <p>Currently, the resulting map must fit into memory.
-   */
-  public static <K, V> AsMultimap<K, V> asMultimap() {
-    return new AsMultimap<>();
-  }
-
-  /**
-   * The transform returned by {@link View#asList()}.
-   *
-   * <p>Pipeline authors should not use this class directly; it is public only so that a
-   * {@link PipelineRunner} may override its behavior.
-   */
-  public static class AsList<T> extends PTransform<PCollection<T>, PCollectionView<List<T>>> {
-    private AsList() {}
-
-    /**
-     * Checks that {@link GroupByKey} is applicable to the input, rethrowing any
-     * {@link IllegalStateException} with a side-input-specific message.
-     */
-    @Override
-    public void validate(PCollection<T> input) {
-      try {
-        GroupByKey.applicableTo(input);
-      } catch (IllegalStateException cause) {
-        throw new IllegalStateException("Unable to create a side-input view from input", cause);
-      }
-    }
-
-    /** Builds the list view for the input and hands it to {@link CreatePCollectionView}. */
-    @Override
-    public PCollectionView<List<T>> apply(PCollection<T> input) {
-      PCollectionView<List<T>> listView =
-          PCollectionViews.listView(
-              input.getPipeline(), input.getWindowingStrategy(), input.getCoder());
-      return input.apply(CreatePCollectionView.<T, List<T>>of(listView));
-    }
-  }
-
-  /**
-   * The transform returned by {@link View#asIterable()}.
-   *
-   * <p>Pipeline authors should not use this class directly; it is public only so that a
-   * {@link PipelineRunner} may override its behavior.
-   */
-  public static class AsIterable<T>
-      extends PTransform<PCollection<T>, PCollectionView<Iterable<T>>> {
-    private AsIterable() {}
-
-    /**
-     * Checks that {@link GroupByKey} is applicable to the input, rethrowing any
-     * {@link IllegalStateException} with a side-input-specific message.
-     */
-    @Override
-    public void validate(PCollection<T> input) {
-      try {
-        GroupByKey.applicableTo(input);
-      } catch (IllegalStateException cause) {
-        throw new IllegalStateException("Unable to create a side-input view from input", cause);
-      }
-    }
-
-    /** Builds the iterable view for the input and hands it to {@link CreatePCollectionView}. */
-    @Override
-    public PCollectionView<Iterable<T>> apply(PCollection<T> input) {
-      PCollectionView<Iterable<T>> iterableView =
-          PCollectionViews.iterableView(
-              input.getPipeline(), input.getWindowingStrategy(), input.getCoder());
-      return input.apply(CreatePCollectionView.<T, Iterable<T>>of(iterableView));
-    }
-  }
-
-  /**
-   * The transform returned by {@link View#asSingleton()}.
-   *
-   * <p>Pipeline authors should not use this class directly; it is public only so that a
-   * {@link PipelineRunner} may override its behavior.
-   */
-  public static class AsSingleton<T> extends PTransform<PCollection<T>, PCollectionView<T>> {
-    private final T defaultValue;
-    private final boolean hasDefault;
-
-    /** Creates a transform that is flagged as having no default value. */
-    private AsSingleton() {
-      hasDefault = false;
-      defaultValue = null;
-    }
-
-    /** Creates a transform that is flagged as having the given default value. */
-    private AsSingleton(T defaultValue) {
-      hasDefault = true;
-      this.defaultValue = defaultValue;
-    }
-
-    /**
-     * Reports whether a default value was configured for this transform.
-     */
-    public boolean hasDefaultValue() {
-      return hasDefault;
-    }
-
-    /**
-     * Returns the configured default value, or null when none was configured.
-     */
-    public T defaultValue() {
-      return defaultValue;
-    }
-
-    /**
-     * Returns a new transform whose default value, used for windows with no value in them,
-     * is {@code defaultValue}. This transform is left unchanged.
-     */
-    public AsSingleton<T> withDefaultValue(T defaultValue) {
-      return new AsSingleton<T>(defaultValue);
-    }
-
-    /**
-     * Checks that {@link GroupByKey} is applicable to the input, rethrowing any
-     * {@link IllegalStateException} with a side-input-specific message.
-     */
-    @Override
-    public void validate(PCollection<T> input) {
-      try {
-        GroupByKey.applicableTo(input);
-      } catch (IllegalStateException cause) {
-        throw new IllegalStateException("Unable to create a side-input view from input", cause);
-      }
-    }
-
-    /** Builds the singleton view for the input and hands it to {@link CreatePCollectionView}. */
-    @Override
-    public PCollectionView<T> apply(PCollection<T> input) {
-      PCollectionView<T> singletonView =
-          PCollectionViews.singletonView(
-              input.getPipeline(),
-              input.getWindowingStrategy(),
-              hasDefault,
-              defaultValue,
-              input.getCoder());
-      return input.apply(CreatePCollectionView.<T, T>of(singletonView));
-    }
-  }
-
-  /**
-   * Not intended for direct use by pipeline authors; public only so a {@link PipelineRunner} may
-   * override its behavior.
-   *
-   * <p>See {@link View#asMultimap()}.
-   */
-  public static class AsMultimap<K, V>
-      extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, Iterable<V>>>> {
-    private AsMultimap() { }
-
-    @Override
-    public void validate(PCollection<KV<K, V>> input) {
-      try {
-        GroupByKey.applicableTo(input);
-      } catch (IllegalStateException e) {
-        throw new IllegalStateException("Unable to create a side-input view from input", e);
-      }
-    }
-
-    @Override
-    public PCollectionView<Map<K, Iterable<V>>> apply(PCollection<KV<K, V>> input) {
-      return input.apply(CreatePCollectionView.<KV<K, V>, Map<K, Iterable<V>>>of(
-          PCollectionViews.multimapView(
-              input.getPipeline(),
-              input.getWindowingStrategy(),
-              input.getCoder())));
-    }
-  }
-
-  /**
-   * Not intended for direct use by pipeline authors; public only so a {@link PipelineRunner} may
-   * override its behavior.
-   *
-   * <p>See {@link View#asMap()}.
-   */
-  public static class AsMap<K, V>
-      extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, V>>> {
-    private AsMap() { }
-
-    /**
-     * @deprecated this method simply returns this AsMap unmodified
-     */
-    @Deprecated()
-    public AsMap<K, V> withSingletonValues() {
-      return this;
-    }
-
-    @Override
-    public void validate(PCollection<KV<K, V>> input) {
-      try {
-        GroupByKey.applicableTo(input);
-      } catch (IllegalStateException e) {
-        throw new IllegalStateException("Unable to create a side-input view from input", e);
-      }
-    }
-
-    @Override
-    public PCollectionView<Map<K, V>> apply(PCollection<KV<K, V>> input) {
-      return input.apply(CreatePCollectionView.<KV<K, V>, Map<K, V>>of(
-          PCollectionViews.mapView(
-              input.getPipeline(),
-              input.getWindowingStrategy(),
-              input.getCoder())));
-    }
-  }
-
-  ////////////////////////////////////////////////////////////////////////////
-  // Internal details below
-
-  /**
-   * Creates a primitive {@link PCollectionView}.
-   *
-   * <p>For internal use only by runner implementors.
-   *
-   * @param <ElemT> The type of the elements of the input PCollection
-   * @param <ViewT> The type associated with the {@link PCollectionView} used as a side input
-   */
-  public static class CreatePCollectionView<ElemT, ViewT>
-      extends PTransform<PCollection<ElemT>, PCollectionView<ViewT>> {
-    // Assigned once in the constructor and never reassigned, hence final.
-    private final PCollectionView<ViewT> view;
-
-    private CreatePCollectionView(PCollectionView<ViewT> view) {
-      this.view = view;
-    }
-
-    /**
-     * Returns a transform wrapping the given, already constructed, {@code view}.
-     *
-     * @param view the view this transform returns from {@link #apply} and {@link #getView}
-     */
-    public static <ElemT, ViewT> CreatePCollectionView<ElemT, ViewT> of(
-        PCollectionView<ViewT> view) {
-      return new CreatePCollectionView<>(view);
-    }
-
-    /**
-     * Returns the view this transform was created with.
-     */
-    public PCollectionView<ViewT> getView() {
-      return view;
-    }
-
-    /**
-     * Returns the wrapped view; {@code input} itself is not read here.
-     */
-    @Override
-    public PCollectionView<ViewT> apply(PCollection<ElemT> input) {
-      return view;
-    }
-
-    // Registers the default DirectPipelineRunner evaluator for this transform: it fetches the
-    // windowed values of the transform's input and stores them against the output view.
-    static {
-      DirectPipelineRunner.registerDefaultTransformEvaluator(
-          CreatePCollectionView.class,
-          new DirectPipelineRunner.TransformEvaluator<CreatePCollectionView>() {
-            @SuppressWarnings("rawtypes")
-            @Override
-            public void evaluate(
-                CreatePCollectionView transform,
-                DirectPipelineRunner.EvaluationContext context) {
-              // Hop from the raw type to a generic method so the element type is bound.
-              evaluateTyped(transform, context);
-            }
-
-            private <ElemT, ViewT> void evaluateTyped(
-                CreatePCollectionView<ElemT, ViewT> transform,
-                DirectPipelineRunner.EvaluationContext context) {
-              List<WindowedValue<ElemT>> elems =
-                  context.getPCollectionWindowedValues(context.getInput(transform));
-              context.setPCollectionView(context.getOutput(transform), elems);
-            }
-          });
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java
deleted file mode 100644
index c06795c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithKeys.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-/**
- * {@code WithKeys<K, V>} takes a {@code PCollection<V>}, and either a
- * constant key of type {@code K} or a function from {@code V} to
- * {@code K}, and returns a {@code PCollection<KV<K, V>>}, where each
- * of the values in the input {@code PCollection} has been paired with
- * either the constant key or a key computed from the value.
- *
- * <p>Example of use:
- * <pre> {@code
- * PCollection<String> words = ...;
- * PCollection<KV<Integer, String>> lengthsToWords =
- *     words.apply(WithKeys.of(new SerializableFunction<String, Integer>() {
- *         public Integer apply(String s) { return s.length(); } }));
- * } </pre>
- *
- * <p>Each output element has the same timestamp and is in the same windows
- * as its corresponding input element, and the output {@code PCollection}
- * has the same
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
- * associated with it as the input.
- *
- * @param <K> the type of the keys in the output {@code PCollection}
- * @param <V> the type of the elements in the input
- * {@code PCollection} and the values in the output
- * {@code PCollection}
- */
-public class WithKeys<K, V> extends PTransform<PCollection<V>,
-                                               PCollection<KV<K, V>>> {
-  /**
-   * Returns a {@code PTransform} that takes a {@code PCollection<V>}
-   * and returns a {@code PCollection<KV<K, V>>}, where each of the
-   * values in the input {@code PCollection} has been paired with a
-   * key computed from the value by invoking the given
-   * {@code SerializableFunction}.
-   *
-   * <p>If using a lambda in Java 8, {@link #withKeyType(TypeDescriptor)} must
-   * be called on the result {@link PTransform}.
-   */
-  public static <K, V> WithKeys<K, V> of(SerializableFunction<V, K> fn) {
-    return new WithKeys<>(fn, null);
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes a {@code PCollection<V>}
-   * and returns a {@code PCollection<KV<K, V>>}, where each of the
-   * values in the input {@code PCollection} has been paired with the
-   * given key.
-   *
-   * <p>The key's runtime class is recorded as the key class; a {@code null} key
-   * records no key class.
-   */
-  @SuppressWarnings("unchecked")
-  public static <K, V> WithKeys<K, V> of(final K key) {
-    return new WithKeys<>(
-        new SerializableFunction<V, K>() {
-          @Override
-          public K apply(V value) {
-            return key;
-          }
-        },
-        (Class<K>) (key == null ? null : key.getClass()));
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  // Both fields are set once in the constructor; withKeyType returns a new instance
-  // instead of mutating this one, so they are declared final.
-  private final SerializableFunction<V, K> fn;
-  // Only read by apply(); when null, the key coder is inferred from fn instead.
-  private final transient Class<K> keyClass;
-
-  private WithKeys(SerializableFunction<V, K> fn, Class<K> keyClass) {
-    this.fn = fn;
-    this.keyClass = keyClass;
-  }
-
-  /**
-   * Return a {@link WithKeys} that is like this one with the specified key type descriptor.
-   *
-   * <p>For use with lambdas in Java 8, either this method must be called with an appropriate
-   * type descriptor or {@link PCollection#setCoder(Coder)} must be called on the output
-   * {@link PCollection}.
-   */
-  public WithKeys<K, V> withKeyType(TypeDescriptor<K> keyType) {
-    // Safe cast
-    @SuppressWarnings("unchecked")
-    Class<K> rawType = (Class<K>) keyType.getRawType();
-    return new WithKeys<>(fn, rawType);
-  }
-
-  /**
-   * Pairs every input element with the key produced by the key function, then tries to set
-   * a {@link KvCoder} on the result. If no key coder can be provided, the result's coder is
-   * left unset here.
-   */
-  @Override
-  public PCollection<KV<K, V>> apply(PCollection<V> in) {
-    PCollection<KV<K, V>> result =
-        in.apply(ParDo.named("AddKeys")
-                 .of(new DoFn<V, KV<K, V>>() {
-                     @Override
-                     public void processElement(ProcessContext c) {
-                       c.output(KV.of(fn.apply(c.element()),
-                                    c.element()));
-                     }
-                    }));
-
-    try {
-      Coder<K> keyCoder;
-      CoderRegistry coderRegistry = in.getPipeline().getCoderRegistry();
-      if (keyClass == null) {
-        keyCoder = coderRegistry.getDefaultOutputCoder(fn, in.getCoder());
-      } else {
-        keyCoder = coderRegistry.getDefaultCoder(TypeDescriptor.of(keyClass));
-      }
-      // TODO: Remove when we can set the coder inference context.
-      result.setCoder(KvCoder.of(keyCoder, in.getCoder()));
-    } catch (CannotProvideCoderException ignored) {
-      // Deliberately ignored: let lazy coder inference have a try
-    }
-
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithTimestamps.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithTimestamps.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithTimestamps.java
deleted file mode 100644
index 85a93bf..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/WithTimestamps.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.io.Source;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-/**
- * A {@link PTransform} that assigns a timestamp to every element of a {@link PCollection}.
- *
- * <p>Timestamps are used to assign {@link BoundedWindow Windows} to elements within the
- * {@link Window#into(com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn)}
- * {@link PTransform}. Assigning timestamps is useful when the input data set comes from a
- * {@link Source} without implicit timestamps (such as
- * {@link com.google.cloud.dataflow.sdk.io.TextIO.Read TextIO}).
- *
- */
-public class WithTimestamps<T> extends PTransform<PCollection<T>, PCollection<T>> {
-  /**
-   * Given a {@link SerializableFunction} {@code fn} from {@code T} to {@link Instant}, returns
-   * a {@link PTransform} that takes an input {@link PCollection PCollection&lt;T&gt;} and
-   * outputs a {@link PCollection PCollection&lt;T&gt;} containing every input element
-   * {@code v}, each output with the timestamp {@code fn.apply(v)}.
-   *
-   * <p>If the input {@link PCollection} elements have timestamps, the output timestamp for each
-   * element must not be before the input element's timestamp minus the value of
-   * {@link #getAllowedTimestampSkew()}. If an output timestamp is before this time, the
-   * transform will throw an {@link IllegalArgumentException} when executed. Use
-   * {@link #withAllowedTimestampSkew(Duration)} to update the allowed skew.
-   *
-   * <p>Each output element will be in the same windows as the input element. If a new window
-   * based on the new output timestamp is desired, apply a new instance of
-   * {@link Window#into(WindowFn)}.
-   *
-   * <p>This transform will fail at execution time with a {@link NullPointerException} if for
-   * any input element the result of {@code fn.apply(v)} is {@code null}.
-   *
-   * <p>Example of use in Java 8:
-   * <pre>{@code
-   * PCollection<Record> timestampedRecords = records.apply(
-   *     WithTimestamps.of((Record rec) -> rec.getInstant()));
-   * }</pre>
-   */
-  public static <T> WithTimestamps<T> of(SerializableFunction<T, Instant> fn) {
-    return new WithTimestamps<T>(fn, Duration.ZERO);
-  }
-
-  ///////////////////////////////////////////////////////////////////
-
-  private final SerializableFunction<T, Instant> fn;
-  private final Duration allowedTimestampSkew;
-
-  private WithTimestamps(SerializableFunction<T, Instant> fn, Duration allowedTimestampSkew) {
-    checkNotNull(fn, "WithTimestamps fn cannot be null");
-    this.fn = fn;
-    this.allowedTimestampSkew = allowedTimestampSkew;
-  }
-
-  /**
-   * Returns a copy of this transform that uses the given allowed timestamp skew, i.e. the
-   * maximum duration that timestamps can be shifted backward. This object is not modified.
-   *
-   * <p>The default value is {@code Duration.ZERO}, allowing timestamps to only be shifted into
-   * the future. For infinite skew, use {@code new Duration(Long.MAX_VALUE)}.
-   */
-  public WithTimestamps<T> withAllowedTimestampSkew(Duration allowedTimestampSkew) {
-    return new WithTimestamps<T>(fn, allowedTimestampSkew);
-  }
-
-  /**
-   * Returns the allowed timestamp skew duration: the maximum duration that timestamps can be
-   * shifted backwards from the timestamp of the input element.
-   *
-   * @see DoFn#getAllowedTimestampSkew()
-   */
-  public Duration getAllowedTimestampSkew() {
-    return allowedTimestampSkew;
-  }
-
-  @Override
-  public PCollection<T> apply(PCollection<T> input) {
-    AddTimestampsDoFn<T> addTimestamps = new AddTimestampsDoFn<T>(fn, allowedTimestampSkew);
-    PCollection<T> timestamped = input.apply(ParDo.named("AddTimestamps").of(addTimestamps));
-    // The element type is unchanged, so carry the input's type descriptor over.
-    return timestamped.setTypeDescriptorInternal(input.getTypeDescriptor());
-  }
-
-  /** Re-emits each element with the timestamp computed by the wrapped function. */
-  private static class AddTimestampsDoFn<T> extends DoFn<T, T> {
-    private final SerializableFunction<T, Instant> fn;
-    private final Duration allowedTimestampSkew;
-
-    public AddTimestampsDoFn(SerializableFunction<T, Instant> fn, Duration allowedTimestampSkew) {
-      this.allowedTimestampSkew = allowedTimestampSkew;
-      this.fn = fn;
-    }
-
-    @Override
-    public void processElement(ProcessContext c) {
-      Instant timestamp =
-          checkNotNull(
-              fn.apply(c.element()),
-              "Timestamps for WithTimestamps cannot be null. Timestamp provided by %s.",
-              fn);
-      c.outputWithTimestamp(c.element(), timestamp);
-    }
-
-    @Override
-    public Duration getAllowedTimestampSkew() {
-      return allowedTimestampSkew;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Write.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Write.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Write.java
deleted file mode 100644
index 5cf655a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Write.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-/**
- * Backwards-compatibility shim: an empty subclass of
- * {@link com.google.cloud.dataflow.sdk.io.Write}, which should be used instead.
- *
- * @deprecated use {@link com.google.cloud.dataflow.sdk.io.Write} from the
- * {@code com.google.cloud.dataflow.sdk.io} package instead.
- */
-@Deprecated
-public class Write extends com.google.cloud.dataflow.sdk.io.Write {}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/display/DisplayData.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/display/DisplayData.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/display/DisplayData.java
deleted file mode 100644
index dadc730..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/display/DisplayData.java
+++ /dev/null
@@ -1,530 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.display;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-
-import com.fasterxml.jackson.annotation.JsonGetter;
-import com.fasterxml.jackson.annotation.JsonInclude;
-
-import org.apache.avro.reflect.Nullable;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.joda.time.format.DateTimeFormatter;
-import org.joda.time.format.ISODateTimeFormat;
-
-import java.util.Collection;
-import java.util.Map;
-import java.util.Objects;
-import java.util.Set;
-
-/**
- * Static display metadata associated with a pipeline component. Display data is useful for
- * pipeline runner UIs and diagnostic dashboards to display details about
- * {@link PTransform PTransforms} that make up a pipeline.
- *
- * <p>Components specify their display data by implementing the {@link HasDisplayData}
- * interface.
- */
-public class DisplayData {
-  /** Shared instance with no entries, handed out by {@link #none()}. */
-  private static final DisplayData EMPTY = new DisplayData(Maps.<Identifier, Item>newHashMap());
-
-  /** ISO date-time formatter obtained from {@link ISODateTimeFormat#dateTime()}. */
-  private static final DateTimeFormatter TIMESTAMP_FORMATTER = ISODateTimeFormat.dateTime();
-
-  /** Immutable copy of the items, keyed by identifier. */
-  private final ImmutableMap<Identifier, Item> entries;
-
-  /** Stores an immutable copy of {@code source}. */
-  private DisplayData(Map<Identifier, Item> source) {
-    entries = ImmutableMap.copyOf(source);
-  }
-
-  /**
-   * Returns the shared, empty {@link DisplayData} instance.
-   */
-  public static DisplayData none() {
-    return DisplayData.EMPTY;
-  }
-
-  /**
-   * Collect the {@link DisplayData} from a component. This will traverse all subcomponents
-   * specified via {@link Builder#include} in the given component. Data in this component will be in
-   * a namespace derived from the component.
-   */
-  public static DisplayData from(HasDisplayData component) {
-    checkNotNull(component);
-    return InternalBuilder.forRoot(component).build();
-  }
-
-  /** Returns the values of the entry map, i.e. the display items without their identifiers. */
-  public Collection<Item> items() {
-    return this.entries.values();
-  }
-
-  /** Returns the immutable map from identifier to display item backing this instance. */
-  public Map<Identifier, Item> asMap() {
-    return this.entries;
-  }
-
-  /** Renders one map entry per line, with no trailing newline. */
-  @Override
-  public String toString() {
-    StringBuilder rendered = new StringBuilder();
-    String separator = "";
-    for (Map.Entry<Identifier, Item> entry : entries.entrySet()) {
-      // The separator is empty before the first entry and a newline before every later one.
-      rendered.append(separator).append(entry);
-      separator = "\n";
-    }
-    return rendered.toString();
-  }
-
-  /**
-   * Utility to build up display metadata from a component and its included
-   * subcomponents.
-   *
-   * <p>Every {@code add} overload returns an {@link ItemBuilder}, which can attach optional
-   * fields to the item just added or go on to register further items.
-   */
-  public interface Builder {
-    /**
-     * Include display metadata from the specified subcomponent. For example, a {@link ParDo}
-     * transform includes display metadata from the encapsulated {@link DoFn}.
-     *
-     * @param subComponent the subcomponent whose display metadata should be included
-     * @return A builder instance to continue to build in a fluent-style.
-     */
-    Builder include(HasDisplayData subComponent);
-
-    /**
-     * Register the given string display metadata. The metadata item will be registered with type
-     * {@link DisplayData.Type#STRING}, and is identified by the specified key and namespace from
-     * the current transform or component.
-     *
-     * @param key the key identifying the item
-     * @param value the string value to register
-     */
-    ItemBuilder add(String key, String value);
-
-    /**
-     * Register the given numeric display metadata. The metadata item will be registered with type
-     * {@link DisplayData.Type#INTEGER}, and is identified by the specified key and namespace from
-     * the current transform or component.
-     *
-     * @param key the key identifying the item
-     * @param value the integer value to register
-     */
-    ItemBuilder add(String key, long value);
-
-    /**
-     * Register the given floating point display metadata. The metadata item will be registered with
-     * type {@link DisplayData.Type#FLOAT}, and is identified by the specified key and namespace
-     * from the current transform or component.
-     *
-     * @param key the key identifying the item
-     * @param value the floating point value to register
-     */
-    ItemBuilder add(String key, double value);
-
-    /**
-     * Register the given timestamp display metadata. The metadata item will be registered with type
-     * {@link DisplayData.Type#TIMESTAMP}, and is identified by the specified key and namespace from
-     * the current transform or component.
-     *
-     * @param key the key identifying the item
-     * @param value the timestamp to register
-     */
-    ItemBuilder add(String key, Instant value);
-
-    /**
-     * Register the given duration display metadata. The metadata item will be registered with type
-     * {@link DisplayData.Type#DURATION}, and is identified by the specified key and namespace from
-     * the current transform or component.
-     *
-     * @param key the key identifying the item
-     * @param value the duration to register
-     */
-    ItemBuilder add(String key, Duration value);
-
-    /**
-     * Register the given class display metadata. The metadata item will be registered with type
-     * {@link DisplayData.Type#JAVA_CLASS}, and is identified by the specified key and namespace
-     * from the current transform or component.
-     *
-     * @param key the key identifying the item
-     * @param value the class to register
-     */
-    ItemBuilder add(String key, Class<?> value);
-  }
-
-  /**
-   * Utility to append optional fields to display metadata, or register additional display metadata
-   * items.
-   *
-   * <p>Because this interface extends {@link Builder}, further {@code add} and {@code include}
-   * calls can be chained after the optional fields have been set.
-   */
-  public interface ItemBuilder extends Builder {
-    /**
-     * Add a human-readable label to describe the most-recently added metadata field.
-     * A label is optional; if unspecified, UIs should display the metadata key to identify the
-     * display item.
-     *
-     * <p>Specifying a null value will clear the label if it was previously defined.
-     *
-     * @param label the label text, or null to clear a previously defined label
-     */
-    ItemBuilder withLabel(@Nullable String label);
-
-    /**
-     * Add a link URL to the most-recently added display metadata. A link URL is optional and
-     * can be provided to point the reader to additional details about the metadata.
-     *
-     * <p>Specifying a null value will clear the URL if it was previously defined.
-     *
-     * @param url the link URL, or null to clear a previously defined URL
-     */
-    ItemBuilder withLinkUrl(@Nullable String url);
-  }
-
-  /**
-   * A display metadata item. DisplayData items are registered via {@link Builder#add} within
-   * {@link HasDisplayData#populateDisplayData} implementations. Each metadata item is uniquely
-   * identified by the specified key and namespace generated from the registering component's
-   * class name.
-   *
-   * <p>Items are immutable: every field is final, and the private {@code withLabel} and
-   * {@code withUrl} helpers return a modified copy rather than changing the receiver. The value
-   * is stored in its formatted string form, as produced by the item type's {@code format} method.
-   */
-  public static class Item {
-    private final String key;
-    private final String ns; // namespace, exposed as the "namespace" JSON property
-    private final Type type;
-    private final String value; // long (full) formatted value
-    private final String shortValue; // optional abbreviated value; null when the type has none
-    private final String label; // optional; null until set through withLabel
-    private final String url; // optional; null until set through withUrl
-
-    private static <T> Item create(String namespace, String key, Type type, T value) {
-      FormattedItemValue formatted = type.format(value);
-      return new Item( // a freshly created item has neither a link URL nor a label
-        namespace, key, type, formatted.getLongValue(), formatted.getShortValue(), null, null);
-    }
-
-    private Item(
-        String namespace,
-        String key,
-        Type type,
-        String value,
-        String shortValue,
-        String url, // NOTE: url comes before label in this parameter list
-        String label) {
-      this.ns = namespace;
-      this.key = key;
-      this.type = type;
-      this.value = value;
-      this.shortValue = shortValue;
-      this.url = url;
-      this.label = label;
-    }
-
-    @JsonGetter("namespace")
-    public String getNamespace() {
-      return ns;
-    }
-
-    @JsonGetter("key")
-    public String getKey() {
-      return key;
-    }
-
-    /**
-     * Retrieve the {@link DisplayData.Type} of display metadata. All metadata conforms to a
-     * predefined set of allowed types.
-     */
-    @JsonGetter("type")
-    public Type getType() {
-      return type;
-    }
-
-    /**
-     * Retrieve the value of the metadata item. This is the long (full) formatted string form of
-     * the value.
-     */
-    @JsonGetter("value")
-    public String getValue() {
-      return value;
-    }
-
-    /**
-     * Return the optional short value for an item. Types may provide a short-value to be
-     * displayed instead of or in addition to the full {@link Item#value}.
-     *
-     * <p>Some display data types will not provide a short value, in which case the return value
-     * will be null.
-     */
-    @JsonGetter("shortValue")
-    @JsonInclude(JsonInclude.Include.NON_NULL)
-    @Nullable
-    public String getShortValue() {
-      return shortValue;
-    }
-
-    /**
-     * Retrieve the optional label for an item. The label is a human-readable description of what
-     * the metadata represents. UIs may choose to display the label instead of the item key.
-     *
-     * <p>If no label was specified, this will return {@code null}.
-     */
-    @JsonGetter("label")
-    @JsonInclude(JsonInclude.Include.NON_NULL)
-    @Nullable
-    public String getLabel() {
-      return label;
-    }
-
-    /**
-     * Retrieve the optional link URL for an item. The URL points to an address where the reader
-     * can find additional context for the display metadata.
-     *
-     * <p>If no URL was specified, this will return {@code null}.
-     */
-    @JsonGetter("linkUrl")
-    @JsonInclude(JsonInclude.Include.NON_NULL)
-    @Nullable
-    public String getLinkUrl() {
-      return url;
-    }
-
-    @Override
-    public String toString() {
-      return getValue(); // the full value only; key, namespace and type are not included
-    }
-
-    private Item withLabel(String label) { // returns a copy; this item is left unchanged
-      return new Item(this.ns, this.key, this.type, this.value, this.shortValue, this.url, label);
-    }
-
-    private Item withUrl(String url) { // returns a copy; this item is left unchanged
-      return new Item(this.ns, this.key, this.type, this.value, this.shortValue, url, this.label);
-    }
-  }
-
-  /**
-   * Unique identifier for a display metadata item within a component.
-   * Identifiers are composed of the key they are registered with and a namespace generated from
-   * the class of the component which registered the item.
-   *
-   * <p>Display metadata registered with the same key from different components will have different
-   * namespaces and thus will both be represented in the composed {@link DisplayData}. Registering
-   * a second item under an identifier that already exists is not allowed: the builder's
-   * {@code addItem} rejects it with an {@link IllegalArgumentException}.
-   *
-   * <p>Equality and hash code are based on both the namespace and the key, which makes instances
-   * usable as map keys; {@link #toString} renders the identifier as {@code namespace:key}.
-   */
-  public static class Identifier {
-    private final String ns;
-    private final String key;
-
-    static Identifier of(Class<?> namespace, String key) { // namespace string is Class#getName()
-      return new Identifier(namespace.getName(), key);
-    }
-
-    private Identifier(String ns, String key) {
-      this.ns = ns;
-      this.key = key;
-    }
-
-    public String getNamespace() {
-      return ns;
-    }
-
-    public String getKey() {
-      return key;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj instanceof Identifier) { // also false for null
-        Identifier that = (Identifier) obj;
-        return Objects.equals(this.ns, that.ns)
-          && Objects.equals(this.key, that.key);
-      }
-
-      return false;
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(ns, key); // same fields as equals
-    }
-
-    @Override
-    public String toString() {
-      return String.format("%s:%s", ns, key);
-    }
-  }
-
-  /**
-   * Display metadata type.
-   *
-   * <p>Each constant implements {@link #format} by casting the value to the Java type it expects:
-   * <ul>
-   *   <li>{@code STRING}: a {@code String}, used as-is.</li>
-   *   <li>{@code INTEGER}: a {@code long}, rendered with {@code Long.toString}.</li>
-   *   <li>{@code FLOAT}: a {@code Double}, rendered with {@code Double.toString}.</li>
-   *   <li>{@code TIMESTAMP}: an {@code Instant}, printed with {@code TIMESTAMP_FORMATTER}.</li>
-   *   <li>{@code DURATION}: a {@code Duration}, rendered as its length in milliseconds.</li>
-   *   <li>{@code JAVA_CLASS}: a {@code Class}; the long value is {@code getName()} and the short
-   *     value is {@code getSimpleName()}.</li>
-   * </ul>
-   *
-   * <p>Only {@code JAVA_CLASS} supplies a short value; the other types leave it {@code null}.
-   */
-  enum Type {
-    STRING {
-      @Override
-      FormattedItemValue format(Object value) {
-        return new FormattedItemValue((String) value);
-      }
-    },
-    INTEGER {
-      @Override
-      FormattedItemValue format(Object value) {
-        return new FormattedItemValue(Long.toString((long) value));
-      }
-    },
-    FLOAT {
-      @Override
-      FormattedItemValue format(Object value) {
-        return new FormattedItemValue(Double.toString((Double) value));
-      }
-    },
-    TIMESTAMP() {
-      @Override
-      FormattedItemValue format(Object value) {
-        return new FormattedItemValue((TIMESTAMP_FORMATTER.print((Instant) value)));
-      }
-    },
-    DURATION {
-      @Override
-      FormattedItemValue format(Object value) {
-        return new FormattedItemValue(Long.toString(((Duration) value).getMillis()));
-      }
-    },
-    JAVA_CLASS {
-      @Override
-      FormattedItemValue format(Object value) {
-        Class<?> clazz = (Class<?>) value;
-        return new FormattedItemValue(clazz.getName(), clazz.getSimpleName());
-      }
-    };
-
-    /**
-     * Format the display metadata value into a long string representation, and optionally
-     * a shorter representation for display.
-     *
-     * <p>Internal-only. Value objects can be safely cast to the expected Java type. Each
-     * implementation casts without checking, so a value of any other type fails with a
-     * {@link ClassCastException}.
-     *
-     * @param value the value to format; must be of the Java type this constant expects
-     * @return the long representation, plus a short one where the type provides it
-     */
-    abstract FormattedItemValue format(Object value);
-  }
-
-  private static class FormattedItemValue { // long/short string forms returned by Type#format
-    private final String shortValue; // null when constructed with a long value only
-    private final String longValue;
-
-    private FormattedItemValue(String longValue) {
-      this(longValue, null); // no short form
-    }
-
-    private FormattedItemValue(String longValue, String shortValue) {
-      this.longValue = longValue;
-      this.shortValue = shortValue;
-    }
-
-    private String getLongValue () {
-      return this.longValue;
-    }
-
-    private String getShortValue() {
-      return this.shortValue;
-    }
-  }
-
-  private static class InternalBuilder implements ItemBuilder { // accumulates items into a map
-    private final Map<Identifier, Item> entries; // registered items, keyed by namespace + key
-    private final Set<Object> visited; // identity set of components already included
-
-    private Class<?> latestNs; // class of the component currently being populated
-    private Item latestItem; // most recently added item; target of withLabel/withLinkUrl
-    private Identifier latestIdentifier; // identifier under which latestItem is stored
-
-    private InternalBuilder() {
-      this.entries = Maps.newHashMap();
-      this.visited = Sets.newIdentityHashSet();
-    }
-
-    private static InternalBuilder forRoot(HasDisplayData instance) {
-      InternalBuilder builder = new InternalBuilder();
-      builder.include(instance); // the root is populated the same way as any sub-component
-      return builder;
-    }
-
-    @Override
-    public Builder include(HasDisplayData subComponent) {
-      checkNotNull(subComponent);
-      boolean newComponent = visited.add(subComponent); // false when already visited
-      if (newComponent) {
-        Class prevNs = this.latestNs; // NOTE(review): raw type; Class<?> would avoid the warning
-        this.latestNs = subComponent.getClass(); // items added below use this class as namespace
-        subComponent.populateDisplayData(this);
-        this.latestNs = prevNs; // restore it so later add() calls use the includer's namespace
-      }
-
-      return this;
-    }
-
-    @Override
-    public ItemBuilder add(String key, String value) {
-      checkNotNull(value);
-      return addItem(key, Type.STRING, value);
-    }
-
-    @Override
-    public ItemBuilder add(String key, long value) {
-      return addItem(key, Type.INTEGER, value);
-    }
-
-    @Override
-    public ItemBuilder add(String key, double value) {
-      return addItem(key, Type.FLOAT, value);
-    }
-
-    @Override
-    public ItemBuilder add(String key, Instant value) {
-      checkNotNull(value);
-      return addItem(key, Type.TIMESTAMP, value);
-    }
-
-    @Override
-    public ItemBuilder add(String key, Duration value) {
-      checkNotNull(value);
-      return addItem(key, Type.DURATION, value);
-    }
-
-    @Override
-    public ItemBuilder add(String key, Class<?> value) {
-      checkNotNull(value);
-      return addItem(key, Type.JAVA_CLASS, value);
-    }
-
-    private <T> ItemBuilder addItem(String key, Type type, T value) {
-      checkNotNull(key);
-      checkArgument(!key.isEmpty()); // empty keys are rejected
-
-      Identifier id = Identifier.of(latestNs, key);
-      if (entries.containsKey(id)) { // a key may be registered only once per namespace
-        throw new IllegalArgumentException("DisplayData key already exists. All display data "
-          + "for a component must be registered with a unique key.\nKey: " + id);
-      }
-      Item item = Item.create(id.getNamespace(), key, type, value);
-      entries.put(id, item);
-
-      latestItem = item; // remembered so withLabel/withLinkUrl can decorate it
-      latestIdentifier = id;
-
-      return this;
-    }
-
-    @Override
-    public ItemBuilder withLabel(String label) {
-      latestItem = latestItem.withLabel(label); // NOTE(review): NPE if no item was added yet
-      entries.put(latestIdentifier, latestItem); // overwrite the stored entry with the new copy
-      return this;
-    }
-
-    @Override
-    public ItemBuilder withLinkUrl(String url) {
-      latestItem = latestItem.withUrl(url); // NOTE(review): NPE if no item was added yet
-      entries.put(latestIdentifier, latestItem); // overwrite the stored entry with the new copy
-      return this;
-    }
-
-    private DisplayData build() {
-      return new DisplayData(this.entries);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/display/HasDisplayData.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/display/HasDisplayData.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/display/HasDisplayData.java
deleted file mode 100644
index b2eca3d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/display/HasDisplayData.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.display;
-
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-
-/**
- * Interface for {@link PTransform PTransforms} and components used within
- * {@link PTransform PTransforms} to specify display metadata to be used within UIs and diagnostic
- * tools.
- *
- * <p>Display metadata is optional and may be collected during pipeline construction. It should
- * only be used for informational purposes. Tools and components should not assume that display
- * data will always be collected, or that collected display data will always be displayed.
- */
-public interface HasDisplayData {
-  /**
-   * Register display metadata for the given transform or component. Metadata can be registered
-   * directly on the provided builder, as well as via included sub-components.
-   *
-   * <pre>
-   * {@code
-   * @Override
-   * public void populateDisplayData(DisplayData.Builder builder) {
-   *  builder
-   *     .include(subComponent)
-   *     .add("minFilter", 42)
-   *     .add("topic", "projects/myproject/topics/mytopic")
-   *       .withLabel("Pub/Sub Topic")
-   *     .add("serviceInstance", "myservice.com/fizzbang")
-   *       .withLinkUrl("http://www.myservice.com/fizzbang");
-   * }
-   * }
-   * </pre>
-   *
-   * <p>In the example, {@code withLabel} and {@code withLinkUrl} decorate the item added by the
-   * {@code add} call immediately preceding them.
-   *
-   * @param builder The builder to populate with display metadata.
-   */
-  void populateDisplayData(DisplayData.Builder builder);
-}

[03/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java
deleted file mode 100644
index 58550e4..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionTuple.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.common.collect.ImmutableMap;
-
-import java.util.Collection;
-import java.util.Collections;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-/**
- * A {@link PCollectionTuple} is an immutable tuple of
- * heterogeneously-typed {@link PCollection PCollections}, "keyed" by
- * {@link TupleTag TupleTags}. A {@link PCollectionTuple} can be used as the input or
- * output of a
- * {@link PTransform} taking
- * or producing multiple PCollection inputs or outputs that can be of
- * different types, for instance a
- * {@link ParDo} with side
- * outputs.
- *
- * <p>A {@link PCollectionTuple} can be created and accessed as follows:
- * <pre> {@code
- * PCollection<String> pc1 = ...;
- * PCollection<Integer> pc2 = ...;
- * PCollection<Iterable<String>> pc3 = ...;
- *
- * // Create TupleTags for each of the PCollections to put in the
- * // PCollectionTuple (the type of the TupleTag enables tracking the
- * // static type of each of the PCollections in the PCollectionTuple):
- * TupleTag<String> tag1 = new TupleTag<>();
- * TupleTag<Integer> tag2 = new TupleTag<>();
- * TupleTag<Iterable<String>> tag3 = new TupleTag<>();
- *
- * // Create a PCollectionTuple with three PCollections:
- * PCollectionTuple pcs =
- *     PCollectionTuple.of(tag1, pc1)
- *                     .and(tag2, pc2)
- *                     .and(tag3, pc3);
- *
- * // Create an empty PCollectionTuple:
- * Pipeline p = ...;
- * PCollectionTuple pcs2 = PCollectionTuple.empty(p);
- *
- * // Get PCollections out of a PCollectionTuple, using the same tags
- * // that were used to put them in:
- * PCollection<Integer> pcX = pcs.get(tag2);
- * PCollection<String> pcY = pcs.get(tag1);
- * PCollection<Iterable<String>> pcZ = pcs.get(tag3);
- *
- * // Get a map of all PCollections in a PCollectionTuple:
- * Map<TupleTag<?>, PCollection<?>> allPcs = pcs.getAll();
- * } </pre>
- */
-public class PCollectionTuple implements PInput, POutput {
-  /**
-   * Returns an empty {@link PCollectionTuple} that is part of the given {@link Pipeline}.
-   *
-   * <p>A {@link PCollectionTuple} containing additional elements can be created by calling
-   * {@link #and} on the result.
-   */
-  public static PCollectionTuple empty(Pipeline pipeline) {
-    return new PCollectionTuple(pipeline);
-  }
-
-  /**
-   * Returns a singleton {@link PCollectionTuple} containing the given
-   * {@link PCollection} keyed by the given {@link TupleTag}.
-   *
-   * <p>The resulting tuple belongs to the {@link Pipeline} of the given {@link PCollection}.
-   *
-   * <p>A {@link PCollectionTuple} containing additional elements can be created by calling
-   * {@link #and} on the result.
-   */
-  public static <T> PCollectionTuple of(TupleTag<T> tag, PCollection<T> pc) {
-    return empty(pc.getPipeline()).and(tag, pc);
-  }
-
-  /**
-   * Returns a new {@link PCollectionTuple} that has each {@link PCollection} and
-   * {@link TupleTag} of this {@link PCollectionTuple} plus the given {@link PCollection}
-   * associated with the given {@link TupleTag}.
-   *
-   * <p>The given {@link TupleTag} should not already be mapped to a
-   * {@link PCollection} in this {@link PCollectionTuple}.
-   * NOTE(review): the new map is assembled with Guava's {@code ImmutableMap.Builder}, which
-   * presumably rejects an already-present tag when building; confirm against the Guava version
-   * in use.
-   *
-   * <p>Each {@link PCollection} in the resulting {@link PCollectionTuple} must be
-   * part of the same {@link Pipeline}.
-   *
-   * @throws IllegalArgumentException if {@code pc} does not belong to the same {@link Pipeline}
-   *     instance as this tuple
-   */
-  public <T> PCollectionTuple and(TupleTag<T> tag, PCollection<T> pc) {
-    if (pc.getPipeline() != pipeline) { // identity comparison: must be the very same Pipeline
-      throw new IllegalArgumentException(
-          "PCollections come from different Pipelines");
-    }
-
-    return new PCollectionTuple(pipeline,
-        new ImmutableMap.Builder<TupleTag<?>, PCollection<?>>()
-            .putAll(pcollectionMap)
-            .put(tag, pc)
-            .build());
-  }
-
-  /**
-   * Returns whether this {@link PCollectionTuple} contains a {@link PCollection} with
-   * the given tag.
-   */
-  public <T> boolean has(TupleTag<T> tag) {
-    return pcollectionMap.containsKey(tag);
-  }
-
-  /**
-   * Returns the {@link PCollection} associated with the given {@link TupleTag}
-   * in this {@link PCollectionTuple}. Throws {@link IllegalArgumentException} if there is no
-   * such {@link PCollection}, i.e., {@code !has(tag)}.
-   */
-  public <T> PCollection<T> get(TupleTag<T> tag) {
-    @SuppressWarnings("unchecked")
-    PCollection<T> pcollection = (PCollection<T>) pcollectionMap.get(tag);
-    if (pcollection == null) {
-      throw new IllegalArgumentException(
-          "TupleTag not found in this PCollectionTuple tuple");
-    }
-    return pcollection;
-  }
-
-  /**
-   * Returns an immutable Map from {@link TupleTag} to corresponding
-   * {@link PCollection}, for all the members of this {@link PCollectionTuple}.
-   *
-   * <p>The returned map is the tuple's own unmodifiable map, not a copy.
-   */
-  public Map<TupleTag<?>, PCollection<?>> getAll() {
-    return pcollectionMap;
-  }
-
-  /**
-   * Like {@link #apply(String, PTransform)} but defaulting to the name
-   * of the {@link PTransform}.
-   *
-   * @return the output of the applied {@link PTransform}
-   */
-  public <OutputT extends POutput> OutputT apply(
-      PTransform<PCollectionTuple, OutputT> t) {
-    return Pipeline.applyTransform(this, t);
-  }
-
-  /**
-   * Applies the given {@link PTransform} to this input {@link PCollectionTuple},
-   * using {@code name} to identify this specific application of the transform.
-   * This name is used in various places, including the monitoring UI, logging,
-   * and to stably identify this application node in the job graph.
-   *
-   * @return the output of the applied {@link PTransform}
-   */
-  public <OutputT extends POutput> OutputT apply(
-      String name, PTransform<PCollectionTuple, OutputT> t) {
-    return Pipeline.applyTransform(name, this, t);
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal details below here.
-
-  Pipeline pipeline; // not final, but within this class it is assigned only in the constructor
-  final Map<TupleTag<?>, PCollection<?>> pcollectionMap; // always an unmodifiable view
-
-  PCollectionTuple(Pipeline pipeline) {
-    this(pipeline, new LinkedHashMap<TupleTag<?>, PCollection<?>>());
-  }
-
-  PCollectionTuple(Pipeline pipeline,
-                   Map<TupleTag<?>, PCollection<?>> pcollectionMap) {
-    this.pipeline = pipeline;
-    this.pcollectionMap = Collections.unmodifiableMap(pcollectionMap); // wraps; does not copy
-  }
-
-  /**
-   * Returns a {@link PCollectionTuple} with each of the given tags mapping to a new
-   * output {@link PCollection}.
-   *
-   * <p>Every created {@link PCollection} shares the given windowing strategy and boundedness,
-   * and gets its type descriptor from its tag. The tags keep the order of {@code outputTags}.
-   *
-   * <p>For use by primitive transformations only.
-   *
-   * @throws IllegalArgumentException if {@code outputTags} contains the same tag more than once
-   */
-  public static PCollectionTuple ofPrimitiveOutputsInternal(
-      Pipeline pipeline,
-      TupleTagList outputTags,
-      WindowingStrategy<?, ?> windowingStrategy,
-      IsBounded isBounded) {
-    Map<TupleTag<?>, PCollection<?>> pcollectionMap = new LinkedHashMap<>();
-    for (TupleTag<?> outputTag : outputTags.tupleTags) {
-      if (pcollectionMap.containsKey(outputTag)) {
-        throw new IllegalArgumentException(
-            "TupleTag already present in this tuple");
-      }
-
-      // In fact, `token` and `outputCollection` should have
-      // types TypeDescriptor<T> and PCollection<T> for some
-      // unknown T. It is safe to create `outputCollection`
-      // with type PCollection<Object> because it has the same
-      // erasure as the correct type. When a transform adds
-      // elements to `outputCollection` they will be of type T.
-      @SuppressWarnings("unchecked")
-      TypeDescriptor<Object> token = (TypeDescriptor<Object>) outputTag.getTypeDescriptor();
-      PCollection<Object> outputCollection = PCollection
-          .createPrimitiveOutputInternal(pipeline, windowingStrategy, isBounded)
-          .setTypeDescriptorInternal(token);
-
-      pcollectionMap.put(outputTag, outputCollection);
-    }
-    return new PCollectionTuple(pipeline, pcollectionMap);
-  }
-
-  @Override
-  public Pipeline getPipeline() {
-    return pipeline;
-  }
-
-  @Override
-  public Collection<? extends PValue> expand() {
-    return pcollectionMap.values(); // the member PCollections, in map iteration order
-  }
-
-  @Override
-  public void recordAsOutput(AppliedPTransform<?, ?, ?> transform) {
-    int i = 0; // position of the entry in map iteration order, passed to getOutName
-    for (Map.Entry<TupleTag<?>, PCollection<?>> entry
-             : pcollectionMap.entrySet()) {
-      TupleTag<?> tag = entry.getKey();
-      PCollection<?> pc = entry.getValue();
-      pc.recordAsOutput(transform, tag.getOutName(i));
-      i++;
-    }
-  }
-
-  @Override
-  public void finishSpecifying() {
-    for (PCollection<?> pc : pcollectionMap.values()) { // delegate to every member
-      pc.finishSpecifying();
-    }
-  }
-
-  @Override
-  public void finishSpecifyingOutput() {
-    for (PCollection<?> pc : pcollectionMap.values()) { // delegate to every member
-      pc.finishSpecifyingOutput();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java
deleted file mode 100644
index 515e21b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PCollectionView.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.View;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-
-import java.io.Serializable;
-
-/**
- * A {@link PCollectionView PCollectionView&lt;T&gt;} is an immutable view of a {@link PCollection}
- * as a value of type {@code T} that can be accessed
- * as a side input to a {@link ParDo} transform.
- *
- * <p>A {@link PCollectionView} should always be the output of a
- * {@link com.google.cloud.dataflow.sdk.transforms.PTransform}. It is the joint responsibility of
- * this transform and each {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} to implement
- * the view in a runner-specific manner.
- *
- * <p>The most common case is using the {@link View} transforms to prepare a {@link PCollection}
- * for use as a side input to {@link ParDo}. See {@link View#asSingleton()},
- * {@link View#asIterable()}, and {@link View#asMap()} for more detail on specific views
- * available in the SDK.
- *
- * <p>The methods declared here are all marked for internal use; user code is not expected to
- * call them.
- *
- * @param <T> the type of the value(s) accessible via this {@link PCollectionView}
- */
-public interface PCollectionView<T> extends PValue, Serializable {
-  /**
-   * A unique identifier, for internal use.
-   *
-   * @return the {@link TupleTag} identifying this view
-   */
-  public TupleTag<Iterable<WindowedValue<?>>> getTagInternal();
-
-  /**
-   * For internal use only.
-   *
-   * <p>NOTE(review): presumably builds the view's value of type {@code T} from the given windowed
-   * contents; confirm against the implementations.
-   */
-  public T fromIterableInternal(Iterable<WindowedValue<?>> contents);
-
-  /**
-   * For internal use only.
-   *
-   * @return the {@link WindowingStrategy} associated with this view
-   */
-  public WindowingStrategy<?, ?> getWindowingStrategyInternal();
-
-  /**
-   * For internal use only.
-   *
-   * @return a {@link Coder} for iterables of {@link WindowedValue WindowedValues}
-   */
-  public Coder<Iterable<WindowedValue<?>>> getCoderInternal();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java
deleted file mode 100644
index 39a0061..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PDone.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-
-import java.util.Collection;
-import java.util.Collections;
-
-/**
- * {@link PDone} is the output of a {@link PTransform} that has a trivial result,
- * such as a {@code Write}.
- *
- * <p>Instances are obtained through {@link #in}; the constructor is private.
- */
-public class PDone extends POutputValueBase {
-
-  /**
-   * Creates a {@link PDone} in the given {@link Pipeline}.
-   */
-  public static PDone in(Pipeline pipeline) {
-    return new PDone(pipeline);
-  }
-
-  @Override
-  public Collection<? extends PValue> expand() {
-    // A PDone contains no PValues.
-    return Collections.emptyList();
-  }
-
-  private PDone(Pipeline pipeline) {
-    super(pipeline);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java
deleted file mode 100644
index 89b097a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PInput.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-
-import java.util.Collection;
-
-/**
- * The interface for things that might be input to a
- * {@link com.google.cloud.dataflow.sdk.transforms.PTransform}.
- */
-public interface PInput {
-  /**
-   * Returns the owning {@link Pipeline} of this {@link PInput}.
-   */
-  public Pipeline getPipeline();
-
-  /**
-   * Expands this {@link PInput} into a list of its component output
-   * {@link PValue PValues}.
-   *
-   * <ul>
-   *   <li>A {@link PValue} expands to itself.</li>
-   *   <li>A tuple or list of {@link PValue PValues} (such as
-   *     {@link PCollectionTuple} or {@link PCollectionList})
-   *     expands to its component {@link PValue PValues}.</li>
-   * </ul>
-   *
-   * <p>Not intended to be invoked directly by user code.
-   */
-  public Collection<? extends PValue> expand();
-
-  /**
-   * After building, finalizes this {@code PInput} to make it ready for
-   * being used as an input to a {@link com.google.cloud.dataflow.sdk.transforms.PTransform}.
-   *
-   * <p>Automatically invoked whenever {@code apply()} is invoked on
-   * this {@code PInput}, so users do not normally call this explicitly.
-   */
-  public void finishSpecifying();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java
deleted file mode 100644
index f99bc0b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutput.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-
-import java.util.Collection;
-
-/**
- * The interface for things that might be output from a {@link PTransform}.
- */
-public interface POutput {
-
-  /**
-   * Returns the owning {@link Pipeline} of this {@link POutput}.
-   */
-  public Pipeline getPipeline();
-
-  /**
-   * Expands this {@link POutput} into a list of its component output
-   * {@link PValue PValues}.
-   *
-   * <ul>
-   *   <li>A {@link PValue} expands to itself.</li>
-   *   <li>A tuple or list of {@link PValue PValues} (such as
-   *     {@link PCollectionTuple} or {@link PCollectionList})
-   *     expands to its component {@link PValue PValues}.</li>
-   * </ul>
-   *
-   * <p>Not intended to be invoked directly by user code.
-   */
-  public Collection<? extends PValue> expand();
-
-  /**
-   * Records that this {@code POutput} is an output of the given
-   * {@code PTransform}.
-   *
-   * <p>For a compound {@code POutput}, it is advised to call
-   * this method on each component {@code POutput}.
-   *
-   * <p>This is not intended to be invoked by user code, but
-   * is automatically invoked as part of applying the
-   * producing {@link PTransform}.
-   *
-   * @param transform the applied transform that produced this output
-   */
-  public void recordAsOutput(AppliedPTransform<?, ?, ?> transform);
-
-  /**
-   * As part of applying the producing {@link PTransform}, finalizes this
-   * output to make it ready for being used as an input and for running.
-   *
-   * <p>This includes ensuring that all {@link PCollection PCollections}
-   * have coders ({@code Coder}) specified or defaulted.
-   *
-   * <p>Automatically invoked whenever this {@link POutput} is used
-   * as a {@link PInput} to another {@link PTransform}, or if never
-   * used as a {@link PInput}, when {@link Pipeline#run}
-   * is called, so users do not normally call this explicitly.
-   */
-  public void finishSpecifyingOutput();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java
deleted file mode 100644
index 69e04c3..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/POutputValueBase.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-
-/**
- * A {@link POutputValueBase} is the abstract base class of
- * {@code PTransform} outputs.
- *
- * <p>A {@link POutput} that adds tracking of its producing
- * {@link AppliedPTransform}.
- *
- * <p>For internal use.
- */
-public abstract class POutputValueBase implements POutput {
-
-  private final Pipeline pipeline;
-
-  protected POutputValueBase(Pipeline pipeline) {
-    this.pipeline = pipeline;
-  }
-
-  /**
-   * No-arg constructor for Java serialization only.
-   * The resulting {@link POutputValueBase} is unlikely to be
-   * valid.
-   */
-  protected POutputValueBase() {
-    pipeline = null;
-  }
-
-  @Override
-  public Pipeline getPipeline() {
-    return pipeline;
-  }
-
-  /**
-   * Returns the {@link AppliedPTransform} that this {@link POutputValueBase}
-   * is an output of.
-   *
-   * <p>For internal use only.
-   */
-  public AppliedPTransform<?, ?, ?> getProducingTransformInternal() {
-    return producingTransform;
-  }
-
-  /**
-   * Records that this {@link POutputValueBase} is an output with the
-   * given name of the given {@link AppliedPTransform}.
-   *
-   * <p>To be invoked only by {@link POutput#recordAsOutput}
-   * implementations.  Not to be invoked directly by user code.
-   */
-  @Override
-  public void recordAsOutput(AppliedPTransform<?, ?, ?> transform) {
-    if (producingTransform != null) {
-      // Already used this POutput as a PTransform output.  This can
-      // happen if the POutput is an output of a transform within a
-      // composite transform, and is also the result of the composite.
-      // We want to record the "immediate" atomic transform producing
-      // this output, and ignore all later composite transforms that
-      // also produce this output.
-      //
-      // Pipeline.applyInternal() uses !hasProducingTransform() to
-      // avoid calling this operation redundantly, but
-      // hasProducingTransform() doesn't apply to POutputValueBases
-      // that aren't PValues or composites of PValues, e.g., PDone.
-      return;
-    }
-    producingTransform = transform;
-  }
-
-  /**
-   * Default behavior for {@link #finishSpecifyingOutput()} is
-   * to do nothing. Override if your {@link PValue} requires
-   * finalization.
-   */
-  @Override
-  public void finishSpecifyingOutput() { }
-
-  /**
-   * The {@link PTransform} that produces this {@link POutputValueBase}.
-   */
-  private AppliedPTransform<?, ?, ?> producingTransform;
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java
deleted file mode 100644
index eb95a23..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValue.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-
-/**
- * The interface for values that can be input to and output from {@link PTransform PTransforms}.
- */
-public interface PValue extends POutput, PInput {
-
-  /**
-   * Returns the name of this {@link PValue}.
-   */
-  public String getName();
-
-  /**
-   * Returns the {@link AppliedPTransform} that this {@link PValue} is an output of.
-   *
-   * <p>For internal use only.
-   */
-  public AppliedPTransform<?, ?, ?> getProducingTransformInternal();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java
deleted file mode 100644
index 7e57204..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/PValueBase.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.util.StringUtils;
-
-import java.util.Collection;
-import java.util.Collections;
-
-/**
- * A {@link PValueBase} is an abstract base class that provides
- * sensible default implementations for methods of {@link PValue}.
- * In particular, this includes functionality for getting/setting:
- *
- * <ul>
- *   <li> The {@link Pipeline} that the {@link PValue} is part of.</li>
- *   <li> Whether the {@link PValue} has been finalized (as an input
- *     or an output), after which its properties can no longer be changed.</li>
- * </ul>
- *
- * <p>For internal use.
- */
-public abstract class PValueBase extends POutputValueBase implements PValue {
-  /**
-   * Returns the name of this {@link PValueBase}.
-   *
-   * <p>By default, the name of a {@link PValueBase} is based on the
-   * name of the {@link PTransform} that produces it.  It can be
-   * specified explicitly by calling {@link #setName}.
-   *
-   * @throws IllegalStateException if the name hasn't been set yet
-   */
-  @Override
-  public String getName() {
-    if (name == null) {
-      throw new IllegalStateException("name not set");
-    }
-    return name;
-  }
-
-  /**
-   * Sets the name of this {@link PValueBase}.  Returns {@code this}.
-   *
-   * @throws IllegalStateException if this {@link PValueBase} has
-   * already been finalized and may no longer be set.
-   */
-  public PValueBase setName(String name) {
-    if (finishedSpecifying) {
-      throw new IllegalStateException(
-          "cannot change the name of " + this + " once it's been used");
-    }
-    this.name = name;
-    return this;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  protected PValueBase(Pipeline pipeline) {
-    super(pipeline);
-  }
-
-  /**
-   * No-arg constructor for Java serialization only.
-   * The resulting {@link PValueBase} is unlikely to be
-   * valid.
-   */
-  protected PValueBase() {
-    super();
-  }
-
-  /**
-   * The name of this {@link PValueBase}, or null if not yet set.
-   */
-  private String name;
-
-  /**
-   * Whether this {@link PValueBase} has been finalized, and its core
-   * properties, e.g., name, can no longer be changed.
-   */
-  private boolean finishedSpecifying = false;
-
-  @Override
-  public void recordAsOutput(AppliedPTransform<?, ?, ?> transform) {
-    recordAsOutput(transform, "out");
-  }
-
-  /**
-   * Records that this {@link POutputValueBase} is an output with the
-   * given name of the given {@link AppliedPTransform} in the given
-   * {@link Pipeline}.
-   *
-   * <p>To be invoked only by {@link POutput#recordAsOutput}
-   * implementations.  Not to be invoked directly by user code.
-   */
-  protected void recordAsOutput(AppliedPTransform<?, ?, ?> transform,
-                                String outName) {
-    super.recordAsOutput(transform);
-    if (name == null) {
-      name = transform.getFullName() + "." + outName;
-    }
-  }
-
-  /**
-   * Returns whether this {@link PValueBase} has been finalized, and
-   * its core properties, e.g., name, can no longer be changed.
-   *
-   * <p>For internal use only.
-   */
-  public boolean isFinishedSpecifyingInternal() {
-    return finishedSpecifying;
-  }
-
-  @Override
-  public Collection<? extends PValue> expand() {
-    return Collections.singletonList(this);
-  }
-
-  @Override
-  public void finishSpecifying() {
-    finishSpecifyingOutput();
-    finishedSpecifying = true;
-  }
-
-  @Override
-  public String toString() {
-    return (name == null ? "<unnamed>" : getName())
-        + " [" + getKindString() + "]";
-  }
-
-  /**
-   * Returns a {@link String} capturing the kind of this
-   * {@link PValueBase}.
-   *
-   * <p>By default, uses the base name of the current class as its kind string.
-   */
-  protected String getKindString() {
-    return StringUtils.approximateSimpleName(getClass());
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java
deleted file mode 100644
index 1085d44..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TimestampedValue.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.InstantCoder;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Objects;
-
-/**
- * An immutable pair of a value and a timestamp.
- *
- * <p>The timestamp of a value determines many properties, such as its assignment to
- * windows and whether the value is late (with respect to the watermark of a {@link PCollection}).
- *
- * @param <V> the type of the value
- */
-public class TimestampedValue<V> {
-
-  /**
-   * Returns a new {@code TimestampedValue} with the given value and timestamp.
-   */
-  public static <V> TimestampedValue<V> of(V value, Instant timestamp) {
-    return new TimestampedValue<>(value, timestamp);
-  }
-
-  public V getValue() {
-    return value;
-  }
-
-  public Instant getTimestamp() {
-    return timestamp;
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (!(other instanceof TimestampedValue)) {
-      return false;
-    }
-    TimestampedValue<?> that = (TimestampedValue<?>) other;
-    return Objects.equals(value, that.value) && Objects.equals(timestamp, that.timestamp);
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(value, timestamp);
-  }
-
-  @Override
-  public String toString() {
-    return "TimestampedValue(" + value + ", " + timestamp + ")";
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@link Coder} for {@link TimestampedValue}.
-   */
-  public static class TimestampedValueCoder<T>
-      extends StandardCoder<TimestampedValue<T>> {
-
-    private final Coder<T> valueCoder;
-
-    public static <T> TimestampedValueCoder<T> of(Coder<T> valueCoder) {
-      return new TimestampedValueCoder<>(valueCoder);
-    }
-
-    @JsonCreator
-    public static TimestampedValueCoder<?> of(
-        @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-        List<Object> components) {
-      checkArgument(components.size() == 1,
-                    "Expecting 1 component, got " + components.size());
-      return of((Coder<?>) components.get(0));
-    }
-
-    @SuppressWarnings("unchecked")
-    TimestampedValueCoder(Coder<T> valueCoder) {
-      this.valueCoder = checkNotNull(valueCoder);
-    }
-
-    @Override
-    public void encode(TimestampedValue<T> windowedElem,
-                       OutputStream outStream,
-                       Context context)
-        throws IOException {
-      valueCoder.encode(windowedElem.getValue(), outStream, context.nested());
-      InstantCoder.of().encode(
-          windowedElem.getTimestamp(), outStream, context);
-    }
-
-    @Override
-    public TimestampedValue<T> decode(InputStream inStream, Context context)
-        throws IOException {
-      T value = valueCoder.decode(inStream, context.nested());
-      Instant timestamp = InstantCoder.of().decode(inStream, context);
-      return TimestampedValue.of(value, timestamp);
-    }
-
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      verifyDeterministic(
-          "TimestampedValueCoder requires a deterministic valueCoder",
-          valueCoder);
-    }
-
-    @Override
-    public List<? extends Coder<?>> getCoderArguments() {
-      return Arrays.<Coder<?>>asList(valueCoder);
-    }
-
-    public static <T> List<Object> getInstanceComponents(TimestampedValue<T> exampleValue) {
-      return Arrays.<Object>asList(exampleValue.getValue());
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private final V value;
-  private final Instant timestamp;
-
-  protected TimestampedValue(V value, Instant timestamp) {
-    this.value = value;
-    this.timestamp = timestamp;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java
deleted file mode 100644
index 7494921..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTag.java
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean;
-import static com.google.cloud.dataflow.sdk.util.Structs.addString;
-
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.common.collect.HashMultiset;
-import com.google.common.collect.Multiset;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.Serializable;
-import java.util.Random;
-
-/**
- * A {@link TupleTag} is a typed tag to use as the key of a
- * heterogeneously typed tuple, like {@link PCollectionTuple}.
- * Its generic type parameter allows tracking
- * the static type of things stored in tuples.
- *
- * <p>To aid in assigning default {@link Coder Coders} for results of
- * side outputs of {@link ParDo}, an output
- * {@link TupleTag} should be instantiated with an extra {@code {}} so
- * it is an instance of an anonymous subclass without generic type
- * parameters.  Input {@link TupleTag TupleTags} require no such extra
- * instantiation (although it doesn't hurt).  For example:
- *
- * <pre> {@code
- * TupleTag<SomeType> inputTag = new TupleTag<>();
- * TupleTag<SomeOtherType> outputTag = new TupleTag<SomeOtherType>(){};
- * } </pre>
- *
- * @param <V> the type of the elements or values of the tagged thing,
- * e.g., a {@code PCollection<V>}.
- */
-public class TupleTag<V> implements Serializable {
-  /**
-   * Constructs a new {@code TupleTag}, with a fresh unique id.
-   *
-   * <p>This is the normal way {@code TupleTag}s are constructed.
-   */
-  public TupleTag() {
-    this(genId(), true);
-  }
-
-  /**
-   * Constructs a new {@code TupleTag} with the given id.
-   *
-   * <p>It is up to the user to ensure that two {@code TupleTag}s
-   * with the same id actually mean the same tag and carry the same
-   * generic type parameter.  Violating this invariant can lead to
-   * hard-to-diagnose runtime type errors.  Consequently, this
-   * operation should be used very sparingly, such as when the
-   * producer and consumer of {@code TupleTag}s are written in
-   * separate modules and can only coordinate via ids rather than
-   * shared {@code TupleTag} instances.  Most of the time,
-   * {@link #TupleTag()} should be preferred.
-   */
-  public TupleTag(String id) {
-    this(id, false);
-  }
-
-  /**
-   * Returns the id of this {@code TupleTag}.
-   *
-   * <p>Two {@code TupleTag}s with the same id are considered equal.
-   *
-   * <p>{@code TupleTag}s are not ordered, i.e., the class does not implement
-   * Comparable interface. TupleTags implement equals and hashCode, making them
-   * suitable for use as keys in HashMap and HashSet.
-   */
-  public String getId() {
-    return id;
-  }
-
-  /**
-   * If this {@code TupleTag} is tagging output {@code outIndex} of
-   * a {@code PTransform}, returns the name that should be used by
-   * default for the output.
-   */
-  public String getOutName(int outIndex) {
-    if (generated) {
-      return "out" + outIndex;
-    } else {
-      return id;
-    }
-  }
-
-  /**
-   * Returns a {@code TypeDescriptor} capturing what is known statically
-   * about the type of this {@code TupleTag} instance's most-derived
-   * class.
-   *
-   * <p>This is useful for a {@code TupleTag} constructed as an
-   * instance of an anonymous subclass with a trailing {@code {}},
-   * e.g., {@code new TupleTag<SomeType>(){}}.
-   */
-  public TypeDescriptor<V> getTypeDescriptor() {
-    return new TypeDescriptor<V>(getClass()) {};
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal details below here.
-
-  static final Random RANDOM = new Random(0);
-  private static final Multiset<String> staticInits = HashMultiset.create();
-
-  final String id;
-  final boolean generated;
-
-  /** Generates and returns a fresh unique id for a TupleTag's id. */
-  static synchronized String genId() {
-    // It is a common pattern to store tags that are shared between the main
-    // program and workers in static variables, but such references are not
-    // serialized as part of the *Fns state.  Fortunately, most such tags
-    // are constructed in static class initializers, e.g.
-    //
-    //     static final TupleTag<T> MY_TAG = new TupleTag<>();
-    //
-    // and class initialization order is well defined by the JVM spec, so in
-    // this case we can assign deterministic ids.
-    StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
-    for (StackTraceElement frame : stackTrace) {
-      if (frame.getMethodName().equals("<clinit>")) {
-        int counter = staticInits.add(frame.getClassName(), 1);
-        return frame.getClassName() + "#" + counter;
-      }
-    }
-    // Otherwise, assume it'll be serialized and choose a random value to reduce
-    // the chance of collision.
-    String nonce = Long.toHexString(RANDOM.nextLong());
-    // [Thread.getStackTrace, TupleTag.genId, TupleTag.<init>, caller, ...]
-    String caller = stackTrace.length >= 4
-        ? stackTrace[3].getClassName() + "." + stackTrace[3].getMethodName()
-            + ":" + stackTrace[3].getLineNumber()
-        : "unknown";
-    return caller + "#" + nonce;
-  }
-
-  @JsonCreator
-  @SuppressWarnings("unused")
-  private static TupleTag<?> fromJson(
-      @JsonProperty(PropertyNames.VALUE) String id,
-      @JsonProperty(PropertyNames.IS_GENERATED) boolean generated) {
-    return new TupleTag<>(id, generated);
-  }
-
-  private TupleTag(String id, boolean generated) {
-    this.id = id;
-    this.generated = generated;
-  }
-
-  public CloudObject asCloudObject() {
-    CloudObject result = CloudObject.forClass(getClass());
-    addString(result, PropertyNames.VALUE, id);
-    addBoolean(result, PropertyNames.IS_GENERATED, generated);
-    return result;
-  }
-
-  @Override
-  public boolean equals(Object that) {
-    if (that instanceof TupleTag) {
-      return this.id.equals(((TupleTag<?>) that).id);
-    } else {
-      return false;
-    }
-  }
-
-  @Override
-  public int hashCode() {
-    return id.hashCode();
-  }
-
-  @Override
-  public String toString() {
-    return "Tag<" + id + ">";
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java
deleted file mode 100644
index f019fc2..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TupleTagList.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.common.collect.ImmutableList;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * A {@link TupleTagList} is an immutable list of heterogeneously
- * typed {@link TupleTag TupleTags}. A {@link TupleTagList} is used, for instance, to
- * specify the tags of the side outputs of a
- * {@link ParDo}.
- *
- * <p>A {@link TupleTagList} can be created and accessed as follows:
- * <pre> {@code
- * TupleTag<String> tag1 = ...;
- * TupleTag<Integer> tag2 = ...;
- * TupleTag<Iterable<String>> tag3 = ...;
- *
- * // Create a TupleTagList with three TupleTags:
- * TupleTagList tags = TupleTagList.of(tag1).and(tag2).and(tag3);
- *
- * // Create an empty TupleTagList:
- * Pipeline p = ...;
- * TupleTagList tags2 = TupleTagList.empty();
- *
- * // Get TupleTags out of a TupleTagList, by index (origin 0):
- * TupleTag<?> tagX = tags.get(1);
- * TupleTag<?> tagY = tags.get(0);
- * TupleTag<?> tagZ = tags.get(2);
- *
- * // Get a list of all TupleTags in a TupleTagList:
- * List<TupleTag<?>> allTags = tags.getAll();
- * } </pre>
- */
-public class TupleTagList implements Serializable {
-  /**
-   * Returns an empty {@link TupleTagList}.
-   *
-   * <p>Longer {@link TupleTagList TupleTagLists} can be created by calling
-   * {@link #and} on the result.
-   */
-  public static TupleTagList empty() {
-    return new TupleTagList();
-  }
-
-  /**
-   * Returns a singleton {@link TupleTagList} containing the given {@link TupleTag}.
-   *
-   * <p>Longer {@link TupleTagList TupleTagLists} can be created by calling
-   * {@link #and} on the result.
-   */
-  public static TupleTagList of(TupleTag<?> tag) {
-    return empty().and(tag);
-  }
-
-  /**
-   * Returns a {@link TupleTagList} containing the given {@link TupleTag TupleTags}, in order.
-   *
-   * <p>Longer {@link TupleTagList TupleTagLists} can be created by calling
-   * {@link #and} on the result.
-   */
-  public static TupleTagList of(List<TupleTag<?>> tags) {
-    return empty().and(tags);
-  }
-
-  /**
-   * Returns a new {@link TupleTagList} that has all the {@link TupleTag TupleTags} of
-   * this {@link TupleTagList} plus the given {@link TupleTag} appended to the end.
-   */
-  public TupleTagList and(TupleTag<?> tag) {
-    return new TupleTagList(
-        new ImmutableList.Builder<TupleTag<?>>()
-            .addAll(tupleTags)
-            .add(tag)
-            .build());
-  }
-
-  /**
-   * Returns a new {@link TupleTagList} that has all the {@link TupleTag TupleTags} of
-   * this {@link TupleTagList} plus the given {@link TupleTag TupleTags} appended to the end,
-   * in order.
-   */
-  public TupleTagList and(List<TupleTag<?>> tags) {
-    return new TupleTagList(
-        new ImmutableList.Builder<TupleTag<?>>()
-            .addAll(tupleTags)
-            .addAll(tags)
-            .build());
-  }
-
-  /**
-   * Returns the number of TupleTags in this TupleTagList.
-   */
-  public int size() {
-    return tupleTags.size();
-  }
-
-  /**
-   * Returns the {@link TupleTag} at the given index (origin zero).
-   *
-   * @throws IndexOutOfBoundsException if the index is out of the range
-   * {@code [0..size()-1]}.
-   */
-  public TupleTag<?> get(int index) {
-    return tupleTags.get(index);
-  }
-
-  /**
-   * Returns an immutable List of all the {@link TupleTag TupleTags} in this {@link TupleTagList}.
-   */
-  public List<TupleTag<?>> getAll() {
-    return tupleTags;
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal details below here.
-
-  final List<TupleTag<?>> tupleTags;
-
-  TupleTagList() {
-    this(new ArrayList<TupleTag<?>>());
-  }
-
-  TupleTagList(List<TupleTag<?>> tupleTags) {
-    this.tupleTags = Collections.unmodifiableList(tupleTags);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypeDescriptor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypeDescriptor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypeDescriptor.java
deleted file mode 100644
index 559d67c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypeDescriptor.java
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.common.collect.Lists;
-import com.google.common.reflect.Invokable;
-import com.google.common.reflect.Parameter;
-import com.google.common.reflect.TypeToken;
-
-import java.io.Serializable;
-import java.lang.reflect.Field;
-import java.lang.reflect.Method;
-import java.lang.reflect.ParameterizedType;
-import java.lang.reflect.Type;
-import java.lang.reflect.TypeVariable;
-import java.util.List;
-
-import javax.annotation.Nullable;
-
-/**
- * A description of a Java type, including actual generic parameters where possible.
- *
- * <p>To prevent losing actual type arguments due to erasure, create an anonymous subclass
- * with concrete types:
- * <pre>
- * {@code
- * TypeDescriptor<List<String>> descriptor = new TypeDescriptor<List<String>>() {};
- * }
- * </pre>
- *
- * <p>If the above were not an anonymous subclass, the type {@code List<String>}
- * would be erased and unavailable at run time.
- *
- * @param <T> the type represented by this {@link TypeDescriptor}
- */
-public abstract class TypeDescriptor<T> implements Serializable {
-
-  // This class is just a wrapper for TypeToken
-  private final TypeToken<T> token;
-
-  /**
-   * Creates a {@link TypeDescriptor} wrapping the provided token.
-   * This constructor is private so Guava types do not leak.
-   */
-  private TypeDescriptor(TypeToken<T> token) {
-    this.token = token;
-  }
-
-  /**
-   * Creates a {@link TypeDescriptor} representing
-   * the type parameter {@code T}. To use this constructor
-   * properly, the type parameter must be a concrete type, for example
-   * {@code new TypeDescriptor<List<String>>(){}}.
-   */
-  protected TypeDescriptor() {
-    token = new TypeToken<T>(getClass()) {};
-  }
-
-  /**
-   * Creates a {@link TypeDescriptor} representing the type parameter {@code T}, which should
-   * resolve to a concrete type in the context of the class of {@code instance}.
-   *
-   * <p>Unlike {@link TypeDescriptor#TypeDescriptor(Class)}, this will also use the contexts of
-   * the enclosing instances while attempting to resolve the type. This means that the types of
-   * any classes instantiated in the concrete instance should be resolvable.
-   */
-  protected TypeDescriptor(Object instance) {
-    TypeToken<?> unresolvedToken = new TypeToken<T>(getClass()) {};
-
-    // While we haven't fully resolved the parameters, refine it using the captured
-    // enclosing instance of the object.
-    unresolvedToken = TypeToken.of(instance.getClass()).resolveType(unresolvedToken.getType());
-
-    if (hasUnresolvedParameters(unresolvedToken.getType())) {
-      for (Field field : instance.getClass().getDeclaredFields()) {
-        Object fieldInstance = getEnclosingInstance(field, instance);
-        if (fieldInstance != null) {
-          unresolvedToken =
-              TypeToken.of(fieldInstance.getClass()).resolveType(unresolvedToken.getType());
-          if (!hasUnresolvedParameters(unresolvedToken.getType())) {
-            break;
-          }
-        }
-      }
-    }
-
-    // Once we've either fully resolved the parameters or exhausted enclosing instances, we have
-    // the best approximation to the token we can get.
-    @SuppressWarnings("unchecked")
-    TypeToken<T> typedToken = (TypeToken<T>) unresolvedToken;
-    token = typedToken;
-  }
-
-  private boolean hasUnresolvedParameters(Type type) {
-    if (type instanceof TypeVariable) {
-      return true;
-    } else if (type instanceof ParameterizedType) {
-      ParameterizedType param = (ParameterizedType) type;
-      for (Type arg : param.getActualTypeArguments()) {
-        if (hasUnresolvedParameters(arg)) {
-          return true;
-        }
-      }
-    }
-    return false;
-  }
-
-  /**
-   * Returns the enclosing instance if the field is synthetic and it is able to access it, or
-   * {@literal null} if not.
-   */
-  @Nullable
-  private Object getEnclosingInstance(Field field, Object instance) {
-    if (!field.isSynthetic()) {
-      return null;
-    }
-
-    boolean accessible = field.isAccessible();
-    try {
-      field.setAccessible(true);
-      return field.get(instance);
-    } catch (IllegalArgumentException | IllegalAccessException e) {
-      // If we fail to get the enclosing instance field, do nothing. In the worst case, we won't
-      // refine the type based on information in this enclosing class -- that is consistent with
-      // previous behavior and is still a correct answer that can be fixed by returning the correct
-      // type descriptor.
-      return null;
-    } finally {
-      field.setAccessible(accessible);
-    }
-  }
-
-  /**
-   * Creates a {@link TypeDescriptor} representing the type parameter
-   * {@code T}, which should resolve to a concrete type in the context
-   * of the class {@code clazz}.
-   */
-  @SuppressWarnings("unchecked")
-  protected TypeDescriptor(Class<?> clazz) {
-    TypeToken<T> unresolvedToken = new TypeToken<T>(getClass()) {};
-    token = (TypeToken<T>) TypeToken.of(clazz).resolveType(unresolvedToken.getType());
-  }
-
-  /**
-   * Returns a {@link TypeDescriptor} representing the given type.
-   */
-  public static <T> TypeDescriptor<T> of(Class<T> type) {
-    return new SimpleTypeDescriptor<>(TypeToken.<T>of(type));
-  }
-
-  /**
-   * Returns a {@link TypeDescriptor} representing the given type.
-   */
-  @SuppressWarnings("unchecked")
-  public static TypeDescriptor<?> of(Type type) {
-    return new SimpleTypeDescriptor<>((TypeToken<Object>) TypeToken.of(type));
-  }
-
-  /**
-   * Returns the {@link Type} represented by this {@link TypeDescriptor}.
-   */
-  public Type getType() {
-    return token.getType();
-  }
-
-  /**
-   * Returns the {@link Class} underlying the {@link Type} represented by
-   * this {@link TypeDescriptor}.
-   */
-  public Class<? super T> getRawType() {
-    return token.getRawType();
-  }
-
-  /**
-   * Returns the component type if this type is an array type,
-   * otherwise returns {@code null}.
-   */
-  public TypeDescriptor<?> getComponentType() {
-    return new SimpleTypeDescriptor<>(token.getComponentType());
-  }
-
-  /**
-   * Returns the generic form of a supertype.
-   */
-  public final TypeDescriptor<? super T> getSupertype(Class<? super T> superclass) {
-    return new SimpleTypeDescriptor<>(token.getSupertype(superclass));
-  }
-
-  /**
-   * Returns true if this type is known to be an array type.
-   */
-  public final boolean isArray() {
-    return token.isArray();
-  }
-
-  /**
-   * Returns a {@link TypeVariable} for the named type parameter. Throws
-   * {@link IllegalArgumentException} if a type variable by the requested type parameter is not
-   * found.
-   *
-   * <p>For example, {@code new TypeDescriptor<List>(){}.getTypeParameter("T")} returns a
-   * {@code TypeVariable<? super List>} representing the formal type parameter {@code T}.
-   *
-   * <p>Do not mistake the type parameters (formal type argument list) with the actual
-   * type arguments. For example, if a class {@code Foo} extends {@code List<String>}, it
-   * does not make sense to ask for a type parameter, because {@code Foo} does not have any.
-   */
-  public final TypeVariable<Class<? super T>> getTypeParameter(String paramName) {
-    // Cannot convert TypeVariable<Class<?>>[] to TypeVariable<Class<? super T>>[]
-    // due to how they are used here, so the result of getTypeParameters() cannot be used
-    // without an unchecked cast.
-    Class<?> rawType = getRawType();
-    for (TypeVariable<?> param : rawType.getTypeParameters()) {
-      if (param.getName().equals(paramName)) {
-        @SuppressWarnings("unchecked")
-        TypeVariable<Class<? super T>> typedParam = (TypeVariable<Class<? super T>>) param;
-        return typedParam;
-      }
-    }
-     throw new IllegalArgumentException(
-         "No type parameter named " + paramName + " found on " + getRawType());
-  }
-
-  /**
-   * Returns true if this type is assignable from the given type.
-   */
-  public final boolean isSupertypeOf(TypeDescriptor<?> source) {
-    return token.isSupertypeOf(source.token);
-  }
-
-  /**
-   * Returns true if this type is a subtype of the given type.
-   */
-  public final boolean isSubtypeOf(TypeDescriptor<?> parent) {
-    return token.isSubtypeOf(parent.token);
-  }
-
-  /**
-   * Returns a list of argument types for the given method, which must
-   * be a part of the class.
-   */
-  public List<TypeDescriptor<?>> getArgumentTypes(Method method) {
-    Invokable<?, ?> typedMethod = token.method(method);
-
-    List<TypeDescriptor<?>> argTypes = Lists.newArrayList();
-    for (Parameter parameter : typedMethod.getParameters()) {
-      argTypes.add(new SimpleTypeDescriptor<>(parameter.getType()));
-    }
-    return argTypes;
-  }
-
-  /**
-   * Returns a {@link TypeDescriptor} representing the given
-   * type, with type variables resolved according to the specialization
-   * in this type.
-   *
-   * <p>For example, consider the following class:
-   * <pre>
-   * {@code
-   * class MyList implements List<String> { ... }
-   * }
-   * </pre>
-   *
-   * <p>The {@link TypeDescriptor} returned by
-   * <pre>
-   * {@code
-   * TypeDescriptor.of(MyList.class)
-   *     .resolveType(MyList.class.getMethod("get", int.class).getGenericReturnType())
-   * }
-   * </pre>
-   * will represent the type {@code String}.
-   */
-  public TypeDescriptor<?> resolveType(Type type) {
-    return new SimpleTypeDescriptor<>(token.resolveType(type));
-  }
-
-  /**
-   * Returns a set of {@link TypeDescriptor}s, one for each
-   * interface implemented by this class.
-   */
-  @SuppressWarnings("rawtypes")
-  public Iterable<TypeDescriptor> getInterfaces() {
-    List<TypeDescriptor> interfaces = Lists.newArrayList();
-    for (TypeToken<?> interfaceToken : token.getTypes().interfaces()) {
-      interfaces.add(new SimpleTypeDescriptor<>(interfaceToken));
-    }
-    return interfaces;
-  }
-
-  /**
-   * Returns a set of {@link TypeDescriptor}s, one for each
-   * superclass (including this class).
-   */
-  @SuppressWarnings("rawtypes")
-  public Iterable<TypeDescriptor> getClasses() {
-    List<TypeDescriptor> classes = Lists.newArrayList();
-    for (TypeToken<?> classToken : token.getTypes().classes()) {
-      classes.add(new SimpleTypeDescriptor<>(classToken));
-    }
-    return classes;
-  }
-
-  @Override
-  public String toString() {
-    return token.toString();
-  }
-
-  /**
-   * Two type descriptors are equal if and only if they
-   * represent the same type.
-   */
-  @Override
-  public boolean equals(Object other) {
-    if (!(other instanceof TypeDescriptor)) {
-      return false;
-    } else {
-      @SuppressWarnings("unchecked")
-      TypeDescriptor<?> descriptor = (TypeDescriptor<?>) other;
-      return token.equals(descriptor.token);
-    }
-  }
-
-  @Override
-  public int hashCode() {
-    return token.hashCode();
-  }
-
-  /**
-   * A non-abstract {@link TypeDescriptor} for construction directly from an existing
-   * {@link TypeToken}.
-   */
-  private static final class SimpleTypeDescriptor<T> extends TypeDescriptor<T> {
-    SimpleTypeDescriptor(TypeToken<T> typeToken) {
-      super(typeToken);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java
deleted file mode 100644
index 29fd639..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/TypedPValue.java
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.values;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException.ReasonCode;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-
-/**
- * A {@link TypedPValue TypedPValue&lt;T&gt;} is the abstract base class of things that
- * store some number of values of type {@code T}.
- *
- * <p>Because we know the type {@code T}, this is the layer of the inheritance hierarchy where
- * we store a coder for objects of type {@code T}.
- *
- * @param <T> the type of the values stored in this {@link TypedPValue}
- */
-public abstract class TypedPValue<T> extends PValueBase implements PValue {
-
-  /**
-   * Returns the {@link Coder} used by this {@link TypedPValue} to encode and decode
-   * the values stored in it.
-   *
-   * @throws IllegalStateException if the {@link Coder} hasn't been set, and
-   * couldn't be inferred.
-   */
-  public Coder<T> getCoder() {
-    if (coder == null) {
-        coder = inferCoderOrFail();
-    }
-    return coder;
-  }
-
-  /**
-   * Sets the {@link Coder} used by this {@link TypedPValue} to encode and decode the
-   * values stored in it. Returns {@code this}.
-   *
-   * @throws IllegalStateException if this {@link TypedPValue} has already
-   * been finalized and is no longer settable, e.g., by having
-   * {@code apply()} called on it
-   */
-  public TypedPValue<T> setCoder(Coder<T> coder) {
-    if (isFinishedSpecifyingInternal()) {
-      throw new IllegalStateException(
-          "cannot change the Coder of " + this + " once it's been used");
-    }
-    if (coder == null) {
-      throw new IllegalArgumentException(
-          "Cannot setCoder(null)");
-    }
-    this.coder = coder;
-    return this;
-  }
-
-  /**
-   * After building, finalizes this {@link PValue} to make it ready for
-   * running.  Automatically invoked whenever the {@link PValue} is "used"
-   * (e.g., when apply() is called on it) and when the Pipeline is
-   * run (useful if this is a {@link PValue} with no consumers).
-   */
-  @Override
-  public void finishSpecifying() {
-    if (isFinishedSpecifyingInternal()) {
-      return;
-    }
-    super.finishSpecifying();
-    // Ensure that this TypedPValue has a coder by inferring the coder if none exists; if
-    // inference fails, this will throw an exception.
-    getCoder();
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal details below here.
-
-  /**
-   * The {@link Coder} used by this {@link TypedPValue} to encode and decode the
-   * values stored in it, or null if not specified nor inferred yet.
-   */
-  private Coder<T> coder;
-
-  protected TypedPValue(Pipeline p) {
-    super(p);
-  }
-
-  private TypeDescriptor<T> typeDescriptor;
-
-  /**
-   * Returns a {@link TypeDescriptor TypeDescriptor&lt;T&gt;} with some reflective information
-   * about {@code T}, if possible. May return {@code null} if no information
-   * is available. Subclasses may override this to enable better
-   * {@code Coder} inference.
-   */
-  public TypeDescriptor<T> getTypeDescriptor() {
-    return typeDescriptor;
-  }
-
-  /**
-   * Sets the {@link TypeDescriptor TypeDescriptor&lt;T&gt;} associated with this class. Better
-   * reflective type information will lead to better {@link Coder}
-   * inference.
-   */
-  public TypedPValue<T> setTypeDescriptorInternal(TypeDescriptor<T> typeDescriptor) {
-    this.typeDescriptor = typeDescriptor;
-    return this;
-  }
-
-  /**
-   * Returns the {@link Coder} already set on this {@link TypedPValue} or, if none is set,
-   * the best coder that can be inferred: first from the {@link CoderRegistry} using the
-   * known {@link TypeDescriptor}, then from the default output coder of the producing
-   * {@link PTransform}. Throws {@link IllegalStateException} if every option fails.
-   */
-  @SuppressWarnings({"unchecked", "rawtypes"})
-  private Coder<T> inferCoderOrFail() {
-    // First option for a coder: use the Coder set on this PValue.
-    if (coder != null) {
-      return coder;
-    }
-
-    AppliedPTransform<?, ?, ?> application = getProducingTransformInternal();
-
-    // Second option for a coder: Look in the coder registry.
-    CoderRegistry registry = getPipeline().getCoderRegistry();
-    TypeDescriptor<T> token = getTypeDescriptor();
-    CannotProvideCoderException inferFromTokenException = null;
-    if (token != null) {
-      try {
-          return registry.getDefaultCoder(token);
-      } catch (CannotProvideCoderException exc) {
-        inferFromTokenException = exc;
-        // Attempt to detect when the token came from a TupleTag used for a ParDo side output,
-        // and provide a better error message if so. Unfortunately, this information is not
-        // directly available from the TypeDescriptor, so infer based on the type of the PTransform
-        // and the error message itself.
-        if (application.getTransform() instanceof ParDo.BoundMulti
-            && exc.getReason() == ReasonCode.TYPE_ERASURE) {
-          inferFromTokenException = new CannotProvideCoderException(exc.getMessage()
-              + " If this error occurs for a side output of the producing ParDo, verify that the "
-              + "TupleTag for this output is constructed with proper type information (see "
-              + "TupleTag Javadoc) or explicitly set the Coder to use if this is not possible.");
-        }
-      }
-    }
-
-    // Third option for a coder: use the default Coder from the producing PTransform.
-    CannotProvideCoderException inputCoderException;
-    try {
-      return ((PTransform) application.getTransform()).getDefaultOutputCoder(
-          application.getInput(), this);
-    } catch (CannotProvideCoderException exc) {
-      inputCoderException = exc;
-    }
-
-    // Build up the error message and list of causes.
-    StringBuilder messageBuilder = new StringBuilder()
-        .append("Unable to return a default Coder for ").append(this)
-        .append(". Correct one of the following root causes:");
-
-    // No exception here, but tell the user that .setCoder() has not been called.
-    messageBuilder.append("\n  No Coder has been manually specified; ")
-        .append(" you may do so using .setCoder().");
-
-    if (inferFromTokenException != null) {
-      messageBuilder
-          .append("\n  Inferring a Coder from the CoderRegistry failed: ")
-          .append(inferFromTokenException.getMessage());
-    }
-
-    if (inputCoderException != null) {
-      messageBuilder
-          .append("\n  Using the default output Coder from the producing PTransform failed: ")
-          .append(inputCoderException.getMessage());
-    }
-
-    // Build and throw the exception.
-    throw new IllegalStateException(messageBuilder.toString());
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java
deleted file mode 100644
index b8ca756..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/values/package-info.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines {@link com.google.cloud.dataflow.sdk.values.PCollection} and other classes for
- * representing data in a {@link com.google.cloud.dataflow.sdk.Pipeline}.
- *
- * <p>In particular, see these collection abstractions:
- *
- * <ul>
- *   <li>{@link com.google.cloud.dataflow.sdk.values.PCollection} - an immutable collection of
- *     values of type {@code T} and the main representation for data in Dataflow.</li>
- *   <li>{@link com.google.cloud.dataflow.sdk.values.PCollectionView} - an immutable view of a
- *     {@link com.google.cloud.dataflow.sdk.values.PCollection} that can be accessed as a
- *     side input of a {@link com.google.cloud.dataflow.sdk.transforms.ParDo}
- *     {@link com.google.cloud.dataflow.sdk.transforms.PTransform}.</li>
- *   <li>{@link com.google.cloud.dataflow.sdk.values.PCollectionTuple} - a heterogeneous tuple of
- *     {@link com.google.cloud.dataflow.sdk.values.PCollection PCollections}
- *     used in cases where a {@link com.google.cloud.dataflow.sdk.transforms.PTransform} takes
- *     or returns multiple
- *     {@link com.google.cloud.dataflow.sdk.values.PCollection PCollections}.</li>
- *   <li>{@link com.google.cloud.dataflow.sdk.values.PCollectionList} - a homogeneous list of
- *     {@link com.google.cloud.dataflow.sdk.values.PCollection PCollections} used, for example,
- *     as input to {@link com.google.cloud.dataflow.sdk.transforms.Flatten}.</li>
- * </ul>
- *
- * <p>And these classes for individual values play particular roles in Dataflow:
- *
- * <ul>
- *   <li>{@link com.google.cloud.dataflow.sdk.values.KV} - a key/value pair that is used by
- *     keyed transforms, most notably {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}.
- *     </li>
- *   <li>{@link com.google.cloud.dataflow.sdk.values.TimestampedValue} - a timestamp/value pair
- *     that is used for windowing and handling out-of-order data in streaming execution.</li>
- * </ul>
- *
- * <p>For further details, see the documentation for each class in this package.
- */
-package com.google.cloud.dataflow.sdk.values;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/proto/README.md
----------------------------------------------------------------------
diff --git a/sdk/src/main/proto/README.md b/sdk/src/main/proto/README.md
deleted file mode 100644
index fa4e925..0000000
--- a/sdk/src/main/proto/README.md
+++ /dev/null
@@ -1,27 +0,0 @@
-## Protocol Buffers in Google Cloud Dataflow
-
-This directory contains the Protocol Buffer messages used in Google Cloud
-Dataflow.
-
-They aren't, however, used during the Maven build process, and are included here
-for completeness only. Instead, the following artifact on Maven Central contains
-the binary version of the generated code from these Protocol Buffers:
-
-    <dependency>
-      <groupId>com.google.cloud.dataflow</groupId>
-      <artifactId>google-cloud-dataflow-java-proto-library-all</artifactId>
-      <version>LATEST</version>
-    </dependency>
-
-Please follow this process for testing changes:
-
-* Make changes to the Protocol Buffer messages in this directory.
-* Use `protoc` to generate the new code, and compile it into a new Java library.
-* Install that Java library into your local Maven repository.
-* Update SDK's `pom.xml` to pick up the newly installed library, instead of
-downloading it from Maven Central.
-
-Once the changes are ready for submission, please separate them into two
-commits. The first commit should update the Protocol Buffer messages only. After
-that, we need to update the generated artifact on Maven Central. Finally,
-changes that make use of the Protocol Buffer changes may be committed.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/proto/proto2_coder_test_messages.proto
----------------------------------------------------------------------
diff --git a/sdk/src/main/proto/proto2_coder_test_messages.proto b/sdk/src/main/proto/proto2_coder_test_messages.proto
deleted file mode 100644
index eb3c3df..0000000
--- a/sdk/src/main/proto/proto2_coder_test_messages.proto
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/*
- * Protocol Buffer messages used for testing Proto2Coder implementation.
- */
-
-syntax = "proto2";
-
-package proto2_coder_test_messages;
-
-option java_package = "com.google.cloud.dataflow.sdk.coders";
-
-message MessageA {
-  optional string field1 = 1;
-  repeated MessageB field2 = 2;
-}
-
-message MessageB {
-  optional bool field1 = 1;
-}
-
-message MessageC {
-  extensions 100 to 105;
-}
-
-extend MessageC {
-  optional MessageA field1 = 101;
-  optional MessageB field2 = 102;
-}
-
-message MessageWithMap {
-  map<string, MessageA> field1 = 1;
-}
-
-message ReferencesMessageWithMap {
-  repeated MessageWithMap field1 = 1;
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/resources/com/google/cloud/dataflow/sdk/sdk.properties
----------------------------------------------------------------------
diff --git a/sdk/src/main/resources/com/google/cloud/dataflow/sdk/sdk.properties b/sdk/src/main/resources/com/google/cloud/dataflow/sdk/sdk.properties
deleted file mode 100644
index 5b0a720..0000000
--- a/sdk/src/main/resources/com/google/cloud/dataflow/sdk/sdk.properties
+++ /dev/null
@@ -1,5 +0,0 @@
-# SDK source version.
-version=${pom.version}
-
-build.date=${timestamp}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/DataflowMatchers.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/DataflowMatchers.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/DataflowMatchers.java
deleted file mode 100644
index ad21072..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/DataflowMatchers.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk;
-
-import com.google.protobuf.ByteString;
-
-import org.hamcrest.Description;
-import org.hamcrest.TypeSafeMatcher;
-
-import java.io.Serializable;
-
-/**
- * Matchers that are useful when writing Dataflow tests.
- */
-public class DataflowMatchers {
-  /**
-   * Matcher for {@link ByteString} that prints the strings in UTF8.
-   */
-  public static class ByteStringMatcher extends TypeSafeMatcher<ByteString>
-      implements Serializable {
-    private ByteString expected;
-    private ByteStringMatcher(ByteString expected) {
-      this.expected = expected;
-    }
-
-    public static ByteStringMatcher byteStringEq(ByteString expected) {
-      return new ByteStringMatcher(expected);
-    }
-
-    @Override
-    public void describeTo(Description description) {
-      description
-          .appendText("ByteString(")
-          .appendText(expected.toStringUtf8())
-          .appendText(")");
-    }
-
-    @Override
-    public void describeMismatchSafely(ByteString actual, Description description) {
-      description
-          .appendText("was ByteString(")
-          .appendText(actual.toStringUtf8())
-          .appendText(")");
-    }
-
-    @Override
-    protected boolean matchesSafely(ByteString actual) {
-      return actual.equals(expected);
-    }
-  }
-}

[56/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java
new file mode 100644
index 0000000..3deff2a
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.cloud.dataflow.examples.cookbook.MaxPerKeyExamples.ExtractTempFn;
+import com.google.cloud.dataflow.examples.cookbook.MaxPerKeyExamples.FormatMaxesFn;
+import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.common.collect.ImmutableList;
+
+import org.hamcrest.CoreMatchers;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.List;
+
+/** Exercises the DoFns of {@link MaxPerKeyExamples} in isolation via {@link DoFnTester}. */
+@RunWith(JUnit4.class)
+public class MaxPerKeyExamplesTest {
+  // Input rows for ExtractTempFn.
+  private static final TableRow row1 = new TableRow()
+      .set("month", "6").set("day", "21")
+      .set("year", "2014").set("mean_temp", "85.3")
+      .set("tornado", true);
+  private static final TableRow row2 = new TableRow()
+      .set("month", "7").set("day", "20")
+      .set("year", "2014").set("mean_temp", "75.4")
+      .set("tornado", false);
+  private static final TableRow row3 = new TableRow()
+      .set("month", "6").set("day", "18")
+      .set("year", "2014").set("mean_temp", "45.3")
+      .set("tornado", true);
+  private static final List<TableRow> TEST_ROWS = ImmutableList.of(row1, row2, row3);
+
+  // Pairs expected from ExtractTempFn; TEST_KVS is also the input to FormatMaxesFn.
+  private static final KV<Integer, Double> kv1 = KV.of(6, 85.3);
+  private static final KV<Integer, Double> kv2 = KV.of(6, 45.3);
+  private static final KV<Integer, Double> kv3 = KV.of(7, 75.4);
+  private static final List<KV<Integer, Double>> TEST_KVS = ImmutableList.of(kv1, kv2, kv3);
+
+  // Rows that FormatMaxesFn is expected to emit for TEST_KVS.
+  private static final TableRow resultRow1 = new TableRow()
+      .set("month", 6)
+      .set("max_mean_temp", 85.3);
+  private static final TableRow resultRow2 = new TableRow()
+      .set("month", 7)
+      .set("max_mean_temp", 75.4);
+
+  @Test
+  public void testExtractTempFn() {
+    // Each expected (month, mean temperature) pair must appear somewhere in the output.
+    List<KV<Integer, Double>> extracted =
+        DoFnTester.of(new ExtractTempFn()).processBatch(TEST_ROWS);
+    for (KV<Integer, Double> expectedPair : TEST_KVS) {
+      Assert.assertThat(extracted, CoreMatchers.hasItem(expectedPair));
+    }
+  }
+
+  @Test
+  public void testFormatMaxesFn() {
+    List<TableRow> formatted =
+        DoFnTester.of(new FormatMaxesFn()).processBatch(TEST_KVS);
+    for (TableRow expectedRow : ImmutableList.of(resultRow1, resultRow2)) {
+      Assert.assertThat(formatted, CoreMatchers.hasItem(expectedRow));
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/TriggerExampleTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/TriggerExampleTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/TriggerExampleTest.java
new file mode 100644
index 0000000..209ea52
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/TriggerExampleTest.java
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.cloud.dataflow.examples.cookbook.TriggerExample.ExtractFlowInfo;
+import com.google.cloud.dataflow.examples.cookbook.TriggerExample.TotalFlow;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.TimestampedValue;
+
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Unit tests for {@link TriggerExample}.
+ * <p>The results generated by triggers are by definition non-deterministic and hence hard to
+ * test, so these tests do not cover all aspects of the example.
+ */
+@RunWith(JUnit4.class)
+public class TriggerExampleTest {
+  // Raw lines for ExtractFlowInfo; only the first is expected to yield a record.
+  private static final String[] INPUT =
+    {"01/01/2010 00:00:00,1108302,94,E,ML,36,100,29,0.0065,66,9,1,0.001,74.8,1,9,3,0.0028,71,1,9,"
+        + "12,0.0099,67.4,1,9,13,0.0121,99.0,1,,,,,0,,,,,0,,,,,0,,,,,0", "01/01/2010 00:00:00,"
+            + "1100333,5,N,FR,9,0,39,,,9,,,,0,,,,,0,,,,,0,,,,,0,,,,,0,,,,,0,,,,,0,,,,"};
+
+  private static final List<TimestampedValue<String>> TIME_STAMPED_INPUT = Arrays.asList(
+      TimestampedValue.of("01/01/2010 00:00:00,1108302,5,W,ML,36,100,30,0.0065,66,9,1,0.001,"
+          + "74.8,1,9,3,0.0028,71,1,9,12,0.0099,87.4,1,9,13,0.0121,99.0,1,,,,,0,,,,,0,,,,,0,,,"
+          + ",,0", new Instant(60000)),
+      TimestampedValue.of("01/01/2010 00:00:00,1108302,110,E,ML,36,100,40,0.0065,66,9,1,0.001,"
+          + "74.8,1,9,3,0.0028,71,1,9,12,0.0099,67.4,1,9,13,0.0121,99.0,1,,,,,0,,,,,0,,,,,0,,,"
+          + ",,0", new Instant(1)),
+      TimestampedValue.of("01/01/2010 00:00:00,1108302,110,E,ML,36,100,50,0.0065,66,9,1,"
+          + "0.001,74.8,1,9,3,0.0028,71,1,9,12,0.0099,97.4,1,9,13,0.0121,50.0,1,,,,,0,,,,,0"
+          + ",,,,,0,,,,,0", new Instant(1)));
+
+  private static final TableRow OUT_ROW_1 = new TableRow()
+      .set("trigger_type", "default")
+      .set("freeway", "5").set("total_flow", 30)
+      .set("number_of_records", 1)
+      .set("isFirst", true).set("isLast", true)
+      .set("timing", "ON_TIME")
+      .set("window", "[1970-01-01T00:01:00.000Z..1970-01-01T00:02:00.000Z)");
+
+  private static final TableRow OUT_ROW_2 = new TableRow()
+      .set("trigger_type", "default")
+      .set("freeway", "110").set("total_flow", 90)
+      .set("number_of_records", 2)
+      .set("isFirst", true).set("isLast", true)
+      .set("timing", "ON_TIME")
+      .set("window", "[1970-01-01T00:00:00.000Z..1970-01-01T00:01:00.000Z)");
+
+  @Test
+  public void testExtractTotalFlow() {
+    DoFnTester<String, KV<String, Integer>> extractFlowInfo = DoFnTester
+        .of(new ExtractFlowInfo());
+    // assertEquals takes (expected, actual); that order keeps failure messages accurate.
+    List<KV<String, Integer>> results = extractFlowInfo.processBatch(INPUT);
+    Assert.assertEquals(1, results.size());
+    Assert.assertEquals("94", results.get(0).getKey());
+    Assert.assertEquals(Integer.valueOf(29), results.get(0).getValue());
+    // An empty line must not produce any output.
+    List<KV<String, Integer>> output = extractFlowInfo.processBatch("");
+    Assert.assertEquals(0, output.size());
+  }
+
+  @Test
+  @Category(RunnableOnService.class)
+  public void testTotalFlow() {
+    Pipeline pipeline = TestPipeline.create();
+    PCollection<KV<String, Integer>> flow = pipeline
+        .apply(Create.timestamped(TIME_STAMPED_INPUT))
+        .apply(ParDo.of(new ExtractFlowInfo()));
+
+    PCollection<TableRow> totalFlow = flow
+        .apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(1))))
+        .apply(new TotalFlow("default"));
+
+    // Project each row onto the fields set in OUT_ROW_1/OUT_ROW_2 before comparing.
+    PCollection<TableRow> results = totalFlow.apply(ParDo.of(new FormatResults()));
+
+    DataflowAssert.that(results).containsInAnyOrder(OUT_ROW_1, OUT_ROW_2);
+    pipeline.run();
+  }
+
+  /** Copies the eight compared fields of each input row into a fresh {@link TableRow}. */
+  static class FormatResults extends DoFn<TableRow, TableRow> {
+    @Override
+    public void processElement(ProcessContext c) throws Exception {
+      TableRow element = c.element();
+      TableRow row = new TableRow()
+          .set("trigger_type", element.get("trigger_type"))
+          .set("freeway", element.get("freeway"))
+          .set("total_flow", element.get("total_flow"))
+          .set("number_of_records", element.get("number_of_records"))
+          .set("isFirst", element.get("isFirst"))
+          .set("isLast", element.get("isLast"))
+          .set("timing", element.get("timing"))
+          .set("window", element.get("window"));
+      c.output(row);
+    }
+  }
+}
+
+

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
deleted file mode 100644
index 8b17dfe..0000000
--- a/examples/pom.xml
+++ /dev/null
@@ -1,394 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-    Licensed to the Apache Software Foundation (ASF) under one or more
-    contributor license agreements.  See the NOTICE file distributed with
-    this work for additional information regarding copyright ownership.
-    The ASF licenses this file to You under the Apache License, Version 2.0
-    (the "License"); you may not use this file except in compliance with
-    the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <groupId>org.apache.beam</groupId>
-    <artifactId>parent</artifactId>
-    <version>0.1.0-incubating-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-
-  <artifactId>java-examples-all</artifactId>
-  <name>Apache Beam :: Examples :: Java All</name>
-  <description>Apache Beam SDK provides a simple, Java-based
-  interface for processing virtually any size data. This
-  artifact includes all Apache Beam Java SDK examples.</description>
-
-  <packaging>jar</packaging>
-
-  <profiles>
-    <profile>
-      <id>DataflowPipelineTests</id>
-      <properties>
-        <runIntegrationTestOnService>true</runIntegrationTestOnService>
-        <testGroups>com.google.cloud.dataflow.sdk.testing.RunnableOnService</testGroups>
-        <testParallelValue>both</testParallelValue>
-      </properties>
-    </profile>
-  </profiles>
-
-  <build>
-    <plugins>
-      <plugin>
-        <artifactId>maven-compiler-plugin</artifactId>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-dependency-plugin</artifactId>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-checkstyle-plugin</artifactId>
-        <version>2.12</version>
-        <dependencies>
-          <dependency>
-            <groupId>com.puppycrawl.tools</groupId>
-            <artifactId>checkstyle</artifactId>
-            <version>6.6</version>
-          </dependency>
-        </dependencies>
-        <configuration>
-          <configLocation>../checkstyle.xml</configLocation>
-          <consoleOutput>true</consoleOutput>
-          <failOnViolation>true</failOnViolation>
-          <includeTestSourceDirectory>true</includeTestSourceDirectory>
-          <includeResources>false</includeResources>
-        </configuration>
-        <executions>
-          <execution>
-            <goals>
-              <goal>check</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- Source plugin for generating source and test-source JARs. -->
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-source-plugin</artifactId>
-        <version>2.4</version>
-        <executions>
-          <execution>
-            <id>attach-sources</id>
-            <phase>compile</phase>
-            <goals>
-              <goal>jar</goal>
-            </goals>
-          </execution>
-          <execution>
-            <id>attach-test-sources</id>
-            <phase>test-compile</phase>
-            <goals>
-              <goal>test-jar</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-javadoc-plugin</artifactId>
-        <configuration>
-          <windowtitle>Apache Beam Examples</windowtitle>
-          <doctitle>Apache Beam Examples</doctitle>
-
-          <subpackages>com.google.cloud.dataflow.examples</subpackages>
-          <additionalparam>-exclude com.google.cloud.dataflow.sdk.runners.worker:com.google.cloud.dataflow.sdk.runners.dataflow:com.google.cloud.dataflow.sdk.util ${dataflow.javadoc_opts}</additionalparam>
-          <use>false</use>
-          <quiet>true</quiet>
-          <bottom><![CDATA[<br>]]></bottom>
-
-          <offlineLinks>
-            <!-- The Dataflow SDK docs -->
-            <offlineLink>
-              <url>https://cloud.google.com/dataflow/java-sdk/JavaDoc/</url>
-              <location>${basedir}/../javadoc/dataflow-sdk-docs</location>
-            </offlineLink>
-            <!-- Other dependencies -->
-            <offlineLink>
-              <url>https://developers.google.com/api-client-library/java/google-api-java-client/reference/1.20.0/</url>
-              <location>${basedir}/../javadoc/apiclient-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://avro.apache.org/docs/1.7.7/api/java/</url>
-              <location>${basedir}/../javadoc/avro-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/</url>
-              <location>${basedir}/../javadoc/bq-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>https://cloud.google.com/datastore/docs/apis/javadoc/</url>
-              <location>${basedir}/../javadoc/datastore-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://docs.guava-libraries.googlecode.com/git-history/release18/javadoc/</url>
-              <location>${basedir}/../javadoc/guava-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://fasterxml.github.io/jackson-annotations/javadoc/2.7/</url>
-              <location>${basedir}/../javadoc/jackson-annotations-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://fasterxml.github.io/jackson-databind/javadoc/2.7/</url>
-              <location>${basedir}/../javadoc/jackson-databind-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://www.joda.org/joda-time/apidocs</url>
-              <location>${basedir}/../javadoc/joda-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>https://developers.google.com/api-client-library/java/google-oauth-java-client/reference/1.20.0/</url>
-              <location>${basedir}/../javadoc/oauth-docs</location>
-            </offlineLink>
-          </offlineLinks>
-        </configuration>
-        <executions>
-          <execution>
-            <goals>
-              <goal>jar</goal>
-            </goals>
-            <phase>package</phase>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-shade-plugin</artifactId>
-        <version>2.4.1</version>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-            <configuration>
-              <finalName>${project.artifactId}-bundled-${project.version}</finalName>
-              <artifactSet>
-                <includes>
-                  <include>*:*</include>
-                </includes>
-              </artifactSet>
-              <filters>
-                <filter>
-                  <artifact>*:*</artifact>
-                  <excludes>
-                    <exclude>META-INF/*.SF</exclude>
-                    <exclude>META-INF/*.DSA</exclude>
-                    <exclude>META-INF/*.RSA</exclude>
-                  </excludes>
-                </filter>
-              </filters>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-jar-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>default-jar</id>
-            <goals>
-              <goal>jar</goal>
-            </goals>
-          </execution>
-          <execution>
-            <id>default-test-jar</id>
-            <goals>
-              <goal>test-jar</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- Coverage analysis for unit tests. -->
-      <plugin>
-        <groupId>org.jacoco</groupId>
-        <artifactId>jacoco-maven-plugin</artifactId>
-      </plugin>
-    </plugins>
-  </build>
-
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.beam</groupId>
-      <artifactId>java-sdk-all</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.api-client</groupId>
-      <artifactId>google-api-client</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-dataflow</artifactId>
-      <version>${dataflow.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-bigquery</artifactId>
-      <version>${bigquery.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.http-client</groupId>
-      <artifactId>google-http-client</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro</artifactId>
-      <version>${avro.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-datastore-protobuf</artifactId>
-      <version>${datastore.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-pubsub</artifactId>
-      <version>${pubsub.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-      <version>${guava.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.code.findbugs</groupId>
-      <artifactId>jsr305</artifactId>
-      <version>${jsr305.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>joda-time</groupId>
-      <artifactId>joda-time</artifactId>
-      <version>${joda.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-api</artifactId>
-      <version>${slf4j.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-jdk14</artifactId>
-      <version>${slf4j.version}</version>
-      <scope>runtime</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>javax.servlet</groupId>
-      <artifactId>javax.servlet-api</artifactId>
-      <version>3.1.0</version>
-    </dependency>
-
-    <!-- Hamcrest and JUnit are required dependencies of DataflowAssert,
-         which is used in the main code of DebuggingWordCount example. -->
-
-    <dependency>
-      <groupId>org.hamcrest</groupId>
-      <artifactId>hamcrest-all</artifactId>
-      <version>${hamcrest.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <version>${junit.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-all</artifactId>
-      <version>1.10.19</version>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
-</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
deleted file mode 100644
index 8823dbc..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples;
-
-import com.google.cloud.dataflow.examples.WordCount.WordCountOptions;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.regex.Pattern;
-
-
-/**
- * An example that verifies word counts in Shakespeare and includes Dataflow best practices.
- *
- * <p>This class, {@link DebuggingWordCount}, is the third in a series of four successively more
- * detailed 'word count' examples. You may first want to take a look at {@link MinimalWordCount}
- * and {@link WordCount}. After you've looked at this example, then see the
- * {@link WindowedWordCount} pipeline, for introduction of additional concepts.
- *
- * <p>Basic concepts, also in the MinimalWordCount and WordCount examples:
- * Reading text files; counting a PCollection; executing a Pipeline both locally
- * and using the Dataflow service; defining DoFns.
- *
- * <p>New Concepts:
- * <pre>
- *   1. Logging to Cloud Logging
- *   2. Controlling Dataflow worker log levels
- *   3. Creating a custom aggregator
- *   4. Testing your Pipeline via DataflowAssert
- * </pre>
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- *
- * <p>To execute this pipeline using the Dataflow service and the additional logging discussed
- * below, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- *   --workerLogLevelOverrides={"com.google.cloud.dataflow.examples":"DEBUG"}
- * }
- * </pre>
- *
- * <p>Note that when you run via <code>mvn exec</code>, you may need to escape
- * the quotations as appropriate for your shell. For example, in <code>bash</code>:
- * <pre>
- * mvn compile exec:java ... \
- *   -Dexec.args="... \
- *     --workerLogLevelOverrides={\\\"com.google.cloud.dataflow.examples\\\":\\\"DEBUG\\\"}"
- * </pre>
- *
- * <p>Concept #2: Dataflow workers which execute user code are configured to log to Cloud
- * Logging by default at "INFO" log level and higher. One may override log levels for specific
- * logging namespaces by specifying:
- * <pre><code>
- *   --workerLogLevelOverrides={"Name1":"Level1","Name2":"Level2",...}
- * </code></pre>
- * For example, by specifying:
- * <pre><code>
- *   --workerLogLevelOverrides={"com.google.cloud.dataflow.examples":"DEBUG"}
- * </code></pre>
- * when executing this pipeline using the Dataflow service, Cloud Logging would contain only
- * "DEBUG" or higher level logs for the {@code com.google.cloud.dataflow.examples} package in
- * addition to the default "INFO" or higher level logs. In addition, the default Dataflow worker
- * logging configuration can be overridden by specifying
- * {@code --defaultWorkerLogLevel=<one of TRACE, DEBUG, INFO, WARN, ERROR>}. For example,
- * by specifying {@code --defaultWorkerLogLevel=DEBUG} when executing this pipeline with
- * the Dataflow service, Cloud Logging would contain all "DEBUG" or higher level logs. Note
- * that changing the default worker log level to TRACE or DEBUG will significantly increase
- * the amount of logs output.
- *
- * <p>The input file defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt} and can be
- * overridden with {@code --inputFile}.
- */
-public class DebuggingWordCount {
-  /** A DoFn that filters for a specific key based upon a regular expression. */
-  public static class FilterTextFn extends DoFn<KV<String, Long>, KV<String, Long>> {
-    /**
-     * Concept #1: The logger below uses the fully qualified class name of FilterTextFn
-     * as the logger. All log statements emitted by this logger will be referenced by this name
-     * and will be visible in the Cloud Logging UI. Learn more at https://cloud.google.com/logging
-     * about the Cloud Logging UI.
-     */
-    private static final Logger LOG = LoggerFactory.getLogger(FilterTextFn.class);
-
-    private final Pattern filter;
-    public FilterTextFn(String pattern) {
-      filter = Pattern.compile(pattern);
-    }
-
-    /**
-     * Concept #3: A custom aggregator can track values in your pipeline as it runs. Those
-     * values will be displayed in the Dataflow Monitoring UI when this pipeline is run using the
-     * Dataflow service. These aggregators below track the number of matched and unmatched words.
-     * Learn more at https://cloud.google.com/dataflow/pipelines/dataflow-monitoring-intf about
-     * the Dataflow Monitoring UI.
-     */
-    private final Aggregator<Long, Long> matchedWords =
-        createAggregator("matchedWords", new Sum.SumLongFn());
-    private final Aggregator<Long, Long> unmatchedWords =
-        createAggregator("umatchedWords", new Sum.SumLongFn());
-
-    @Override
-    public void processElement(ProcessContext c) {
-      if (filter.matcher(c.element().getKey()).matches()) {
-        // Log at the "DEBUG" level each element that we match. When executing this pipeline
-        // using the Dataflow service, these log lines will appear in the Cloud Logging UI
-        // only if the log level is set to "DEBUG" or lower.
-        LOG.debug("Matched: " + c.element().getKey());
-        matchedWords.addValue(1L);
-        c.output(c.element());
-      } else {
-        // Log at the "TRACE" level each element that is not matched. Different log levels
-        // can be used to control the verbosity of logging providing an effective mechanism
-        // to filter less important information.
-        LOG.trace("Did not match: " + c.element().getKey());
-        unmatchedWords.addValue(1L);
-      }
-    }
-  }
-
-  public static void main(String[] args) {
-    WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
-      .as(WordCountOptions.class);
-    Pipeline p = Pipeline.create(options);
-
-    PCollection<KV<String, Long>> filteredWords =
-        p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
-         .apply(new WordCount.CountWords())
-         .apply(ParDo.of(new FilterTextFn("Flourish|stomach")));
-
-    /**
-     * Concept #4: DataflowAssert is a set of convenient PTransforms in the style of
-     * Hamcrest's collection matchers that can be used when writing Pipeline level tests
-     * to validate the contents of PCollections. DataflowAssert is best used in unit tests
-     * with small data sets but is demonstrated here as a teaching tool.
-     *
-     * <p>Below we verify that the set of filtered words matches our expected counts. Note
-     * that DataflowAssert does not provide any output and that successful completion of the
-     * Pipeline implies that the expectations were met. Learn more at
-     * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline on how to test
-     * your Pipeline and see {@link DebuggingWordCountTest} for an example unit test.
-     */
-    List<KV<String, Long>> expectedResults = Arrays.asList(
-        KV.of("Flourish", 3L),
-        KV.of("stomach", 1L));
-    DataflowAssert.that(filteredWords).containsInAnyOrder(expectedResults);
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCount.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCount.java
deleted file mode 100644
index 4ed0520..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCount.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.MapElements;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SimpleFunction;
-import com.google.cloud.dataflow.sdk.values.KV;
-
-
-/**
- * An example that counts words in Shakespeare.
- *
- * <p>This class, {@link MinimalWordCount}, is the first in a series of four successively more
- * detailed 'word count' examples. Here, for simplicity, we don't show any error-checking or
- * argument processing, and focus on construction of the pipeline, which chains together the
- * application of core transforms.
- *
- * <p>Next, see the {@link WordCount} pipeline, then the {@link DebuggingWordCount}, and finally
- * the {@link WindowedWordCount} pipeline, for more detailed examples that introduce additional
- * concepts.
- *
- * <p>Concepts:
- * <pre>
- *   1. Reading data from text files
- *   2. Specifying 'inline' transforms
- *   3. Counting a PCollection
- *   4. Writing data to Cloud Storage as text files
- * </pre>
- *
- * <p>To execute this pipeline, first edit the code to set your project ID, the staging
- * location, and the output location. The specified GCS bucket(s) must already exist.
- *
- * <p>Then, run the pipeline as described in the README. It will be deployed and run using the
- * Dataflow service. No args are required to run the pipeline. You can see the results in your
- * output bucket in the GCS browser.
- */
-public class MinimalWordCount {
-
-  public static void main(String[] args) {
-    // Create a DataflowPipelineOptions object. This object lets us set various execution
-    // options for our pipeline, such as the associated Cloud Platform project and the location
-    // in Google Cloud Storage to stage files.
-    DataflowPipelineOptions options = PipelineOptionsFactory.create()
-      .as(DataflowPipelineOptions.class);
-    options.setRunner(BlockingDataflowPipelineRunner.class);
-    // CHANGE 1/3: Your project ID is required in order to run your pipeline on the Google Cloud.
-    options.setProject("SET_YOUR_PROJECT_ID_HERE");
-    // CHANGE 2/3: Your Google Cloud Storage path is required for staging local files.
-    options.setStagingLocation("gs://SET_YOUR_BUCKET_NAME_HERE/AND_STAGING_DIRECTORY");
-
-    // Create the Pipeline object with the options we defined above.
-    Pipeline p = Pipeline.create(options);
-
-    // Apply the pipeline's transforms.
-
-    // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set
-    // of input text files. TextIO.Read returns a PCollection where each element is one line from
-    // the input text (a set of Shakespeare's texts).
-    p.apply(TextIO.Read.from("gs://dataflow-samples/shakespeare/*"))
-     // Concept #2: Apply a ParDo transform to our PCollection of text lines. This ParDo invokes a
-     // DoFn (defined in-line) on each element that tokenizes the text line into individual words.
-     // The ParDo returns a PCollection<String>, where each element is an individual word in
-     // Shakespeare's collected texts.
-     .apply(ParDo.named("ExtractWords").of(new DoFn<String, String>() {
-                       @Override
-                       public void processElement(ProcessContext c) {
-                         for (String word : c.element().split("[^a-zA-Z']+")) {
-                           if (!word.isEmpty()) {
-                             c.output(word);
-                           }
-                         }
-                       }
-                     }))
-     // Concept #3: Apply the Count transform to our PCollection of individual words. The Count
-     // transform returns a new PCollection of key/value pairs, where each key represents a unique
-     // word in the text. The associated value is the occurrence count for that word.
-     .apply(Count.<String>perElement())
-     // Apply a MapElements transform that formats our PCollection of word counts into a printable
-     // string, suitable for writing to an output file.
-     .apply("FormatResults", MapElements.via(new SimpleFunction<KV<String, Long>, String>() {
-                       @Override
-                       public String apply(KV<String, Long> input) {
-                         return input.getKey() + ": " + input.getValue();
-                       }
-                     }))
-     // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline.
-     // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of
-     // formatted strings) to a series of text files in Google Cloud Storage.
-     // CHANGE 3/3: The Google Cloud Storage path is required for outputting the results to.
-     .apply(TextIO.Write.to("gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX"));
-
-    // Run the pipeline.
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java
deleted file mode 100644
index 2adac55..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/WindowedWordCount.java
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
-import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
-import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
-import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicOptions;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-
-/**
- * An example that counts words in text, and can run over either unbounded or bounded input
- * collections.
- *
- * <p>This class, {@link WindowedWordCount}, is the last in a series of four successively more
- * detailed 'word count' examples. First take a look at {@link MinimalWordCount},
- * {@link WordCount}, and {@link DebuggingWordCount}.
- *
- * <p>Basic concepts, also in the MinimalWordCount, WordCount, and DebuggingWordCount examples:
- * Reading text files; counting a PCollection; writing to GCS; executing a Pipeline both locally
- * and using the Dataflow service; defining DoFns; creating a custom aggregator;
- * user-defined PTransforms; defining PipelineOptions.
- *
- * <p>New Concepts:
- * <pre>
- *   1. Unbounded and bounded pipeline input modes
- *   2. Adding timestamps to data
- *   3. PubSub topics as sources
- *   4. Windowing
- *   5. Re-using PTransforms over windowed PCollections
- *   6. Writing to BigQuery
- * </pre>
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- * }
- * </pre>
- *
- * <p>Optionally specify the input file path via:
- * {@code --inputFile=gs://INPUT_PATH},
- * which defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt}.
- *
- * <p>Specify an output BigQuery dataset and optionally, a table for the output. If you don't
- * specify the table, one will be created for you using the job name. If you don't specify the
- * dataset, a dataset called {@code dataflow-examples} must already exist in your project.
- * {@code --bigQueryDataset=YOUR-DATASET --bigQueryTable=YOUR-NEW-TABLE-NAME}.
- *
- * <p>Decide whether you want your pipeline to run with 'bounded' (such as files in GCS) or
- * 'unbounded' input (such as a PubSub topic). To run with unbounded input, set
- * {@code --unbounded=true}. Then, optionally specify the Google Cloud PubSub topic to read from
- * via {@code --pubsubTopic=projects/PROJECT_ID/topics/YOUR_TOPIC_NAME}. If the topic does not
- * exist, the pipeline will create one for you. It will delete this topic when it terminates.
- * The pipeline will automatically launch an auxiliary batch pipeline to populate the given PubSub
- * topic with the contents of the {@code --inputFile}, in order to make the example easy to run.
- * If you want to use an independently-populated PubSub topic, indicate this by setting
- * {@code --inputFile=""}. In that case, the auxiliary pipeline will not be started.
- *
- * <p>By default, the pipeline will do fixed windowing, on 1-minute windows.  You can
- * change this interval by setting the {@code --windowSize} parameter, e.g. {@code --windowSize=10}
- * for 10-minute windows.
- */
-public class WindowedWordCount {
-    private static final Logger LOG = LoggerFactory.getLogger(WindowedWordCount.class);
-    static final int WINDOW_SIZE = 1;  // Default window duration in minutes
-
-  /**
-   * Concept #2: A DoFn that sets the data element timestamp. This is a silly method, just for
-   * this example, for the bounded data case.
-   *
-   * <p>Imagine that many ghosts of Shakespeare are all typing madly at the same time to recreate
-   * his masterworks. Each line of the corpus will get a random associated timestamp somewhere in a
-   * 2-hour period.
-   */
-  static class AddTimestampFn extends DoFn<String, String> {
-    private static final long RAND_RANGE = 7200000; // 2 hours in ms
-
-    @Override
-    public void processElement(ProcessContext c) {
-      // Generate a timestamp that falls somewhere in the past two hours.
-      long randomTimestamp = System.currentTimeMillis()
-        - (int) (Math.random() * RAND_RANGE);
-      /**
-       * Concept #2: Set the data element with that timestamp.
-       */
-      c.outputWithTimestamp(c.element(), new Instant(randomTimestamp));
-    }
-  }
-
-  /** A DoFn that converts a Word and Count into a BigQuery table row. */
-  static class FormatAsTableRowFn extends DoFn<KV<String, Long>, TableRow> {
-    @Override
-    public void processElement(ProcessContext c) {
-      TableRow row = new TableRow()
-          .set("word", c.element().getKey())
-          .set("count", c.element().getValue())
-          // include a field for the window timestamp
-         .set("window_timestamp", c.timestamp().toString());
-      c.output(row);
-    }
-  }
-
-  /**
-   * Helper method that defines the BigQuery schema used for the output.
-   */
-  private static TableSchema getSchema() {
-    List<TableFieldSchema> fields = new ArrayList<>();
-    fields.add(new TableFieldSchema().setName("word").setType("STRING"));
-    fields.add(new TableFieldSchema().setName("count").setType("INTEGER"));
-    fields.add(new TableFieldSchema().setName("window_timestamp").setType("TIMESTAMP"));
-    TableSchema schema = new TableSchema().setFields(fields);
-    return schema;
-  }
-
-  /**
-   * Concept #6: We'll stream the results to a BigQuery table. The BigQuery output source is one
-   * that supports both bounded and unbounded data. This is a helper method that creates a
-   * TableReference from input options, to tell the pipeline where to write its BigQuery results.
-   */
-  private static TableReference getTableReference(Options options) {
-    TableReference tableRef = new TableReference();
-    tableRef.setProjectId(options.getProject());
-    tableRef.setDatasetId(options.getBigQueryDataset());
-    tableRef.setTableId(options.getBigQueryTable());
-    return tableRef;
-  }
-
-  /**
-   * Options supported by {@link WindowedWordCount}.
-   *
-   * <p>Inherits standard example configuration options, which allow specification of the BigQuery
-   * table and the PubSub topic, as well as the {@link WordCount.WordCountOptions} support for
-   * specification of the input file.
-   */
-  public static interface Options extends WordCount.WordCountOptions,
-      DataflowExampleOptions, ExamplePubsubTopicOptions, ExampleBigQueryTableOptions {
-    @Description("Fixed window duration, in minutes")
-    @Default.Integer(WINDOW_SIZE)
-    Integer getWindowSize();
-    void setWindowSize(Integer value);
-
-    @Description("Whether to run the pipeline with unbounded input")
-    boolean isUnbounded();
-    void setUnbounded(boolean value);
-  }
-
-  public static void main(String[] args) throws IOException {
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    options.setBigQuerySchema(getSchema());
-    // DataflowExampleUtils creates the necessary input sources to simplify execution of this
-    // Pipeline.
-    DataflowExampleUtils exampleDataflowUtils = new DataflowExampleUtils(options,
-      options.isUnbounded());
-
-    Pipeline pipeline = Pipeline.create(options);
-
-    /**
-     * Concept #1: the Dataflow SDK lets us run the same pipeline with either a bounded or
-     * unbounded input source.
-     */
-    PCollection<String> input;
-    if (options.isUnbounded()) {
-      LOG.info("Reading from PubSub.");
-      /**
-       * Concept #3: Read from the PubSub topic. A topic will be created if it wasn't
-       * specified as an argument. The data elements' timestamps will come from the pubsub
-       * injection.
-       */
-      input = pipeline
-          .apply(PubsubIO.Read.topic(options.getPubsubTopic()));
-    } else {
-      /** Else, this is a bounded pipeline. Read from the GCS file. */
-      input = pipeline
-          .apply(TextIO.Read.from(options.getInputFile()))
-          // Concept #2: Add an element timestamp, using an artificial time just to show windowing.
-          // See AddTimestampFn for more detail on this.
-          .apply(ParDo.of(new AddTimestampFn()));
-    }
-
-    /**
-     * Concept #4: Window into fixed windows. The fixed window size for this example defaults to 1
-     * minute (you can change this with a command-line option). See the documentation for more
-     * information on how fixed windows work, and for information on the other types of windowing
-     * available (e.g., sliding windows).
-     */
-    PCollection<String> windowedWords = input
-      .apply(Window.<String>into(
-        FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))));
-
-    /**
-     * Concept #5: Re-use our existing CountWords transform that does not have knowledge of
-     * windows over a PCollection containing windowed values.
-     */
-    PCollection<KV<String, Long>> wordCounts = windowedWords.apply(new WordCount.CountWords());
-
-    /**
-     * Concept #6: Format the results for a BigQuery table, then write to BigQuery.
-     * The BigQuery output source supports both bounded and unbounded data.
-     */
-    wordCounts.apply(ParDo.of(new FormatAsTableRowFn()))
-        .apply(BigQueryIO.Write
-          .to(getTableReference(options))
-          .withSchema(getSchema())
-          .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
-          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));
-
-    PipelineResult result = pipeline.run();
-
-    /**
-     * To mock unbounded input from PubSub, we'll now start an auxiliary 'injector' pipeline that
-     * runs for a limited time, and publishes to the input PubSub topic.
-     *
-     * With an unbounded input source, you will need to explicitly shut down this pipeline when you
-     * are done with it, so that you do not continue to be charged for the instances. You can do
-     * this via a ctrl-C from the command line, or from the developer's console UI for Dataflow
-     * pipelines. The PubSub topic will also be deleted at this time.
-     */
-    exampleDataflowUtils.mockUnboundedSource(options.getInputFile(), result);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java b/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java
deleted file mode 100644
index 1086106..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/WordCount.java
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.MapElements;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SimpleFunction;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-
-/**
- * An example that counts words in Shakespeare and includes Dataflow best practices.
- *
- * <p>This class, {@link WordCount}, is the second in a series of four successively more detailed
- * 'word count' examples. You may first want to take a look at {@link MinimalWordCount}.
- * After you've looked at this example, then see the {@link DebuggingWordCount}
- * pipeline, for introduction of additional concepts.
- *
- * <p>For a detailed walkthrough of this example, see
- *   <a href="https://cloud.google.com/dataflow/java-sdk/wordcount-example">
- *   https://cloud.google.com/dataflow/java-sdk/wordcount-example
- *   </a>
- *
- * <p>Basic concepts, also in the MinimalWordCount example:
- * Reading text files; counting a PCollection; writing to GCS.
- *
- * <p>New Concepts:
- * <pre>
- *   1. Executing a Pipeline both locally and using the Dataflow service
- *   2. Using ParDo with static DoFns defined out-of-line
- *   3. Building a composite transform
- *   4. Defining your own pipeline options
- * </pre>
- *
- * <p>Concept #1: you can execute this pipeline either locally or using the Dataflow service.
- * These are now command-line options and not hard-coded as they were in the MinimalWordCount
- * example.
- * To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and a local output file or output prefix on GCS:
- * <pre>{@code
- *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
- * }</pre>
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- * }
- * </pre>
- * and an output prefix on GCS:
- * <pre>{@code
- *   --output=gs://YOUR_OUTPUT_PREFIX
- * }</pre>
- *
- * <p>The input file defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt} and can be
- * overridden with {@code --inputFile}.
- */
-public class WordCount {
-
-  /**
-   * Concept #2: You can make your pipeline code less verbose by defining your DoFns statically out-
-   * of-line. This DoFn tokenizes lines of text into individual words; we pass it to a ParDo in the
-   * pipeline.
-   */
-  static class ExtractWordsFn extends DoFn<String, String> {
-    private final Aggregator<Long, Long> emptyLines =
-        createAggregator("emptyLines", new Sum.SumLongFn());
-
-    @Override
-    public void processElement(ProcessContext c) {
-      if (c.element().trim().isEmpty()) {
-        emptyLines.addValue(1L);
-      }
-
-      // Split the line into words.
-      String[] words = c.element().split("[^a-zA-Z']+");
-
-      // Output each word encountered into the output PCollection.
-      for (String word : words) {
-        if (!word.isEmpty()) {
-          c.output(word);
-        }
-      }
-    }
-  }
-
-  /** A SimpleFunction that converts a Word and Count into a printable string. */
-  public static class FormatAsTextFn extends SimpleFunction<KV<String, Long>, String> {
-    @Override
-    public String apply(KV<String, Long> input) {
-      return input.getKey() + ": " + input.getValue();
-    }
-  }
-
-  /**
-   * A PTransform that converts a PCollection containing lines of text into a PCollection of
-   * formatted word counts.
-   *
-   * <p>Concept #3: This is a custom composite transform that bundles two transforms (ParDo and
-   * Count) as a reusable PTransform subclass. Using composite transforms allows for easy reuse,
-   * modular testing, and an improved monitoring experience.
-   */
-  public static class CountWords extends PTransform<PCollection<String>,
-      PCollection<KV<String, Long>>> {
-    @Override
-    public PCollection<KV<String, Long>> apply(PCollection<String> lines) {
-
-      // Convert lines of text into individual words.
-      PCollection<String> words = lines.apply(
-          ParDo.of(new ExtractWordsFn()));
-
-      // Count the number of times each word occurs.
-      PCollection<KV<String, Long>> wordCounts =
-          words.apply(Count.<String>perElement());
-
-      return wordCounts;
-    }
-  }
-
-  /**
-   * Options supported by {@link WordCount}.
-   *
-   * <p>Concept #4: Defining your own configuration options. Here, you can add your own arguments
-   * to be processed by the command-line parser, and specify default values for them. You can then
-   * access the options values in your pipeline code.
-   *
-   * <p>Inherits standard configuration options.
-   */
-  public static interface WordCountOptions extends PipelineOptions {
-    @Description("Path of the file to read from")
-    @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt")
-    String getInputFile();
-    void setInputFile(String value);
-
-    @Description("Path of the file to write to")
-    @Default.InstanceFactory(OutputFactory.class)
-    String getOutput();
-    void setOutput(String value);
-
-    /**
-     * Returns "gs://${YOUR_STAGING_DIRECTORY}/counts.txt" as the default destination.
-     */
-    public static class OutputFactory implements DefaultValueFactory<String> {
-      @Override
-      public String create(PipelineOptions options) {
-        DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
-        if (dataflowOptions.getStagingLocation() != null) {
-          return GcsPath.fromUri(dataflowOptions.getStagingLocation())
-              .resolve("counts.txt").toString();
-        } else {
-          throw new IllegalArgumentException("Must specify --output or --stagingLocation");
-        }
-      }
-    }
-
-  }
-
-  public static void main(String[] args) {
-    WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
-      .as(WordCountOptions.class);
-    Pipeline p = Pipeline.create(options);
-
-    // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the
-    // static FormatAsTextFn() to the ParDo transform.
-    p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
-     .apply(new CountWords())
-     .apply(MapElements.via(new FormatAsTextFn()))
-     .apply(TextIO.Write.named("WriteCounts").to(options.getOutput()));
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleOptions.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleOptions.java b/examples/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleOptions.java
deleted file mode 100644
index 606bfb4..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleOptions.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.common;
-
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-
-/**
- * Options that can be used to configure the Dataflow examples.
- */
-public interface DataflowExampleOptions extends DataflowPipelineOptions {
-  @Description("Whether to keep jobs running on the Dataflow service after local process exit")
-  @Default.Boolean(false)
-  boolean getKeepJobsRunning();
-  void setKeepJobsRunning(boolean keepJobsRunning);
-
-  @Description("Number of workers to use when executing the injector pipeline")
-  @Default.Integer(1)
-  int getInjectorNumWorkers();
-  void setInjectorNumWorkers(int numWorkers);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java b/examples/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java
deleted file mode 100644
index 4dfdd85..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/common/DataflowExampleUtils.java
+++ /dev/null
@@ -1,485 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.common;
-
-import com.google.api.client.googleapis.json.GoogleJsonResponseException;
-import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
-import com.google.api.client.util.BackOff;
-import com.google.api.client.util.BackOffUtils;
-import com.google.api.client.util.Sleeper;
-import com.google.api.services.bigquery.Bigquery;
-import com.google.api.services.bigquery.Bigquery.Datasets;
-import com.google.api.services.bigquery.Bigquery.Tables;
-import com.google.api.services.bigquery.model.Dataset;
-import com.google.api.services.bigquery.model.DatasetReference;
-import com.google.api.services.bigquery.model.Table;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.api.services.dataflow.Dataflow;
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.pubsub.model.Subscription;
-import com.google.api.services.pubsub.model.Topic;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.BigQueryOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineJob;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.AttemptBoundedExponentialBackOff;
-import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.cloud.dataflow.sdk.values.PBegin;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.Strings;
-import com.google.common.base.Throwables;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-
-import javax.servlet.http.HttpServletResponse;
-
-/**
- * The utility class that sets up and tears down external resources, starts the Google Cloud Pub/Sub
- * injector, and cancels the streaming and the injector pipelines once the program terminates.
- *
- * <p>It is used to run Dataflow examples, such as TrafficMaxLaneFlow and TrafficRoutes.
- */
-public class DataflowExampleUtils {
-
-  /** Pipeline options shared by every helper method of this class. */
-  private final DataflowPipelineOptions options;
-  // Service clients; each one stays null until a method that needs it assigns it.
-  private Bigquery bigQueryClient;
-  private Pubsub pubsubClient;
-  private Dataflow dataflowClient;
-  /** Dataflow jobs handed to the shutdown hook for cancellation. */
-  private Set<DataflowPipelineJob> jobsToCancel = Sets.newHashSet();
-  /** Status lines collected during set up and tear down, printed in one batch later on. */
-  private List<String> pendingMessages = Lists.newArrayList();
-
-  /**
-   * Creates the helper without touching any external resource.
-   */
-  public DataflowExampleUtils(DataflowPipelineOptions options) {
-    this.options = options;
-  }
-
-  /**
-   * Creates the helper and immediately sets up the external resources and the runner.
-   *
-   * @throws IOException if setting up the external resources fails
-   */
-  public DataflowExampleUtils(DataflowPipelineOptions options, boolean isUnbounded)
-      throws IOException {
-    this(options);
-    setupResourcesAndRunner(isUnbounded);
-  }
-
-  /**
-   * Sets up external resources that are required by the example,
-   * such as Pub/Sub topics and BigQuery tables.
-   *
-   * <p>The whole setup is retried, under an {@code AttemptBoundedExponentialBackOff(3, 200)}
-   * policy, whenever it fails with a {@link GoogleJsonResponseException}. Once the policy gives
-   * up, or the wait between two attempts is interrupted, the last such exception is rethrown
-   * through {@link Throwables#propagate}.
-   *
-   * @throws IOException if a request fails with anything other than a
-   *     {@link GoogleJsonResponseException}
-   */
-  public void setup() throws IOException {
-    Sleeper sleeper = Sleeper.DEFAULT;
-    BackOff backOff = new AttemptBoundedExponentialBackOff(3, 200);
-    Throwable lastException = null;
-    try {
-      do {
-        try {
-          setupPubsub();
-          setupBigQueryTable();
-          return;
-        } catch (GoogleJsonResponseException e) {
-          lastException = e;
-        }
-      } while (BackOffUtils.next(sleeper, backOff));
-    } catch (InterruptedException e) {
-      // Only the back-off wait can be interrupted, so lastException is already set here.
-      // Restore the interrupt status so that code further up the stack can still observe it.
-      Thread.currentThread().interrupt();
-    }
-    throw Throwables.propagate(lastException);
-  }
-
-  /**
-   * Set up external resources, and configure the runner appropriately.
-   *
-   * <p>Streaming is switched on in the options first when {@code isUnbounded} is true, so that
-   * the runner selection that follows sees the final streaming flag.
-   *
-   * @param isUnbounded whether the example reads an unbounded (streaming) input
-   * @throws IOException if there is a problem setting up the resources
-   */
-  public void setupResourcesAndRunner(boolean isUnbounded) throws IOException {
-    if (isUnbounded) {
-      options.setStreaming(true);
-    }
-    setup();
-    setupRunner();
-  }
-
-  /**
-   * Sets up the Google Cloud Pub/Sub topic and, when one is configured, its subscription.
-   *
-   * <p>Nothing happens when the configured topic name is empty. A note about every resource
-   * that has been set up is queued in the pending messages.
-   *
-   * @throws IOException if there is a problem setting up the Pub/Sub topic or subscription
-   */
-  public void setupPubsub() throws IOException {
-    ExamplePubsubTopicAndSubscriptionOptions pubsubOptions =
-        options.as(ExamplePubsubTopicAndSubscriptionOptions.class);
-    if (pubsubOptions.getPubsubTopic().isEmpty()) {
-      return;
-    }
-
-    String topic = pubsubOptions.getPubsubTopic();
-    pendingMessages.add("**********************Set Up Pubsub************************");
-    setupPubsubTopic(topic);
-    pendingMessages.add("The Pub/Sub topic has been set up for this example: " + topic);
-
-    String subscription = pubsubOptions.getPubsubSubscription();
-    if (subscription.isEmpty()) {
-      return;
-    }
-    setupPubsubSubscription(topic, subscription);
-    pendingMessages.add(
-        "The Pub/Sub subscription has been set up for this example: " + subscription);
-  }
-
-  /**
-   * Sets up the BigQuery table described by the example's options.
-   *
-   * <p>Nothing happens unless the dataset, the table and the schema options are all non-null.
-   * An existing table must already have the configured schema, otherwise a RuntimeException is
-   * thrown; a missing table is created with that schema.
-   *
-   * @throws IOException if there is a problem setting up the BigQuery table
-   */
-  public void setupBigQueryTable() throws IOException {
-    ExampleBigQueryTableOptions tableOptions = options.as(ExampleBigQueryTableOptions.class);
-    if (tableOptions.getBigQueryDataset() == null
-        || tableOptions.getBigQueryTable() == null
-        || tableOptions.getBigQuerySchema() == null) {
-      return;
-    }
-
-    String project = tableOptions.getProject();
-    String dataset = tableOptions.getBigQueryDataset();
-    String table = tableOptions.getBigQueryTable();
-
-    pendingMessages.add("******************Set Up Big Query Table*******************");
-    setupBigQueryTable(project, dataset, table, tableOptions.getBigQuerySchema());
-    pendingMessages.add("The BigQuery table has been set up for this example: "
-        + project + ":" + dataset + "." + table);
-  }
-
-  /**
-   * Tears down external resources that can be deleted upon the example's completion.
-   *
-   * <p>The Pub/Sub topic and subscription are deleted on a best-effort basis: a failure is
-   * recorded in the pending messages, together with its reason, instead of being thrown. The
-   * BigQuery table is never deleted; a reminder to remove it manually is queued instead.
-   */
-  private void tearDown() {
-    pendingMessages.add("*************************Tear Down*************************");
-    ExamplePubsubTopicAndSubscriptionOptions pubsubOptions =
-        options.as(ExamplePubsubTopicAndSubscriptionOptions.class);
-    if (!pubsubOptions.getPubsubTopic().isEmpty()) {
-      try {
-        deletePubsubTopic(pubsubOptions.getPubsubTopic());
-        pendingMessages.add("The Pub/Sub topic has been deleted: "
-            + pubsubOptions.getPubsubTopic());
-      } catch (IOException e) {
-        // Keep going, but report why the deletion failed so the user can act on it.
-        pendingMessages.add("Failed to delete the Pub/Sub topic : "
-            + pubsubOptions.getPubsubTopic() + " (" + e.getMessage() + ")");
-      }
-      if (!pubsubOptions.getPubsubSubscription().isEmpty()) {
-        try {
-          deletePubsubSubscription(pubsubOptions.getPubsubSubscription());
-          pendingMessages.add("The Pub/Sub subscription has been deleted: "
-              + pubsubOptions.getPubsubSubscription());
-        } catch (IOException e) {
-          // Same best-effort handling as for the topic above.
-          pendingMessages.add("Failed to delete the Pub/Sub subscription : "
-              + pubsubOptions.getPubsubSubscription() + " (" + e.getMessage() + ")");
-        }
-      }
-    }
-
-    ExampleBigQueryTableOptions bigQueryTableOptions =
-        options.as(ExampleBigQueryTableOptions.class);
-    if (bigQueryTableOptions.getBigQueryDataset() != null
-        && bigQueryTableOptions.getBigQueryTable() != null
-        && bigQueryTableOptions.getBigQuerySchema() != null) {
-      pendingMessages.add("The BigQuery table might contain the example's output, "
-          + "and it is not deleted automatically: "
-          + bigQueryTableOptions.getProject()
-          + ":" + bigQueryTableOptions.getBigQueryDataset()
-          + "." + bigQueryTableOptions.getBigQueryTable());
-      pendingMessages.add("Please go to the Developers Console to delete it manually."
-          + " Otherwise, you may be charged for its usage.");
-    }
-  }
-
-  /**
-   * Makes sure the given dataset and table exist, creating whichever of them is missing.
-   *
-   * <p>An existing table is only accepted when its schema equals {@code schema}; otherwise a
-   * RuntimeException describing both schemas is thrown.
-   *
-   * @throws IOException if a BigQuery request fails
-   */
-  private void setupBigQueryTable(String projectId, String datasetId, String tableId,
-      TableSchema schema) throws IOException {
-    if (bigQueryClient == null) {
-      bigQueryClient = Transport.newBigQueryClient(options.as(BigQueryOptions.class)).build();
-    }
-
-    // Dataset first: the table below lives inside it.
-    Datasets datasets = bigQueryClient.datasets();
-    Dataset existingDataset = executeNullIfNotFound(datasets.get(projectId, datasetId));
-    if (existingDataset == null) {
-      DatasetReference datasetRef =
-          new DatasetReference().setProjectId(projectId).setDatasetId(datasetId);
-      datasets.insert(projectId, new Dataset().setDatasetReference(datasetRef)).execute();
-    }
-
-    Tables tables = bigQueryClient.tables();
-    Table existingTable = executeNullIfNotFound(tables.get(projectId, datasetId, tableId));
-    if (existingTable == null) {
-      TableReference tableRef =
-          new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId);
-      Table tableToCreate = new Table().setSchema(schema).setTableReference(tableRef);
-      tables.insert(projectId, datasetId, tableToCreate).execute();
-      return;
-    }
-    if (existingTable.getSchema().equals(schema)) {
-      return;
-    }
-    throw new RuntimeException(
-        "Table exists and schemas do not match, expecting: " + schema.toPrettyString()
-        + ", actual: " + existingTable.getSchema().toPrettyString());
-  }
-
-  /**
-   * Creates the given Pub/Sub topic unless a lookup shows that it already exists.
-   *
-   * @throws IOException if a Pub/Sub request fails
-   */
-  private void setupPubsubTopic(String topic) throws IOException {
-    if (pubsubClient == null) {
-      pubsubClient = Transport.newPubsubClient(options).build();
-    }
-    Topic existingTopic = executeNullIfNotFound(pubsubClient.projects().topics().get(topic));
-    if (existingTopic != null) {
-      return;
-    }
-    Topic newTopic = new Topic().setName(topic);
-    pubsubClient.projects().topics().create(topic, newTopic).execute();
-  }
-
-  /**
-   * Creates the given Pub/Sub subscription on {@code topic} unless it already exists.
-   *
-   * <p>A newly created subscription gets an acknowledgement deadline of 60 seconds.
-   *
-   * @throws IOException if a Pub/Sub request fails
-   */
-  private void setupPubsubSubscription(String topic, String subscription) throws IOException {
-    if (pubsubClient == null) {
-      pubsubClient = Transport.newPubsubClient(options).build();
-    }
-    Subscription existing =
-        executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription));
-    if (existing != null) {
-      return;
-    }
-    Subscription subInfo = new Subscription();
-    subInfo.setAckDeadlineSeconds(60);
-    subInfo.setTopic(topic);
-    pubsubClient.projects().subscriptions().create(subscription, subInfo).execute();
-  }
-
-  /**
-   * Deletes the Google Cloud Pub/Sub topic, doing nothing when a lookup does not find it.
-   *
-   * @throws IOException if there is a problem deleting the Pub/Sub topic
-   */
-  private void deletePubsubTopic(String topic) throws IOException {
-    if (pubsubClient == null) {
-      pubsubClient = Transport.newPubsubClient(options).build();
-    }
-    Topic existingTopic = executeNullIfNotFound(pubsubClient.projects().topics().get(topic));
-    if (existingTopic == null) {
-      return;
-    }
-    pubsubClient.projects().topics().delete(topic).execute();
-  }
-
-  /**
-   * Deletes the Google Cloud Pub/Sub subscription, doing nothing when a lookup does not find it.
-   *
-   * @throws IOException if there is a problem deleting the Pub/Sub subscription
-   */
-  private void deletePubsubSubscription(String subscription) throws IOException {
-    if (pubsubClient == null) {
-      pubsubClient = Transport.newPubsubClient(options).build();
-    }
-    Subscription existing =
-        executeNullIfNotFound(pubsubClient.projects().subscriptions().get(subscription));
-    if (existing == null) {
-      return;
-    }
-    pubsubClient.projects().subscriptions().delete(subscription).execute();
-  }
-
-  /**
-   * Starts an 'injector' pipeline that publishes the contents of {@code inputFile} to the
-   * configured Pub/Sub topic.
-   *
-   * <p>The injector is only started when the options request streaming and both the input file
-   * and the topic are non-empty; otherwise this method does nothing.
-   */
-  public void startInjectorIfNeeded(String inputFile) {
-    ExamplePubsubTopicOptions topicOptions = options.as(ExamplePubsubTopicOptions.class);
-    if (!topicOptions.isStreaming()) {
-      return;
-    }
-    if (Strings.isNullOrEmpty(inputFile)) {
-      return;
-    }
-    String topic = topicOptions.getPubsubTopic();
-    if (Strings.isNullOrEmpty(topic)) {
-      return;
-    }
-    runInjectorPipeline(inputFile, topic);
-  }
-
-  /**
-   * Selects the runner for streaming examples.
-   *
-   * <p>When streaming is enabled and the configured runner is anything other than
-   * DirectPipelineRunner, the runner is replaced by DataflowPipelineRunner. In every other case
-   * the options are left as they are.
-   */
-  public void setupRunner() {
-    if (!options.isStreaming()) {
-      return;
-    }
-    if (options.getRunner() == DirectPipelineRunner.class) {
-      return;
-    }
-    // DataflowPipelineRunner is forced so that the pipelines can be cancelled automatically.
-    options.setRunner(DataflowPipelineRunner.class);
-  }
-
-  /**
-   * Runs a batch pipeline to inject data into the PubSubIO input topic.
-   *
-   * <p>The injector pipeline will read from the given text file, and inject data
-   * into the Google Cloud Pub/Sub topic. No timestamp label key is used.
-   *
-   * @param inputFile location of the text file whose lines are published
-   * @param topic Pub/Sub topic the lines are published to
-   */
-  public void runInjectorPipeline(String inputFile, String topic) {
-    // The trailing null selects the variant that publishes without a timestamp label.
-    runInjectorPipeline(TextIO.Read.from(inputFile), topic, null);
-  }
-
-  /**
-   * Runs a batch pipeline to inject data into the PubSubIO input topic.
-   *
-   * <p>The injector pipeline will read from the given source, and inject data
-   * into the Google Cloud Pub/Sub topic. It runs on a copy of this helper's options in which
-   * streaming is switched off, the number of workers is taken from
-   * {@code DataflowExampleOptions.getInjectorNumWorkers()} and "-injector" is appended to the
-   * job name.
-   *
-   * @param readSource transform that produces the lines to publish
-   * @param topic Pub/Sub topic the lines are published to
-   * @param pubsubTimestampLabelKey label key passed to the injector for the element timestamp;
-   *     when null or empty the injector is created without a timestamp label
-   */
-  public void runInjectorPipeline(PTransform<? super PBegin, PCollection<String>> readSource,
-                                  String topic,
-                                  String pubsubTimestampLabelKey) {
-    PubsubFileInjector.Bound injector;
-    if (Strings.isNullOrEmpty(pubsubTimestampLabelKey)) {
-      injector = PubsubFileInjector.publish(topic);
-    } else {
-      injector = PubsubFileInjector.withTimestampLabelKey(pubsubTimestampLabelKey).publish(topic);
-    }
-    DataflowPipelineOptions copiedOptions = options.cloneAs(DataflowPipelineOptions.class);
-    // The service account settings are copied over explicitly whenever they are set.
-    // NOTE(review): presumably cloneAs() does not carry them over - confirm.
-    if (options.getServiceAccountName() != null) {
-      copiedOptions.setServiceAccountName(options.getServiceAccountName());
-    }
-    if (options.getServiceAccountKeyfile() != null) {
-      copiedOptions.setServiceAccountKeyfile(options.getServiceAccountKeyfile());
-    }
-    copiedOptions.setStreaming(false);
-    copiedOptions.setNumWorkers(options.as(DataflowExampleOptions.class).getInjectorNumWorkers());
-    copiedOptions.setJobName(options.getJobName() + "-injector");
-    Pipeline injectorPipeline = Pipeline.create(copiedOptions);
-    injectorPipeline.apply(readSource)
-                    .apply(IntraBundleParallelization
-                        .of(injector)
-                        .withMaxParallelism(20));
-    // Running the pipeline and registering its job for cancellation is shared with the
-    // overload that takes a ready-made pipeline.
-    runInjectorPipeline(injectorPipeline);
-  }
-
-  /**
-   * Runs the provided injector pipeline and, when the run yields a Dataflow job, registers
-   * that job for cancellation.
-   */
-  public void runInjectorPipeline(Pipeline injectorPipeline) {
-    PipelineResult injectorResult = injectorPipeline.run();
-    if (!(injectorResult instanceof DataflowPipelineJob)) {
-      return;
-    }
-    DataflowPipelineJob injectorJob = (DataflowPipelineJob) injectorResult;
-    jobsToCancel.add(injectorJob);
-  }
-
-  /**
-   * Start the auxiliary injector pipeline, then wait for this pipeline to finish.
-   *
-   * @param inputFile file whose contents the injector publishes; the injector is skipped when
-   *     the conditions checked by {@code startInjectorIfNeeded} are not met
-   * @param result result of the main pipeline run, which is waited on
-   */
-  public void mockUnboundedSource(String inputFile, PipelineResult result) {
-    startInjectorIfNeeded(inputFile);
-    waitToFinish(result);
-  }
-
-  /**
-   * Waits for the given pipeline run to finish.
-   *
-   * <p>If the result is a {@code DataflowPipelineJob}, the job is registered for cancellation,
-   * a shutdown hook that cancels the registered jobs is installed (unless the options ask to
-   * keep jobs running), and this method blocks until the job finishes. For any other kind of
-   * result the resources are torn down and the pending messages are printed right away.
-   *
-   * @throws RuntimeException if waiting for the Dataflow job fails; the original failure is
-   *     kept as the cause
-   */
-  public void waitToFinish(PipelineResult result) {
-    if (result instanceof DataflowPipelineJob) {
-      final DataflowPipelineJob job = (DataflowPipelineJob) result;
-      jobsToCancel.add(job);
-      if (!options.as(DataflowExampleOptions.class).getKeepJobsRunning()) {
-        addShutdownHook(jobsToCancel);
-      }
-      try {
-        job.waitToFinish(-1, TimeUnit.SECONDS, new MonitoringUtil.PrintHandler(System.out));
-      } catch (Exception e) {
-        if (e instanceof InterruptedException) {
-          // Keep the interrupt visible to whoever handles the RuntimeException below.
-          Thread.currentThread().interrupt();
-        }
-        throw new RuntimeException("Failed to wait for job to finish: " + job.getJobId(), e);
-      }
-    } else {
-      // There is nothing to wait for when the given PipelineResult doesn't support
-      // waitToFinish(), such as EvaluationResults returned by DirectPipelineRunner,
-      // so clean up and report immediately.
-      tearDown();
-      printPendingMessages();
-    }
-  }
-
-  /**
-   * Registers a JVM shutdown hook that tears down the example's resources, prints the pending
-   * messages, cancels every given job and then verifies the cancellations.
-   *
-   * <p>Each job is polled at most six times, ten seconds apart, until it reports a terminal
-   * state. If the hook thread is interrupted while waiting, polling of that job stops and the
-   * interrupt status is kept, so the hook does not delay shutdown any further.
-   *
-   * @param jobs the jobs to cancel; the collection is read when the hook runs, not when it is
-   *     registered
-   */
-  private void addShutdownHook(final Collection<DataflowPipelineJob> jobs) {
-    if (dataflowClient == null) {
-      dataflowClient = options.getDataflowClient();
-    }
-
-    Runtime.getRuntime().addShutdownHook(new Thread() {
-      @Override
-      public void run() {
-        tearDown();
-        printPendingMessages();
-        for (DataflowPipelineJob job : jobs) {
-          System.out.println("Canceling example pipeline: " + job.getJobId());
-          try {
-            job.cancel();
-          } catch (IOException e) {
-            System.out.println("Failed to cancel the job,"
-                + " please go to the Developers Console to cancel it manually");
-            System.out.println(
-                MonitoringUtil.getJobMonitoringPageURL(job.getProjectId(), job.getJobId()));
-          }
-        }
-
-        for (DataflowPipelineJob job : jobs) {
-          boolean cancellationVerified = false;
-          for (int retryAttempts = 6; retryAttempts > 0; retryAttempts--) {
-            if (job.getState().isTerminal()) {
-              cancellationVerified = true;
-              System.out.println("Canceled example pipeline: " + job.getJobId());
-              break;
-            } else {
-              System.out.println(
-                  "The example pipeline is still running. Verifying the cancellation.");
-            }
-            try {
-              Thread.sleep(10000);
-            } catch (InterruptedException e) {
-              // Stop waiting rather than swallowing the interrupt, and keep the status set
-              // so the remaining jobs are each checked once without sleeping.
-              Thread.currentThread().interrupt();
-              break;
-            }
-          }
-          if (!cancellationVerified) {
-            System.out.println("Failed to verify the cancellation for job: " + job.getJobId());
-            System.out.println("Please go to the Developers Console to verify manually:");
-            System.out.println(
-                MonitoringUtil.getJobMonitoringPageURL(job.getProjectId(), job.getJobId()));
-          }
-        }
-      }
-    });
-  }
-
-  /**
-   * Prints every pending message to standard output, framed by a blank line and a double
-   * banner line above and a double banner line below.
-   */
-  private void printPendingMessages() {
-    final String banner = "***********************************************************";
-    System.out.println();
-    System.out.println(banner);
-    System.out.println(banner);
-    for (String pendingMessage : pendingMessages) {
-      System.out.println(pendingMessage);
-    }
-    System.out.println(banner);
-    System.out.println(banner);
-  }
-
-  /**
-   * Executes the request and returns its result, mapping a "not found" response to null.
-   *
-   * @return the response, or null when the service answers with the not-found status code
-   * @throws IOException if the request fails for any other reason
-   */
-  private static <T> T executeNullIfNotFound(
-      AbstractGoogleClientRequest<T> request) throws IOException {
-    try {
-      return request.execute();
-    } catch (GoogleJsonResponseException e) {
-      if (e.getStatusCode() != HttpServletResponse.SC_NOT_FOUND) {
-        throw e;
-      }
-      return null;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExampleBigQueryTableOptions.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExampleBigQueryTableOptions.java b/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExampleBigQueryTableOptions.java
deleted file mode 100644
index 7c213b5..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExampleBigQueryTableOptions.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.common;
-
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-/**
- * Options that can be used to configure BigQuery tables in Dataflow examples.
- *
- * <p>The dataset defaults to "dataflow_examples" and the table name is derived from the job
- * name. The schema has no default value declared here.
- */
-public interface ExampleBigQueryTableOptions extends DataflowPipelineOptions {
-  @Description("BigQuery dataset name")
-  @Default.String("dataflow_examples")
-  String getBigQueryDataset();
-  void setBigQueryDataset(String dataset);
-
-  @Description("BigQuery table name")
-  @Default.InstanceFactory(BigQueryTableFactory.class)
-  String getBigQueryTable();
-  void setBigQueryTable(String table);
-
-  @Description("BigQuery table schema")
-  TableSchema getBigQuerySchema();
-  void setBigQuerySchema(TableSchema schema);
-
-  /**
-   * Default-value factory for the table name: the job name with every '-' replaced by '_'.
-   */
-  static class BigQueryTableFactory implements DefaultValueFactory<String> {
-    @Override
-    public String create(PipelineOptions options) {
-      DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
-      String jobName = dataflowOptions.getJobName();
-      return jobName.replace('-', '_');
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicAndSubscriptionOptions.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicAndSubscriptionOptions.java b/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicAndSubscriptionOptions.java
deleted file mode 100644
index d7bd4b8..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicAndSubscriptionOptions.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.common;
-
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-/**
- * Options that can be used to configure Pub/Sub topic/subscription in Dataflow examples.
- *
- * <p>Adds the subscription setting on top of the topic setting it inherits.
- */
-public interface ExamplePubsubTopicAndSubscriptionOptions extends ExamplePubsubTopicOptions {
-  @Description("Pub/Sub subscription")
-  @Default.InstanceFactory(PubsubSubscriptionFactory.class)
-  String getPubsubSubscription();
-  void setPubsubSubscription(String subscription);
-
-  /**
-   * Default-value factory producing {@code projects/<project>/subscriptions/<job name>}.
-   */
-  static class PubsubSubscriptionFactory implements DefaultValueFactory<String> {
-    @Override
-    public String create(PipelineOptions options) {
-      DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
-      String project = dataflowOptions.getProject();
-      String jobName = dataflowOptions.getJobName();
-      return "projects/" + project + "/subscriptions/" + jobName;
-    }
-  }
-}

[55/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicOptions.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicOptions.java b/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicOptions.java
deleted file mode 100644
index 4bedf31..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/common/ExamplePubsubTopicOptions.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.common;
-
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-/**
- * Options that can be used to configure Pub/Sub topic in Dataflow examples.
- */
-public interface ExamplePubsubTopicOptions extends DataflowPipelineOptions {
-  @Description("Pub/Sub topic")
-  @Default.InstanceFactory(PubsubTopicFactory.class)
-  String getPubsubTopic();
-  void setPubsubTopic(String topic);
-
-  /**
-   * Default-value factory producing {@code projects/<project>/topics/<job name>}.
-   */
-  static class PubsubTopicFactory implements DefaultValueFactory<String> {
-    @Override
-    public String create(PipelineOptions options) {
-      DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
-      String project = dataflowOptions.getProject();
-      String jobName = dataflowOptions.getJobName();
-      return "projects/" + project + "/topics/" + jobName;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/common/PubsubFileInjector.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/common/PubsubFileInjector.java b/examples/src/main/java/com/google/cloud/dataflow/examples/common/PubsubFileInjector.java
deleted file mode 100644
index 4a82ae6..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/common/PubsubFileInjector.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.common;
-
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.pubsub.model.PublishRequest;
-import com.google.api.services.pubsub.model.PubsubMessage;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.common.collect.ImmutableMap;
-
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-
-/**
- * A batch Dataflow pipeline for injecting a set of GCS files into
- * a PubSub topic line by line. Empty lines are skipped.
- *
- * <p>This is useful for testing streaming
- * pipelines. Note that since batch pipelines might retry chunks, this
- * does _not_ guarantee exactly-once injection of file data. Some lines may
- * be published multiple times.
- *
- * <p>Every line is published as a UTF-8 encoded message payload.
- */
-public class PubsubFileInjector {
-
-  /**
-   * An incomplete {@code PubsubFileInjector} transform with unbound output topic.
-   */
-  public static class Unbound {
-    // Label key that carries the element timestamp; null means no timestamp label is attached.
-    private final String timestampLabelKey;
-
-    Unbound() {
-      this.timestampLabelKey = null;
-    }
-
-    Unbound(String timestampLabelKey) {
-      this.timestampLabelKey = timestampLabelKey;
-    }
-
-    /** Returns a new {@code Unbound} that uses the given timestamp label key. */
-    Unbound withTimestampLabelKey(String timestampLabelKey) {
-      return new Unbound(timestampLabelKey);
-    }
-
-    /** Binds the output topic, yielding the {@link Bound} function that does the publishing. */
-    public Bound publish(String outputTopic) {
-      return new Bound(outputTopic, timestampLabelKey);
-    }
-  }
-
-  /** A DoFn that publishes non-empty lines to Google Cloud PubSub. */
-  public static class Bound extends DoFn<String, Void> {
-    private final String outputTopic;
-    private final String timestampLabelKey;
-    // Rebuilt in startBundle(); transient, so it is not part of the serialized function.
-    public transient Pubsub pubsub;
-
-    public Bound(String outputTopic, String timestampLabelKey) {
-      this.outputTopic = outputTopic;
-      this.timestampLabelKey = timestampLabelKey;
-    }
-
-    /** Creates the Pub/Sub client from the pipeline options of the running bundle. */
-    @Override
-    public void startBundle(Context context) {
-      this.pubsub =
-          Transport.newPubsubClient(context.getPipelineOptions().as(DataflowPipelineOptions.class))
-              .build();
-    }
-
-    /**
-     * Publishes the element as a single Pub/Sub message, skipping empty elements.
-     *
-     * <p>When a timestamp label key is configured, the element timestamp in milliseconds is
-     * attached to the message under that key.
-     *
-     * @throws IOException if the publish request fails
-     */
-    @Override
-    public void processElement(ProcessContext c) throws IOException {
-      if (c.element().isEmpty()) {
-        return;
-      }
-      PubsubMessage pubsubMessage = new PubsubMessage();
-      // Encode explicitly as UTF-8 so the payload does not depend on the worker's default charset.
-      pubsubMessage.encodeData(c.element().getBytes(StandardCharsets.UTF_8));
-      if (timestampLabelKey != null) {
-        pubsubMessage.setAttributes(
-            ImmutableMap.of(timestampLabelKey, Long.toString(c.timestamp().getMillis())));
-      }
-      PublishRequest publishRequest = new PublishRequest();
-      publishRequest.setMessages(Arrays.asList(pubsubMessage));
-      this.pubsub.projects().topics().publish(outputTopic, publishRequest).execute();
-    }
-  }
-
-  /**
-   * Creates a {@code PubsubFileInjector} transform with the given timestamp label key.
-   */
-  public static Unbound withTimestampLabelKey(String timestampLabelKey) {
-    return new Unbound(timestampLabelKey);
-  }
-
-  /**
-   * Creates a {@code PubsubFileInjector} transform that publishes to the given output topic.
-   */
-  public static Bound publish(String outputTopic) {
-    return new Unbound().publish(outputTopic);
-  }
-
-  /**
-   * Command line parameter options.
-   */
-  private interface PubsubFileInjectorOptions extends PipelineOptions {
-    @Description("GCS location of files.")
-    @Validation.Required
-    String getInput();
-    void setInput(String value);
-
-    @Description("Topic to publish on.")
-    @Validation.Required
-    String getOutputTopic();
-    void setOutputTopic(String value);
-  }
-
-  /**
-   * Sets up and runs the injector pipeline: reads the input files as text and publishes every
-   * non-empty line to the output topic.
-   */
-  public static void main(String[] args) {
-    PubsubFileInjectorOptions options = PipelineOptionsFactory.fromArgs(args)
-        .withValidation()
-        .as(PubsubFileInjectorOptions.class);
-
-    Pipeline pipeline = Pipeline.create(options);
-
-    pipeline
-        .apply(TextIO.Read.from(options.getInput()))
-        .apply(IntraBundleParallelization.of(PubsubFileInjector.publish(options.getOutputTopic()))
-            .withMaxParallelism(20));
-
-    pipeline.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java b/examples/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java
deleted file mode 100644
index f897338..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.api.services.datastore.DatastoreV1.Entity;
-import com.google.api.services.datastore.DatastoreV1.Key;
-import com.google.api.services.datastore.DatastoreV1.Value;
-import com.google.api.services.datastore.client.DatastoreHelper;
-import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
-import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
-import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicOptions;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.coders.AvroCoder;
-import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.DatastoreIO;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.Filter;
-import com.google.cloud.dataflow.sdk.transforms.Flatten;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.Partition;
-import com.google.cloud.dataflow.sdk.transforms.Partition.PartitionFn;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.transforms.Top;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PBegin;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Preconditions;
-
-import org.joda.time.Duration;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * An example that computes the most popular hash tags
- * for every prefix, which can be used for auto-completion.
- *
- * <p>Concepts: Using the same pipeline in both streaming and batch, combiners,
- *              composite transforms.
- *
- * <p>To execute this pipeline using the Dataflow service in batch mode,
- * specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=DataflowPipelineRunner
- *   --inputFile=gs://path/to/input*.txt
- * }</pre>
- *
- * <p>To execute this pipeline using the Dataflow service in streaming mode,
- * specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=DataflowPipelineRunner
- *   --inputFile=gs://YOUR_INPUT_DIRECTORY/*.txt
- *   --streaming
- * }</pre>
- *
- * <p>In streaming mode, results are recomputed every 5 seconds over a sliding
- * window covering the last 30 minutes of data received.
- */
-public class AutoComplete {
-
-  /**
-   * A PTransform that takes as input a list of tokens and returns
-   * the most common tokens per prefix.
-   */
-  public static class ComputeTopCompletions
-      extends PTransform<PCollection<String>, PCollection<KV<String, List<CompletionCandidate>>>> {
-    private final int candidatesPerPrefix;
-    private final boolean recursive;
-
-    protected ComputeTopCompletions(int candidatesPerPrefix, boolean recursive) {
-      this.candidatesPerPrefix = candidatesPerPrefix;
-      this.recursive = recursive;
-    }
-
-    public static ComputeTopCompletions top(int candidatesPerPrefix, boolean recursive) {
-      return new ComputeTopCompletions(candidatesPerPrefix, recursive);
-    }
-
-    @Override
-    public PCollection<KV<String, List<CompletionCandidate>>> apply(PCollection<String> input) {
-      PCollection<CompletionCandidate> candidates = input
-        // First count how often each token appears.
-        .apply(new Count.PerElement<String>())
-
-        // Map the KV outputs of Count into our own CompletionCandidate class.
-        .apply(ParDo.named("CreateCompletionCandidates").of(
-            new DoFn<KV<String, Long>, CompletionCandidate>() {
-              @Override
-              public void processElement(ProcessContext c) {
-                c.output(new CompletionCandidate(c.element().getKey(), c.element().getValue()));
-              }
-            }));
-
-      // Compute the top via either a flat or recursive algorithm.
-      if (recursive) {
-        return candidates
-          .apply(new ComputeTopRecursive(candidatesPerPrefix, 1))
-          .apply(Flatten.<KV<String, List<CompletionCandidate>>>pCollections());
-      } else {
-        return candidates
-          .apply(new ComputeTopFlat(candidatesPerPrefix, 1));
-      }
-    }
-  }
-
-  /**
-   * Lower latency, but more expensive.
-   */
-  private static class ComputeTopFlat
-      extends PTransform<PCollection<CompletionCandidate>,
-                         PCollection<KV<String, List<CompletionCandidate>>>> {
-    private final int candidatesPerPrefix;
-    private final int minPrefix;
-
-    public ComputeTopFlat(int candidatesPerPrefix, int minPrefix) {
-      this.candidatesPerPrefix = candidatesPerPrefix;
-      this.minPrefix = minPrefix;
-    }
-
-    @Override
-    public PCollection<KV<String, List<CompletionCandidate>>> apply(
-        PCollection<CompletionCandidate> input) {
-      return input
-        // For each completion candidate, map it to all prefixes.
-        .apply(ParDo.of(new AllPrefixes(minPrefix)))
-
-        // Find and return the top candidates for each prefix.
-        .apply(Top.<String, CompletionCandidate>largestPerKey(candidatesPerPrefix)
-               .withHotKeyFanout(new HotKeyFanout()));
-    }
-
-    private static class HotKeyFanout implements SerializableFunction<String, Integer> {
-      @Override
-      public Integer apply(String input) {
-        return (int) Math.pow(4, 5 - input.length());
-      }
-    }
-  }
-
-  /**
-   * Cheaper but higher latency.
-   *
-   * <p>Returns two PCollections, the first is top prefixes of size greater
-   * than minPrefix, and the second is top prefixes of size exactly
-   * minPrefix.
-   */
-  private static class ComputeTopRecursive
-      extends PTransform<PCollection<CompletionCandidate>,
-                         PCollectionList<KV<String, List<CompletionCandidate>>>> {
-    private final int candidatesPerPrefix;
-    private final int minPrefix;
-
-    public ComputeTopRecursive(int candidatesPerPrefix, int minPrefix) {
-      this.candidatesPerPrefix = candidatesPerPrefix;
-      this.minPrefix = minPrefix;
-    }
-
-    private class KeySizePartitionFn implements PartitionFn<KV<String, List<CompletionCandidate>>> {
-      @Override
-      public int partitionFor(KV<String, List<CompletionCandidate>> elem, int numPartitions) {
-        return elem.getKey().length() > minPrefix ? 0 : 1;
-      }
-    }
-
-    private static class FlattenTops
-        extends DoFn<KV<String, List<CompletionCandidate>>, CompletionCandidate> {
-      @Override
-      public void processElement(ProcessContext c) {
-        for (CompletionCandidate cc : c.element().getValue()) {
-          c.output(cc);
-        }
-      }
-    }
-
-    @Override
-    public PCollectionList<KV<String, List<CompletionCandidate>>> apply(
-          PCollection<CompletionCandidate> input) {
-        if (minPrefix > 10) {
-          // Base case, partitioning to return the output in the expected format.
-          return input
-            .apply(new ComputeTopFlat(candidatesPerPrefix, minPrefix))
-            .apply(Partition.of(2, new KeySizePartitionFn()));
-        } else {
-          // If a candidate is in the top N for prefix a...b, it must also be in the top
-          // N for a...bX for every X, which is typically a much smaller set to consider.
-          // First, compute the top candidate for prefixes of size at least minPrefix + 1.
-          PCollectionList<KV<String, List<CompletionCandidate>>> larger = input
-            .apply(new ComputeTopRecursive(candidatesPerPrefix, minPrefix + 1));
-          // Consider the top candidates for each prefix of length minPrefix + 1...
-          PCollection<KV<String, List<CompletionCandidate>>> small =
-            PCollectionList
-            .of(larger.get(1).apply(ParDo.of(new FlattenTops())))
-            // ...together with those (previously excluded) candidates of length
-            // exactly minPrefix...
-            .and(input.apply(Filter.byPredicate(
-                new SerializableFunction<CompletionCandidate, Boolean>() {
-                  @Override
-                  public Boolean apply(CompletionCandidate c) {
-                    return c.getValue().length() == minPrefix;
-                  }
-                })))
-            .apply("FlattenSmall", Flatten.<CompletionCandidate>pCollections())
-            // ...set the key to be the minPrefix-length prefix...
-            .apply(ParDo.of(new AllPrefixes(minPrefix, minPrefix)))
-            // ...and (re)apply the Top operator to all of them together.
-            .apply(Top.<String, CompletionCandidate>largestPerKey(candidatesPerPrefix));
-
-          PCollection<KV<String, List<CompletionCandidate>>> flattenLarger = larger
-              .apply("FlattenLarge", Flatten.<KV<String, List<CompletionCandidate>>>pCollections());
-
-          return PCollectionList.of(flattenLarger).and(small);
-        }
-    }
-  }
-
-  /**
-   * A DoFn that keys each candidate by all its prefixes.
-   */
-  private static class AllPrefixes
-      extends DoFn<CompletionCandidate, KV<String, CompletionCandidate>> {
-    private final int minPrefix;
-    private final int maxPrefix;
-    public AllPrefixes(int minPrefix) {
-      this(minPrefix, Integer.MAX_VALUE);
-    }
-    public AllPrefixes(int minPrefix, int maxPrefix) {
-      this.minPrefix = minPrefix;
-      this.maxPrefix = maxPrefix;
-    }
-    @Override
-      public void processElement(ProcessContext c) {
-      String word = c.element().value;
-      for (int i = minPrefix; i <= Math.min(word.length(), maxPrefix); i++) {
-        c.output(KV.of(word.substring(0, i), c.element()));
-      }
-    }
-  }
-
-  /**
-   * Class used to store tag-count pairs.
-   */
-  @DefaultCoder(AvroCoder.class)
-  static class CompletionCandidate implements Comparable<CompletionCandidate> {
-    private long count;
-    private String value;
-
-    public CompletionCandidate(String value, long count) {
-      this.value = value;
-      this.count = count;
-    }
-
-    public long getCount() {
-      return count;
-    }
-
-    public String getValue() {
-      return value;
-    }
-
-    // Empty constructor required for Avro decoding.
-    public CompletionCandidate() {}
-
-    @Override
-    public int compareTo(CompletionCandidate o) {
-      if (this.count < o.count) {
-        return -1;
-      } else if (this.count == o.count) {
-        return this.value.compareTo(o.value);
-      } else {
-        return 1;
-      }
-    }
-
-    @Override
-    public boolean equals(Object other) {
-      if (other instanceof CompletionCandidate) {
-        CompletionCandidate that = (CompletionCandidate) other;
-        return this.count == that.count && this.value.equals(that.value);
-      } else {
-        return false;
-      }
-    }
-
-    @Override
-    public int hashCode() {
-      return Long.valueOf(count).hashCode() ^ value.hashCode();
-    }
-
-    @Override
-    public String toString() {
-      return "CompletionCandidate[" + value + ", " + count + "]";
-    }
-  }
-
-  /**
-   * Takes as input a set of strings, and emits each #hashtag found therein.
-   */
-  static class ExtractHashtags extends DoFn<String, String> {
-    @Override
-    public void processElement(ProcessContext c) {
-      Matcher m = Pattern.compile("#\\S+").matcher(c.element());
-      while (m.find()) {
-        c.output(m.group().substring(1));
-      }
-    }
-  }
-
-  static class FormatForBigquery extends DoFn<KV<String, List<CompletionCandidate>>, TableRow> {
-    @Override
-    public void processElement(ProcessContext c) {
-      List<TableRow> completions = new ArrayList<>();
-      for (CompletionCandidate cc : c.element().getValue()) {
-        completions.add(new TableRow()
-          .set("count", cc.getCount())
-          .set("tag", cc.getValue()));
-      }
-      TableRow row = new TableRow()
-        .set("prefix", c.element().getKey())
-        .set("tags", completions);
-      c.output(row);
-    }
-
-    /**
-     * Defines the BigQuery schema used for the output.
-     */
-    static TableSchema getSchema() {
-      List<TableFieldSchema> tagFields = new ArrayList<>();
-      tagFields.add(new TableFieldSchema().setName("count").setType("INTEGER"));
-      tagFields.add(new TableFieldSchema().setName("tag").setType("STRING"));
-      List<TableFieldSchema> fields = new ArrayList<>();
-      fields.add(new TableFieldSchema().setName("prefix").setType("STRING"));
-      fields.add(new TableFieldSchema()
-          .setName("tags").setType("RECORD").setMode("REPEATED").setFields(tagFields));
-      return new TableSchema().setFields(fields);
-    }
-  }
-
-  /**
-   * Takes as input the top candidates per prefix, and emits an entity
-   * suitable for writing to Datastore.
-   */
-  static class FormatForDatastore extends DoFn<KV<String, List<CompletionCandidate>>, Entity> {
-    private String kind;
-
-    public FormatForDatastore(String kind) {
-      this.kind = kind;
-    }
-
-    @Override
-    public void processElement(ProcessContext c) {
-      Entity.Builder entityBuilder = Entity.newBuilder();
-      Key key = DatastoreHelper.makeKey(kind, c.element().getKey()).build();
-
-      entityBuilder.setKey(key);
-      List<Value> candidates = new ArrayList<>();
-      for (CompletionCandidate tag : c.element().getValue()) {
-        Entity.Builder tagEntity = Entity.newBuilder();
-        tagEntity.addProperty(
-            DatastoreHelper.makeProperty("tag", DatastoreHelper.makeValue(tag.value)));
-        tagEntity.addProperty(
-            DatastoreHelper.makeProperty("count", DatastoreHelper.makeValue(tag.count)));
-        candidates.add(DatastoreHelper.makeValue(tagEntity).setIndexed(false).build());
-      }
-      entityBuilder.addProperty(
-          DatastoreHelper.makeProperty("candidates", DatastoreHelper.makeValue(candidates)));
-      c.output(entityBuilder.build());
-    }
-  }
-
-  /**
-   * Options supported by this class.
-   *
-   * <p>Inherits standard Dataflow configuration options.
-   */
-  private static interface Options extends ExamplePubsubTopicOptions, ExampleBigQueryTableOptions {
-    @Description("Input text file")
-    String getInputFile();
-    void setInputFile(String value);
-
-    @Description("Whether to use the recursive algorithm")
-    @Default.Boolean(true)
-    Boolean getRecursive();
-    void setRecursive(Boolean value);
-
-    @Description("Dataset entity kind")
-    @Default.String("autocomplete-demo")
-    String getKind();
-    void setKind(String value);
-
-    @Description("Whether output to BigQuery")
-    @Default.Boolean(true)
-    Boolean getOutputToBigQuery();
-    void setOutputToBigQuery(Boolean value);
-
-    @Description("Whether output to Datastore")
-    @Default.Boolean(false)
-    Boolean getOutputToDatastore();
-    void setOutputToDatastore(Boolean value);
-
-    @Description("Datastore output dataset ID, defaults to project ID")
-    String getOutputDataset();
-    void setOutputDataset(String value);
-  }
-
-  public static void main(String[] args) throws IOException {
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-
-    if (options.isStreaming()) {
-      // In order to cancel the pipelines automatically,
-      // {@literal DataflowPipelineRunner} is forced to be used.
-      options.setRunner(DataflowPipelineRunner.class);
-    }
-
-    options.setBigQuerySchema(FormatForBigquery.getSchema());
-    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
-
-    // We support running the same pipeline in either
-    // batch or windowed streaming mode.
-    PTransform<? super PBegin, PCollection<String>> readSource;
-    WindowFn<Object, ?> windowFn;
-    if (options.isStreaming()) {
-      Preconditions.checkArgument(
-          !options.getOutputToDatastore(), "DatastoreIO is not supported in streaming.");
-      dataflowUtils.setupPubsub();
-
-      readSource = PubsubIO.Read.topic(options.getPubsubTopic());
-      windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
-    } else {
-      readSource = TextIO.Read.from(options.getInputFile());
-      windowFn = new GlobalWindows();
-    }
-
-    // Create the pipeline.
-    Pipeline p = Pipeline.create(options);
-    PCollection<KV<String, List<CompletionCandidate>>> toWrite = p
-      .apply(readSource)
-      .apply(ParDo.of(new ExtractHashtags()))
-      .apply(Window.<String>into(windowFn))
-      .apply(ComputeTopCompletions.top(10, options.getRecursive()));
-
-    if (options.getOutputToDatastore()) {
-      toWrite
-      .apply(ParDo.named("FormatForDatastore").of(new FormatForDatastore(options.getKind())))
-      .apply(DatastoreIO.writeTo(MoreObjects.firstNonNull(
-          options.getOutputDataset(), options.getProject())));
-    }
-    if (options.getOutputToBigQuery()) {
-      dataflowUtils.setupBigQueryTable();
-
-      TableReference tableRef = new TableReference();
-      tableRef.setProjectId(options.getProject());
-      tableRef.setDatasetId(options.getBigQueryDataset());
-      tableRef.setTableId(options.getBigQueryTable());
-
-      toWrite
-        .apply(ParDo.of(new FormatForBigquery()))
-        .apply(BigQueryIO.Write
-               .to(tableRef)
-               .withSchema(FormatForBigquery.getSchema())
-               .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
-               .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
-    }
-
-    // Run the pipeline.
-    PipelineResult result = p.run();
-
-    if (options.isStreaming() && !options.getInputFile().isEmpty()) {
-      // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
-      dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
-    }
-
-    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
-    dataflowUtils.waitToFinish(result);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/complete/README.md
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/README.md b/examples/src/main/java/com/google/cloud/dataflow/examples/complete/README.md
deleted file mode 100644
index 5fba154..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/README.md
+++ /dev/null
@@ -1,44 +0,0 @@
-
-# "Complete" Examples
-
-This directory contains end-to-end example pipelines that perform complex data processing tasks. They include:
-
-<ul>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/AutoComplete.java">AutoComplete</a>
-  &mdash; An example that computes the most popular hash tags for every
-  prefix, which can be used for auto-completion. Demonstrates how to use the
-  same pipeline in both streaming and batch, combiners, and composite
-  transforms.</li>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/StreamingWordExtract.java">StreamingWordExtract</a>
-  &mdash; A streaming pipeline example that inputs lines of text from a Cloud
-  Pub/Sub topic, splits each line into individual words, capitalizes those
-  words, and writes the output to a BigQuery table.
-  </li>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TfIdf.java">TfIdf</a>
-  &mdash; An example that computes a basic TF-IDF search table for a directory or
-  Cloud Storage prefix. Demonstrates joining data, side inputs, and logging.
-  </li>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java">TopWikipediaSessions</a>
-  &mdash; An example that reads Wikipedia edit data from Cloud Storage and
-  computes the user with the longest string of edits separated by no more than
-  an hour within each month. Demonstrates using Cloud Dataflow
-  <code>Windowing</code> to perform time-based aggregations of data.
-  </li>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficMaxLaneFlow.java">TrafficMaxLaneFlow</a>
-  &mdash; A streaming Cloud Dataflow example using BigQuery output in the
-  <code>traffic sensor</code> domain. Demonstrates the Cloud Dataflow streaming
-  runner, sliding windows, Cloud Pub/Sub topic ingestion, the use of the
-  <code>AvroCoder</code> to encode a custom class, and custom
-  <code>Combine</code> transforms.
-  </li>
-  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficRoutes.java">TrafficRoutes</a>
-  &mdash; A streaming Cloud Dataflow example using BigQuery output in the
-  <code>traffic sensor</code> domain. Demonstrates the Cloud Dataflow streaming
-  runner, <code>GroupByKey</code>, keyed state, sliding windows, and Cloud
-  Pub/Sub topic ingestion.
-  </li>
-  </ul>
-
-See the [documentation](https://cloud.google.com/dataflow/getting-started) and the [Examples
-README](../../../../../../../../../README.md) for
-information about how to run these examples.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/complete/StreamingWordExtract.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/StreamingWordExtract.java b/examples/src/main/java/com/google/cloud/dataflow/examples/complete/StreamingWordExtract.java
deleted file mode 100644
index 99c5249..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/StreamingWordExtract.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
-import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
-import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicOptions;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-
-import java.io.IOException;
-import java.util.ArrayList;
-
-/**
- * A streaming Dataflow Example using BigQuery output.
- *
- * <p>This pipeline example reads lines of text from a PubSub topic, splits each line
- * into individual words, capitalizes those words, and writes the output to
- * a BigQuery table.
- *
- * <p>By default, the example will run a separate pipeline to inject the data from the default
- * {@literal --inputFile} to the Pub/Sub {@literal --pubsubTopic}. It will make it available for
- * the streaming pipeline to process. You may override the default {@literal --inputFile} with the
- * file of your choosing. You may also set {@literal --inputFile} to an empty string, which will
- * disable the automatic Pub/Sub injection, and allow you to use a separate tool to control the
- * input to this example.
- *
- * <p>The example is configured to use the default Pub/Sub topic and the default BigQuery table
- * from the example common package (there are no defaults for a general Dataflow pipeline).
- * You can override them by using the {@literal --pubsubTopic}, {@literal --bigQueryDataset}, and
- * {@literal --bigQueryTable} options. If the Pub/Sub topic or the BigQuery table do not exist,
- * the example will try to create them.
- *
- * <p>The example will try to cancel the pipelines on the signal to terminate the process (CTRL-C)
- * and then exit.
- */
-public class StreamingWordExtract {
-
-  /** A DoFn that tokenizes lines of text into individual words. */
-  static class ExtractWords extends DoFn<String, String> {
-    @Override
-    public void processElement(ProcessContext c) {
-      String[] words = c.element().split("[^a-zA-Z']+");
-      for (String word : words) {
-        if (!word.isEmpty()) {
-          c.output(word);
-        }
-      }
-    }
-  }
-
-  /** A DoFn that uppercases a word. */
-  static class Uppercase extends DoFn<String, String> {
-    @Override
-    public void processElement(ProcessContext c) {
-      c.output(c.element().toUpperCase());
-    }
-  }
-
-  /**
-   * Converts strings into BigQuery rows.
-   */
-  static class StringToRowConverter extends DoFn<String, TableRow> {
-    /**
-     * In this example, put the whole string into single BigQuery field.
-     */
-    @Override
-    public void processElement(ProcessContext c) {
-      c.output(new TableRow().set("string_field", c.element()));
-    }
-
-    static TableSchema getSchema() {
-      return new TableSchema().setFields(new ArrayList<TableFieldSchema>() {
-            // Compose the list of TableFieldSchema from tableSchema.
-            {
-              add(new TableFieldSchema().setName("string_field").setType("STRING"));
-            }
-      });
-    }
-  }
-
-  /**
-   * Options supported by {@link StreamingWordExtract}.
-   *
-   * <p>Inherits standard configuration options.
-   */
-  private interface StreamingWordExtractOptions
-      extends ExamplePubsubTopicOptions, ExampleBigQueryTableOptions {
-    @Description("Input file to inject to Pub/Sub topic")
-    @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt")
-    String getInputFile();
-    void setInputFile(String value);
-  }
-
-  /**
-   * Sets up and starts streaming pipeline.
-   *
-   * @throws IOException if there is a problem setting up resources
-   */
-  public static void main(String[] args) throws IOException {
-    StreamingWordExtractOptions options = PipelineOptionsFactory.fromArgs(args)
-        .withValidation()
-        .as(StreamingWordExtractOptions.class);
-    options.setStreaming(true);
-    // In order to cancel the pipelines automatically,
-    // {@literal DataflowPipelineRunner} is forced to be used.
-    options.setRunner(DataflowPipelineRunner.class);
-
-    options.setBigQuerySchema(StringToRowConverter.getSchema());
-    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
-    dataflowUtils.setup();
-
-    Pipeline pipeline = Pipeline.create(options);
-
-    String tableSpec = new StringBuilder()
-        .append(options.getProject()).append(":")
-        .append(options.getBigQueryDataset()).append(".")
-        .append(options.getBigQueryTable())
-        .toString();
-    pipeline
-        .apply(PubsubIO.Read.topic(options.getPubsubTopic()))
-        .apply(ParDo.of(new ExtractWords()))
-        .apply(ParDo.of(new Uppercase()))
-        .apply(ParDo.of(new StringToRowConverter()))
-        .apply(BigQueryIO.Write.to(tableSpec)
-            .withSchema(StringToRowConverter.getSchema()));
-
-    PipelineResult result = pipeline.run();
-
-    if (!options.getInputFile().isEmpty()) {
-      // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
-      dataflowUtils.runInjectorPipeline(options.getInputFile(), options.getPubsubTopic());
-    }
-
-    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
-    dataflowUtils.waitToFinish(result);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TfIdf.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TfIdf.java b/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TfIdf.java
deleted file mode 100644
index 65ac753..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TfIdf.java
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.coders.StringDelegateCoder;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.GcsOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.Flatten;
-import com.google.cloud.dataflow.sdk.transforms.Keys;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates;
-import com.google.cloud.dataflow.sdk.transforms.Values;
-import com.google.cloud.dataflow.sdk.transforms.View;
-import com.google.cloud.dataflow.sdk.transforms.WithKeys;
-import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult;
-import com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple;
-import com.google.cloud.dataflow.sdk.util.GcsUtil;
-import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.HashSet;
-import java.util.Set;
-
-/**
- * An example that computes a basic TF-IDF search table for a directory or GCS prefix.
- *
- * <p>Concepts: joining data; side inputs; logging
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }</pre>
- * and a local output file or output prefix on GCS:
- * <pre>{@code
- *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
- * }</pre>
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration
- * and an output prefix on GCS:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- *   --output=gs://YOUR_OUTPUT_PREFIX
- * }</pre>
- *
- * <p>The default input is {@code gs://dataflow-samples/shakespeare/} and can be overridden with
- * {@code --input}.
- */
-public class TfIdf {
-  /**
-   * Options supported by {@link TfIdf}.
-   *
-   * <p>Inherits standard configuration options.
-   */
-  private static interface Options extends PipelineOptions {
-    @Description("Path to the directory or GCS prefix containing files to read from")
-    @Default.String("gs://dataflow-samples/shakespeare/")
-    String getInput();
-    void setInput(String value);
-
-    @Description("Prefix of output URI to write to")
-    @Validation.Required
-    String getOutput();
-    void setOutput(String value);
-  }
-
-  /**
-   * Lists documents contained beneath the {@code options.input} prefix/directory.
-   */
-  public static Set<URI> listInputDocuments(Options options)
-      throws URISyntaxException, IOException {
-    URI baseUri = new URI(options.getInput());
-
-    // List all documents in the directory or GCS prefix.
-    URI absoluteUri;
-    if (baseUri.getScheme() != null) {
-      absoluteUri = baseUri;
-    } else {
-      absoluteUri = new URI(
-          "file",
-          baseUri.getAuthority(),
-          baseUri.getPath(),
-          baseUri.getQuery(),
-          baseUri.getFragment());
-    }
-
-    Set<URI> uris = new HashSet<>();
-    if (absoluteUri.getScheme().equals("file")) {
-      File directory = new File(absoluteUri);
-      for (String entry : directory.list()) {
-        File path = new File(directory, entry);
-        uris.add(path.toURI());
-      }
-    } else if (absoluteUri.getScheme().equals("gs")) {
-      GcsUtil gcsUtil = options.as(GcsOptions.class).getGcsUtil();
-      URI gcsUriGlob = new URI(
-          absoluteUri.getScheme(),
-          absoluteUri.getAuthority(),
-          absoluteUri.getPath() + "*",
-          absoluteUri.getQuery(),
-          absoluteUri.getFragment());
-      for (GcsPath entry : gcsUtil.expand(GcsPath.fromUri(gcsUriGlob))) {
-        uris.add(entry.toUri());
-      }
-    }
-
-    return uris;
-  }
-
-  /**
-   * Reads the documents at the provided uris and returns all lines
-   * from the documents tagged with which document they are from.
-   */
-  public static class ReadDocuments
-      extends PTransform<PInput, PCollection<KV<URI, String>>> {
-    private Iterable<URI> uris;
-
-    public ReadDocuments(Iterable<URI> uris) {
-      this.uris = uris;
-    }
-
-    @Override
-    public Coder<?> getDefaultOutputCoder() {
-      return KvCoder.of(StringDelegateCoder.of(URI.class), StringUtf8Coder.of());
-    }
-
-    @Override
-    public PCollection<KV<URI, String>> apply(PInput input) {
-      Pipeline pipeline = input.getPipeline();
-
-      // Create one TextIO.Read transform for each document
-      // and add its output to a PCollectionList
-      PCollectionList<KV<URI, String>> urisToLines =
-          PCollectionList.empty(pipeline);
-
-      // TextIO.Read supports:
-      //  - file: URIs and paths locally
-      //  - gs: URIs on the service
-      for (final URI uri : uris) {
-        String uriString;
-        if (uri.getScheme().equals("file")) {
-          uriString = new File(uri).getPath();
-        } else {
-          uriString = uri.toString();
-        }
-
-        PCollection<KV<URI, String>> oneUriToLines = pipeline
-            .apply(TextIO.Read.from(uriString)
-                .named("TextIO.Read(" + uriString + ")"))
-            .apply("WithKeys(" + uriString + ")", WithKeys.<URI, String>of(uri));
-
-        urisToLines = urisToLines.and(oneUriToLines);
-      }
-
-      return urisToLines.apply(Flatten.<KV<URI, String>>pCollections());
-    }
-  }
-
-  /**
-   * A transform containing a basic TF-IDF pipeline. The input consists of KV objects
-   * where the key is the document's URI and the value is a piece
-   * of the document's content. The output is a mapping from terms to
-   * scores for each document URI.
-   */
-  public static class ComputeTfIdf
-      extends PTransform<PCollection<KV<URI, String>>, PCollection<KV<String, KV<URI, Double>>>> {
-    public ComputeTfIdf() { }
-
-    @Override
-    public PCollection<KV<String, KV<URI, Double>>> apply(
-      PCollection<KV<URI, String>> uriToContent) {
-
-      // Compute the total number of documents, and
-      // prepare this singleton PCollectionView for
-      // use as a side input.
-      final PCollectionView<Long> totalDocuments =
-          uriToContent
-          .apply("GetURIs", Keys.<URI>create())
-          .apply("RemoveDuplicateDocs", RemoveDuplicates.<URI>create())
-          .apply(Count.<URI>globally())
-          .apply(View.<Long>asSingleton());
-
-      // Create a collection of pairs mapping a URI to each
-      // of the words in the document associated with that URI.
-      PCollection<KV<URI, String>> uriToWords = uriToContent
-          .apply(ParDo.named("SplitWords").of(
-              new DoFn<KV<URI, String>, KV<URI, String>>() {
-                @Override
-                public void processElement(ProcessContext c) {
-                  URI uri = c.element().getKey();
-                  String line = c.element().getValue();
-                  for (String word : line.split("\\W+")) {
-                    // Log INFO messages when the word “love” is found.
-                    if (word.toLowerCase().equals("love")) {
-                      LOG.info("Found {}", word.toLowerCase());
-                    }
-
-                    if (!word.isEmpty()) {
-                      c.output(KV.of(uri, word.toLowerCase()));
-                    }
-                  }
-                }
-              }));
-
-      // Compute a mapping from each word to the total
-      // number of documents in which it appears.
-      PCollection<KV<String, Long>> wordToDocCount = uriToWords
-          .apply("RemoveDuplicateWords", RemoveDuplicates.<KV<URI, String>>create())
-          .apply(Values.<String>create())
-          .apply("CountDocs", Count.<String>perElement());
-
-      // Compute a mapping from each URI to the total
-      // number of words in the document associated with that URI.
-      PCollection<KV<URI, Long>> uriToWordTotal = uriToWords
-          .apply("GetURIs2", Keys.<URI>create())
-          .apply("CountWords", Count.<URI>perElement());
-
-      // Count, for each (URI, word) pair, the number of
-      // occurrences of that word in the document associated
-      // with the URI.
-      PCollection<KV<KV<URI, String>, Long>> uriAndWordToCount = uriToWords
-          .apply("CountWordDocPairs", Count.<KV<URI, String>>perElement());
-
-      // Convert the above collection, a mapping from
-      // (URI, word) pairs to counts, into an isomorphic mapping
-      // from URI to (word, count) pairs, to prepare for a join
-      // by the URI key.
-      PCollection<KV<URI, KV<String, Long>>> uriToWordAndCount = uriAndWordToCount
-          .apply(ParDo.named("ShiftKeys").of(
-              new DoFn<KV<KV<URI, String>, Long>, KV<URI, KV<String, Long>>>() {
-                @Override
-                public void processElement(ProcessContext c) {
-                  URI uri = c.element().getKey().getKey();
-                  String word = c.element().getKey().getValue();
-                  Long occurrences = c.element().getValue();
-                  c.output(KV.of(uri, KV.of(word, occurrences)));
-                }
-              }));
-
-      // Prepare to join the mapping of URI to (word, count) pairs with
-      // the mapping of URI to total word counts, by associating
-      // each of the input PCollection<KV<URI, ...>> with
-      // a tuple tag. Each input must have the same key type, URI
-      // in this case. The type parameter of the tuple tag matches
-      // the types of the values for each collection.
-      final TupleTag<Long> wordTotalsTag = new TupleTag<Long>();
-      final TupleTag<KV<String, Long>> wordCountsTag = new TupleTag<KV<String, Long>>();
-      KeyedPCollectionTuple<URI> coGbkInput = KeyedPCollectionTuple
-          .of(wordTotalsTag, uriToWordTotal)
-          .and(wordCountsTag, uriToWordAndCount);
-
-      // Perform a CoGroupByKey (a sort of pre-join) on the prepared
-      // inputs. This yields a mapping from URI to a CoGbkResult
-      // (CoGroupByKey Result). The CoGbkResult is a mapping
-      // from the above tuple tags to the values in each input
-      // associated with a particular URI. In this case, each
-      // KV<URI, CoGbkResult> groups a URI with the total number of
-      // words in that document as well as all the (word, count)
-      // pairs for particular words.
-      PCollection<KV<URI, CoGbkResult>> uriToWordAndCountAndTotal = coGbkInput
-          .apply("CoGroupByUri", CoGroupByKey.<URI>create());
-
-      // Compute a mapping from each word to a (URI, term frequency)
-      // pair for each URI. A word's term frequency for a document
-      // is simply the number of times that word occurs in the document
-      // divided by the total number of words in the document.
-      PCollection<KV<String, KV<URI, Double>>> wordToUriAndTf = uriToWordAndCountAndTotal
-          .apply(ParDo.named("ComputeTermFrequencies").of(
-              new DoFn<KV<URI, CoGbkResult>, KV<String, KV<URI, Double>>>() {
-                @Override
-                public void processElement(ProcessContext c) {
-                  URI uri = c.element().getKey();
-                  Long wordTotal = c.element().getValue().getOnly(wordTotalsTag);
-
-                  for (KV<String, Long> wordAndCount
-                           : c.element().getValue().getAll(wordCountsTag)) {
-                    String word = wordAndCount.getKey();
-                    Long wordCount = wordAndCount.getValue();
-                    Double termFrequency = wordCount.doubleValue() / wordTotal.doubleValue();
-                    c.output(KV.of(word, KV.of(uri, termFrequency)));
-                  }
-                }
-              }));
-
-      // Compute a mapping from each word to its document frequency.
-      // A word's document frequency in a corpus is the number of
-      // documents in which the word appears divided by the total
-      // number of documents in the corpus. Note how the total number of
-      // documents is passed as a side input; the same value is
-      // presented to each invocation of the DoFn.
-      PCollection<KV<String, Double>> wordToDf = wordToDocCount
-          .apply(ParDo
-              .named("ComputeDocFrequencies")
-              .withSideInputs(totalDocuments)
-              .of(new DoFn<KV<String, Long>, KV<String, Double>>() {
-                @Override
-                public void processElement(ProcessContext c) {
-                  String word = c.element().getKey();
-                  Long documentCount = c.element().getValue();
-                  Long documentTotal = c.sideInput(totalDocuments);
-                  Double documentFrequency = documentCount.doubleValue()
-                      / documentTotal.doubleValue();
-
-                  c.output(KV.of(word, documentFrequency));
-                }
-              }));
-
-      // Join the term frequency and document frequency
-      // collections, each keyed on the word.
-      final TupleTag<KV<URI, Double>> tfTag = new TupleTag<KV<URI, Double>>();
-      final TupleTag<Double> dfTag = new TupleTag<Double>();
-      PCollection<KV<String, CoGbkResult>> wordToUriAndTfAndDf = KeyedPCollectionTuple
-          .of(tfTag, wordToUriAndTf)
-          .and(dfTag, wordToDf)
-          .apply(CoGroupByKey.<String>create());
-
-      // Compute a mapping from each word to a (URI, TF-IDF) score
-      // for each URI. There are a variety of definitions of TF-IDF
-      // ("term frequency - inverse document frequency") score;
-      // here we use a basic version that is the term frequency
-      // multiplied by the log of the inverse document frequency.
-      PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf = wordToUriAndTfAndDf
-          .apply(ParDo.named("ComputeTfIdf").of(
-              new DoFn<KV<String, CoGbkResult>, KV<String, KV<URI, Double>>>() {
-                @Override
-                public void processElement(ProcessContext c) {
-                  String word = c.element().getKey();
-                  Double df = c.element().getValue().getOnly(dfTag);
-
-                  for (KV<URI, Double> uriAndTf : c.element().getValue().getAll(tfTag)) {
-                    URI uri = uriAndTf.getKey();
-                    Double tf = uriAndTf.getValue();
-                    Double tfIdf = tf * Math.log(1 / df);
-                    c.output(KV.of(word, KV.of(uri, tfIdf)));
-                  }
-                }
-              }));
-
-      return wordToUriAndTfIdf;
-    }
-
-    // Instantiate Logger.
-    // It is suggested that the user specify the class name of the containing class
-    // (in this case ComputeTfIdf).
-    private static final Logger LOG = LoggerFactory.getLogger(ComputeTfIdf.class);
-  }
-
-  /**
-   * A {@link PTransform} to write, in CSV format, a mapping from term and URI
-   * to score.
-   */
-  public static class WriteTfIdf
-      extends PTransform<PCollection<KV<String, KV<URI, Double>>>, PDone> {
-    private String output;
-
-    public WriteTfIdf(String output) {
-      this.output = output;
-    }
-
-    @Override
-    public PDone apply(PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf) {
-      return wordToUriAndTfIdf
-          .apply(ParDo.named("Format").of(new DoFn<KV<String, KV<URI, Double>>, String>() {
-            @Override
-            public void processElement(ProcessContext c) {
-              c.output(String.format("%s,\t%s,\t%f",
-                  c.element().getKey(),
-                  c.element().getValue().getKey(),
-                  c.element().getValue().getValue()));
-            }
-          }))
-          .apply(TextIO.Write
-              .to(output)
-              .withSuffix(".csv"));
-    }
-  }
-
-  public static void main(String[] args) throws Exception {
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    Pipeline pipeline = Pipeline.create(options);
-    pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));
-
-    pipeline
-        .apply(new ReadDocuments(listInputDocuments(options)))
-        .apply(new ComputeTfIdf())
-        .apply(new WriteTfIdf(options.getOutput()));
-
-    pipeline.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java b/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java
deleted file mode 100644
index c57a5f2..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessions.java
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.TableRowJsonCoder;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SerializableComparator;
-import com.google.cloud.dataflow.sdk.transforms.Top;
-import com.google.cloud.dataflow.sdk.transforms.windowing.CalendarWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.util.List;
-
-/**
- * An example that reads Wikipedia edit data from Cloud Storage and computes the user with
- * the longest string of edits separated by no more than an hour within each month.
- *
- * <p>Concepts: Using Windowing to perform time-based aggregations of data.
- *
- * <p>It is not recommended to execute this pipeline locally, given the size of the default input
- * data.
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- * }
- * </pre>
- * and an output prefix on GCS:
- * <pre>{@code
- *   --output=gs://YOUR_OUTPUT_PREFIX
- * }</pre>
- *
- * <p>The default input is {@code gs://dataflow-samples/wikipedia_edits/*.json} and can be
- * overridden with {@code --input}.
- *
- * <p>The input for this example is large enough that it's a good place to enable (experimental)
- * autoscaling:
- * <pre>{@code
- *   --autoscalingAlgorithm=BASIC
- *   --maxNumWorkers=20
- * }
- * </pre>
- * This will automatically scale the number of workers up over time until the job completes.
- */
-public class TopWikipediaSessions {
-  private static final String EXPORTED_WIKI_TABLE = "gs://dataflow-samples/wikipedia_edits/*.json";
-
-  /**
-   * Extracts user and timestamp from a TableRow representing a Wikipedia edit.
-   */
-  static class ExtractUserAndTimestamp extends DoFn<TableRow, String> {
-    @Override
-    public void processElement(ProcessContext c) {
-      TableRow row = c.element();
-      int timestamp = (Integer) row.get("timestamp");
-      String userName = (String) row.get("contributor_username");
-      if (userName != null) {
-        // Sets the implicit timestamp field to be used in windowing.
-        c.outputWithTimestamp(userName, new Instant(timestamp * 1000L));
-      }
-    }
-  }
-
-  /**
-   * Computes the number of edits in each user session.  A session is defined as
-   * a string of edits where each is separated from the next by less than an hour.
-   */
-  static class ComputeSessions
-      extends PTransform<PCollection<String>, PCollection<KV<String, Long>>> {
-    @Override
-    public PCollection<KV<String, Long>> apply(PCollection<String> actions) {
-      return actions
-          .apply(Window.<String>into(Sessions.withGapDuration(Duration.standardHours(1))))
-
-          .apply(Count.<String>perElement());
-    }
-  }
-
-  /**
-   * Computes the longest session ending in each month.
-   */
-  private static class TopPerMonth
-      extends PTransform<PCollection<KV<String, Long>>, PCollection<List<KV<String, Long>>>> {
-    @Override
-    public PCollection<List<KV<String, Long>>> apply(PCollection<KV<String, Long>> sessions) {
-      return sessions
-        .apply(Window.<KV<String, Long>>into(CalendarWindows.months(1)))
-
-          .apply(Top.of(1, new SerializableComparator<KV<String, Long>>() {
-                    @Override
-                    public int compare(KV<String, Long> o1, KV<String, Long> o2) {
-                      return Long.compare(o1.getValue(), o2.getValue());
-                    }
-                  }).withoutDefaults());
-    }
-  }
-
-  static class SessionsToStringsDoFn extends DoFn<KV<String, Long>, KV<String, Long>>
-      implements RequiresWindowAccess {
-
-    @Override
-    public void processElement(ProcessContext c) {
-      c.output(KV.of(
-          c.element().getKey() + " : " + c.window(), c.element().getValue()));
-    }
-  }
-
-  static class FormatOutputDoFn extends DoFn<List<KV<String, Long>>, String>
-      implements RequiresWindowAccess {
-    @Override
-    public void processElement(ProcessContext c) {
-      for (KV<String, Long> item : c.element()) {
-        String session = item.getKey();
-        long count = item.getValue();
-        c.output(session + " : " + count + " : " + ((IntervalWindow) c.window()).start());
-      }
-    }
-  }
-
-  static class ComputeTopSessions extends PTransform<PCollection<TableRow>, PCollection<String>> {
-
-    private final double samplingThreshold;
-
-    public ComputeTopSessions(double samplingThreshold) {
-      this.samplingThreshold = samplingThreshold;
-    }
-
-    @Override
-    public PCollection<String> apply(PCollection<TableRow> input) {
-      return input
-          .apply(ParDo.of(new ExtractUserAndTimestamp()))
-
-          .apply(ParDo.named("SampleUsers").of(
-              new DoFn<String, String>() {
-                @Override
-                public void processElement(ProcessContext c) {
-                  if (Math.abs(c.element().hashCode()) <= Integer.MAX_VALUE * samplingThreshold) {
-                    c.output(c.element());
-                  }
-                }
-              }))
-
-          .apply(new ComputeSessions())
-
-          .apply(ParDo.named("SessionsToStrings").of(new SessionsToStringsDoFn()))
-          .apply(new TopPerMonth())
-          .apply(ParDo.named("FormatOutput").of(new FormatOutputDoFn()));
-    }
-  }
-
-  /**
-   * Options supported by this class.
-   *
-   * <p>Inherits standard Dataflow configuration options.
-   */
-  private static interface Options extends PipelineOptions {
-    @Description(
-      "Input specified as a GCS path containing a BigQuery table exported as json")
-    @Default.String(EXPORTED_WIKI_TABLE)
-    String getInput();
-    void setInput(String value);
-
-    @Description("File to output results to")
-    @Validation.Required
-    String getOutput();
-    void setOutput(String value);
-  }
-
-  public static void main(String[] args) {
-    Options options = PipelineOptionsFactory.fromArgs(args)
-        .withValidation()
-        .as(Options.class);
-    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
-
-    Pipeline p = Pipeline.create(dataflowOptions);
-
-    double samplingThreshold = 0.1;
-
-    p.apply(TextIO.Read
-        .from(options.getInput())
-        .withCoder(TableRowJsonCoder.of()))
-     .apply(new ComputeTopSessions(samplingThreshold))
-     .apply(TextIO.Write.named("Write").withoutSharding().to(options.getOutput()));
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficMaxLaneFlow.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficMaxLaneFlow.java b/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficMaxLaneFlow.java
deleted file mode 100644
index 2d54252..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/complete/TrafficMaxLaneFlow.java
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
-import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
-import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
-import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicAndSubscriptionOptions;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.coders.AvroCoder;
-import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.transforms.Combine;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PBegin;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.Strings;
-
-import org.apache.avro.reflect.Nullable;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * A Dataflow Example that runs in both batch and streaming modes with traffic sensor data.
- * You can configure the running mode by setting {@literal --streaming} to true or false.
- *
- * <p>Concepts: The batch and streaming runners, sliding windows, Google Cloud Pub/Sub
- * topic injection, use of the AvroCoder to encode a custom class, and custom Combine transforms.
- *
- * <p>This example analyzes traffic sensor data using SlidingWindows. For each window,
- * it finds the lane that had the highest flow recorded, for each sensor station. It writes
- * those max values along with auxiliary info to a BigQuery table.
- *
- * <p>In batch mode, the pipeline reads traffic sensor data from {@literal --inputFile}.
- *
- * <p>In streaming mode, the pipeline reads the data from a Pub/Sub topic.
- * By default, the example will run a separate pipeline to inject the data from the default
- * {@literal --inputFile} to the Pub/Sub {@literal --pubsubTopic}. It will make it available for
- * the streaming pipeline to process. You may override the default {@literal --inputFile} with the
- * file of your choosing. You may also set {@literal --inputFile} to an empty string, which will
- * disable the automatic Pub/Sub injection, and allow you to use a separate tool to control the
- * input to this example. Example code that publishes traffic sensor data to a Pub/Sub topic
- * is provided in
- * <a href="https://github.com/GoogleCloudPlatform/cloud-pubsub-samples-python/tree/master/gce-cmdline-publisher">gce-cmdline-publisher</a>.
- *
- * <p>The example is configured to use the default Pub/Sub topic and the default BigQuery table
- * from the example common package (there are no defaults for a general Dataflow pipeline).
- * You can override them by using the {@literal --pubsubTopic}, {@literal --bigQueryDataset}, and
- * {@literal --bigQueryTable} options. If the Pub/Sub topic or the BigQuery table do not exist,
- * the example will try to create them.
- *
- * <p>The example will try to cancel the pipelines on the signal to terminate the process (CTRL-C)
- * and then exit.
- */
-public class TrafficMaxLaneFlow {
-
-  private static final String PUBSUB_TIMESTAMP_LABEL_KEY = "timestamp_ms";
-  private static final Integer VALID_INPUTS = 4999;
-
-  static final int WINDOW_DURATION = 60;  // Default sliding window duration in minutes
-  static final int WINDOW_SLIDE_EVERY = 5;  // Default window 'slide every' setting in minutes
-
-  /**
-   * This class holds information about each lane in a station reading, along with some general
-   * information from the reading.
-   */
-  @DefaultCoder(AvroCoder.class)
-  static class LaneInfo {
-    @Nullable String stationId;
-    @Nullable String lane;
-    @Nullable String direction;
-    @Nullable String freeway;
-    @Nullable String recordedTimestamp;
-    @Nullable Integer laneFlow;
-    @Nullable Integer totalFlow;
-    @Nullable Double laneAO;
-    @Nullable Double laneAS;
-
-    public LaneInfo() {}
-
-    public LaneInfo(String stationId, String lane, String direction, String freeway,
-        String timestamp, Integer laneFlow, Double laneAO,
-        Double laneAS, Integer totalFlow) {
-      this.stationId = stationId;
-      this.lane = lane;
-      this.direction = direction;
-      this.freeway = freeway;
-      this.recordedTimestamp = timestamp;
-      this.laneFlow = laneFlow;
-      this.laneAO = laneAO;
-      this.laneAS = laneAS;
-      this.totalFlow = totalFlow;
-    }
-
-    public String getStationId() {
-      return this.stationId;
-    }
-    public String getLane() {
-      return this.lane;
-    }
-    public String getDirection() {
-      return this.direction;
-    }
-    public String getFreeway() {
-      return this.freeway;
-    }
-    public String getRecordedTimestamp() {
-      return this.recordedTimestamp;
-    }
-    public Integer getLaneFlow() {
-      return this.laneFlow;
-    }
-    public Double getLaneAO() {
-      return this.laneAO;
-    }
-    public Double getLaneAS() {
-      return this.laneAS;
-    }
-    public Integer getTotalFlow() {
-      return this.totalFlow;
-    }
-  }
-
-  /**
-   * Extract the timestamp field from the input string, and use it as the element timestamp.
-   */
-  static class ExtractTimestamps extends DoFn<String, String> {
-    private static final DateTimeFormatter dateTimeFormat =
-        DateTimeFormat.forPattern("MM/dd/yyyy HH:mm:ss");
-
-    @Override
-    public void processElement(DoFn<String, String>.ProcessContext c) throws Exception {
-      String[] items = c.element().split(",");
-      if (items.length > 0) {
-        try {
-          String timestamp = items[0];
-          c.outputWithTimestamp(c.element(), new Instant(dateTimeFormat.parseMillis(timestamp)));
-        } catch (IllegalArgumentException e) {
-          // Skip the invalid input.
-        }
-      }
-    }
-  }
-
-  /**
-   * Extract flow information for each of the 8 lanes in a reading, and output as separate tuples.
-   * This will let us determine which lane has the max flow for that station over the span of the
-   * window, and output not only the max flow from that calculation, but other associated
-   * information. The number of lanes for which data is present depends upon which freeway the data
-   * point comes from.
-   */
-  static class ExtractFlowInfoFn extends DoFn<String, KV<String, LaneInfo>> {
-
-    @Override
-    public void processElement(ProcessContext c) {
-      String[] items = c.element().split(",");
-      if (items.length < 48) {
-        // Skip the invalid input.
-        return;
-      }
-      // extract the sensor information for the lanes from the input string fields.
-      String timestamp = items[0];
-      String stationId = items[1];
-      String freeway = items[2];
-      String direction = items[3];
-      Integer totalFlow = tryIntParse(items[7]);
-      for (int i = 1; i <= 8; ++i) {
-        Integer laneFlow = tryIntParse(items[6 + 5 * i]);
-        Double laneAvgOccupancy = tryDoubleParse(items[7 + 5 * i]);
-        Double laneAvgSpeed = tryDoubleParse(items[8 + 5 * i]);
-        if (laneFlow == null || laneAvgOccupancy == null || laneAvgSpeed == null) {
-          return;
-        }
-        LaneInfo laneInfo = new LaneInfo(stationId, "lane" + i, direction, freeway, timestamp,
-            laneFlow, laneAvgOccupancy, laneAvgSpeed, totalFlow);
-        c.output(KV.of(stationId, laneInfo));
-      }
-    }
-  }
-
-  /**
-   * A custom 'combine function' used with the Combine.perKey transform. Used to find the max lane
-   * flow over all the data points in the Window. Reads the lane flow from each LaneInfo and
-   * determines whether it's the max seen so far. We're using a custom combiner instead of the Max
-   * transform because we want to retain the additional information we've associated with the flow
-   * value.
-   */
-  public static class MaxFlow implements SerializableFunction<Iterable<LaneInfo>, LaneInfo> {
-    @Override
-    public LaneInfo apply(Iterable<LaneInfo> input) {
-      Integer max = 0;
-      LaneInfo maxInfo = new LaneInfo();
-      for (LaneInfo item : input) {
-        Integer flow = item.getLaneFlow();
-        if (flow != null && (flow >= max)) {
-          max = flow;
-          maxInfo = item;
-        }
-      }
-      return maxInfo;
-    }
-  }
-
-  /**
-   * Format the results of the Max Lane flow calculation to a TableRow, to save to BigQuery.
-   * Add the timestamp from the window context.
-   */
-  static class FormatMaxesFn extends DoFn<KV<String, LaneInfo>, TableRow> {
-    @Override
-    public void processElement(ProcessContext c) {
-
-      LaneInfo laneInfo = c.element().getValue();
-      TableRow row = new TableRow()
-          .set("station_id", c.element().getKey())
-          .set("direction", laneInfo.getDirection())
-          .set("freeway", laneInfo.getFreeway())
-          .set("lane_max_flow", laneInfo.getLaneFlow())
-          .set("lane", laneInfo.getLane())
-          .set("avg_occ", laneInfo.getLaneAO())
-          .set("avg_speed", laneInfo.getLaneAS())
-          .set("total_flow", laneInfo.getTotalFlow())
-          .set("recorded_timestamp", laneInfo.getRecordedTimestamp())
-          .set("window_timestamp", c.timestamp().toString());
-      c.output(row);
-    }
-
-    /** Defines the BigQuery schema used for the output. */
-    static TableSchema getSchema() {
-      List<TableFieldSchema> fields = new ArrayList<>();
-      fields.add(new TableFieldSchema().setName("station_id").setType("STRING"));
-      fields.add(new TableFieldSchema().setName("direction").setType("STRING"));
-      fields.add(new TableFieldSchema().setName("freeway").setType("STRING"));
-      fields.add(new TableFieldSchema().setName("lane_max_flow").setType("INTEGER"));
-      fields.add(new TableFieldSchema().setName("lane").setType("STRING"));
-      fields.add(new TableFieldSchema().setName("avg_occ").setType("FLOAT"));
-      fields.add(new TableFieldSchema().setName("avg_speed").setType("FLOAT"));
-      fields.add(new TableFieldSchema().setName("total_flow").setType("INTEGER"));
-      fields.add(new TableFieldSchema().setName("window_timestamp").setType("TIMESTAMP"));
-      fields.add(new TableFieldSchema().setName("recorded_timestamp").setType("STRING"));
-      TableSchema schema = new TableSchema().setFields(fields);
-      return schema;
-    }
-  }
-
-  /**
-   * This PTransform extracts lane info, calculates the max lane flow found for a given station (for
-   * the current Window) using a custom 'combiner', and formats the results for BigQuery.
-   */
-  static class MaxLaneFlow
-      extends PTransform<PCollection<KV<String, LaneInfo>>, PCollection<TableRow>> {
-    @Override
-    public PCollection<TableRow> apply(PCollection<KV<String, LaneInfo>> flowInfo) {
-      // stationId, LaneInfo => stationId + max lane flow info
-      PCollection<KV<String, LaneInfo>> flowMaxes =
-          flowInfo.apply(Combine.<String, LaneInfo>perKey(
-              new MaxFlow()));
-
-      // <stationId, max lane flow info>... => row...
-      PCollection<TableRow> results = flowMaxes.apply(
-          ParDo.of(new FormatMaxesFn()));
-
-      return results;
-    }
-  }
-
-  static class ReadFileAndExtractTimestamps extends PTransform<PBegin, PCollection<String>> {
-    private final String inputFile;
-
-    public ReadFileAndExtractTimestamps(String inputFile) {
-      this.inputFile = inputFile;
-    }
-
-    @Override
-    public PCollection<String> apply(PBegin begin) {
-      return begin
-          .apply(TextIO.Read.from(inputFile))
-          .apply(ParDo.of(new ExtractTimestamps()));
-    }
-  }
-
-  /**
-    * Options supported by {@link TrafficMaxLaneFlow}.
-    *
-    * <p>Inherits standard configuration options.
-    */
-  private interface TrafficMaxLaneFlowOptions extends DataflowExampleOptions,
-      ExamplePubsubTopicAndSubscriptionOptions, ExampleBigQueryTableOptions {
-        @Description("Input file to inject to Pub/Sub topic")
-    @Default.String("gs://dataflow-samples/traffic_sensor/"
-        + "Freeways-5Minaa2010-01-01_to_2010-02-15_test2.csv")
-    String getInputFile();
-    void setInputFile(String value);
-
-    @Description("Numeric value of sliding window duration, in minutes")
-    @Default.Integer(WINDOW_DURATION)
-    Integer getWindowDuration();
-    void setWindowDuration(Integer value);
-
-    @Description("Numeric value of window 'slide every' setting, in minutes")
-    @Default.Integer(WINDOW_SLIDE_EVERY)
-    Integer getWindowSlideEvery();
-    void setWindowSlideEvery(Integer value);
-
-    @Description("Whether to run the pipeline with unbounded input")
-    @Default.Boolean(false)
-    boolean isUnbounded();
-    void setUnbounded(boolean value);
-  }
-
-  /**
-   * Sets up and starts streaming pipeline.
-   *
-   * @throws IOException if there is a problem setting up resources
-   */
-  public static void main(String[] args) throws IOException {
-    TrafficMaxLaneFlowOptions options = PipelineOptionsFactory.fromArgs(args)
-        .withValidation()
-        .as(TrafficMaxLaneFlowOptions.class);
-    options.setBigQuerySchema(FormatMaxesFn.getSchema());
-    // Using DataflowExampleUtils to set up required resources.
-    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options, options.isUnbounded());
-
-    Pipeline pipeline = Pipeline.create(options);
-    TableReference tableRef = new TableReference();
-    tableRef.setProjectId(options.getProject());
-    tableRef.setDatasetId(options.getBigQueryDataset());
-    tableRef.setTableId(options.getBigQueryTable());
-
-    PCollection<String> input;
-    if (options.isUnbounded()) {
-      // Read unbounded PubSubIO.
-      input = pipeline.apply(PubsubIO.Read
-          .timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
-          .subscription(options.getPubsubSubscription()));
-    } else {
-      // Read bounded PubSubIO.
-      input = pipeline.apply(PubsubIO.Read
-          .timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
-          .subscription(options.getPubsubSubscription()).maxNumRecords(VALID_INPUTS));
-
-      // To read bounded TextIO files, use:
-      // input = pipeline.apply(new ReadFileAndExtractTimestamps(options.getInputFile()));
-    }
-    input
-        // row... => <stationId, LaneInfo> ...
-        .apply(ParDo.of(new ExtractFlowInfoFn()))
-        // map the incoming data stream into sliding windows. The default window duration values
-        // work well if you're running the accompanying Pub/Sub generator script with the
-        // --replay flag, which simulates pauses in the sensor data publication. You may want to
-        // adjust them otherwise.
-        .apply(Window.<KV<String, LaneInfo>>into(SlidingWindows.of(
-            Duration.standardMinutes(options.getWindowDuration())).
-            every(Duration.standardMinutes(options.getWindowSlideEvery()))))
-        .apply(new MaxLaneFlow())
-        .apply(BigQueryIO.Write.to(tableRef)
-            .withSchema(FormatMaxesFn.getSchema()));
-
-    // Inject the data into the Pub/Sub topic with a Dataflow batch pipeline.
-    if (!Strings.isNullOrEmpty(options.getInputFile())
-        && !Strings.isNullOrEmpty(options.getPubsubTopic())) {
-      dataflowUtils.runInjectorPipeline(
-          new ReadFileAndExtractTimestamps(options.getInputFile()),
-          options.getPubsubTopic(),
-          PUBSUB_TIMESTAMP_LABEL_KEY);
-    }
-
-    // Run the pipeline.
-    PipelineResult result = pipeline.run();
-
-    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
-    dataflowUtils.waitToFinish(result);
-  }
-
-  private static Integer tryIntParse(String number) {
-    try {
-      return Integer.parseInt(number);
-    } catch (NumberFormatException e) {
-      return null;
-    }
-  }
-
-  private static Double tryDoubleParse(String number) {
-    try {
-      return Double.parseDouble(number);
-    } catch (NumberFormatException e) {
-      return null;
-    }
-  }
-}

[22/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnWithContext.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnWithContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnWithContext.java
deleted file mode 100644
index 4f131ad..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnWithContext.java
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn.DelegatingAggregator;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.util.WindowingInternals;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.io.Serializable;
-import java.lang.annotation.Documented;
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * The argument to {@link ParDo} providing the code to use to process
- * elements of the input
- * {@link com.google.cloud.dataflow.sdk.values.PCollection}.
- *
- * <p>See {@link ParDo} for more explanation, examples of use, and
- * discussion of constraints on {@code DoFnWithContext}s, including their
- * serializability, lack of access to global shared mutable state,
- * requirements for failure tolerance, and benefits of optimization.
- *
- * <p>{@code DoFnWithContext}s can be tested in a particular
- * {@code Pipeline} by running that {@code Pipeline} on sample input
- * and then checking its output.  Unit testing of a {@code DoFnWithContext},
- * separately from any {@code ParDo} transform or {@code Pipeline},
- * can be done via the {@link DoFnTester} harness.
- *
- * <p>Implementations must define a method annotated with {@link ProcessElement}
- * that satisfies the requirements described there. See the {@link ProcessElement}
- * for details.
- *
- * <p>This functionality is experimental and likely to change.
- *
- * <p>Example usage:
- *
- * <pre> {@code
- * PCollection<String> lines = ... ;
- * PCollection<String> words =
- *     lines.apply(ParDo.of(new DoFnWithContext<String, String>() {
- *         @ProcessElement
- *         public void processElement(ProcessContext c, BoundedWindow window) {
- *
- *         }}));
- * } </pre>
- *
- * @param <InputT> the type of the (main) input elements
- * @param <OutputT> the type of the (main) output elements
- */
-@Experimental
-public abstract class DoFnWithContext<InputT, OutputT> implements Serializable {
-
-  /** Information accessible to all methods in this {@code DoFnWithContext}. */
-  public abstract class Context {
-
-    /**
-     * Returns the {@code PipelineOptions} specified with the
-     * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner}
-     * invoking this {@code DoFnWithContext}.  The {@code PipelineOptions} will
-     * be the default when running via {@link DoFnTester}.
-     */
-    public abstract PipelineOptions getPipelineOptions();
-
-    /**
-     * Adds the given element to the main output {@code PCollection}.
-     *
-     * <p>Once passed to {@code output} the element should not be modified in
-     * any way.
-     *
-     * <p>If invoked from {@link ProcessElement}, the output
-     * element will have the same timestamp and be in the same windows
-     * as the input element passed to the method annotated with
-     * {@code @ProcessElement}.
-     *
-     * <p>If invoked from {@link StartBundle} or {@link FinishBundle},
-     * this will attempt to use the
-     * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-     * of the input {@code PCollection} to determine what windows the element
-     * should be in, throwing an exception if the {@code WindowFn} attempts
-     * to access any information about the input element. The output element
-     * will have a timestamp of negative infinity.
-     */
-    public abstract void output(OutputT output);
-
-    /**
-     * Adds the given element to the main output {@code PCollection},
-     * with the given timestamp.
-     *
-     * <p>Once passed to {@code outputWithTimestamp} the element should not be
-     * modified in any way.
-     *
-     * <p>If invoked from {@link ProcessElement}, the timestamp
-     * must not be older than the input element's timestamp minus
-     * {@link DoFn#getAllowedTimestampSkew}.  The output element will
-     * be in the same windows as the input element.
-     *
-     * <p>If invoked from {@link StartBundle} or {@link FinishBundle},
-     * this will attempt to use the
-     * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-     * of the input {@code PCollection} to determine what windows the element
-     * should be in, throwing an exception if the {@code WindowFn} attempts
-     * to access any information about the input element except for the
-     * timestamp.
-     */
-    public abstract void outputWithTimestamp(OutputT output, Instant timestamp);
-
-    /**
-     * Adds the given element to the side output {@code PCollection} with the
-     * given tag.
-     *
-     * <p>Once passed to {@code sideOutput} the element should not be modified
-     * in any way.
-     *
-     * <p>The caller of {@code ParDo} uses {@link ParDo#withOutputTags} to
-     * specify the tags of side outputs that it consumes. Non-consumed side
-     * outputs, e.g., outputs for monitoring purposes only, don't necessarily
-     * need to be specified.
-     *
-     * <p>The output element will have the same timestamp and be in the same
-     * windows as the input element passed to {@link ProcessElement}.
-     *
-     * <p>If invoked from {@link StartBundle} or {@link FinishBundle},
-     * this will attempt to use the
-     * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-     * of the input {@code PCollection} to determine what windows the element
-     * should be in, throwing an exception if the {@code WindowFn} attempts
-     * to access any information about the input element. The output element
-     * will have a timestamp of negative infinity.
-     *
-     * @see ParDo#withOutputTags
-     */
-    public abstract <T> void sideOutput(TupleTag<T> tag, T output);
-
-    /**
-     * Adds the given element to the specified side output {@code PCollection},
-     * with the given timestamp.
-     *
-     * <p>Once passed to {@code sideOutputWithTimestamp} the element should not be
-     * modified in any way.
-     *
-     * <p>If invoked from {@link ProcessElement}, the timestamp
-     * must not be older than the input element's timestamp minus
-     * {@link DoFn#getAllowedTimestampSkew}.  The output element will
-     * be in the same windows as the input element.
-     *
-     * <p>If invoked from {@link StartBundle} or {@link FinishBundle},
-     * this will attempt to use the
-     * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-     * of the input {@code PCollection} to determine what windows the element
-     * should be in, throwing an exception if the {@code WindowFn} attempts
-     * to access any information about the input element except for the
-     * timestamp.
-     *
-     * @see ParDo#withOutputTags
-     */
-    public abstract <T> void sideOutputWithTimestamp(
-        TupleTag<T> tag, T output, Instant timestamp);
-  }
-
-  /**
-   * Information accessible when running a method annotated with {@link ProcessElement}.
-   */
-  public abstract class ProcessContext extends Context {
-
-    /**
-     * Returns the input element to be processed.
-     *
-     * <p>The element will not be changed -- it is safe to cache, etc.
-     * without copying.
-     */
-    public abstract InputT element();
-
-
-    /**
-     * Returns the value of the side input.
-     *
-     * @throws IllegalArgumentException if this is not a side input
-     * @see ParDo#withSideInputs
-     */
-    public abstract <T> T sideInput(PCollectionView<T> view);
-
-    /**
-     * Returns the timestamp of the input element.
-     *
-     * <p>See {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window}
-     * for more information.
-     */
-    public abstract Instant timestamp();
-
-    /**
-     * Returns information about the pane within this window into which the
-     * input element has been assigned.
-     *
-     * <p>Generally all data is in a single, uninteresting pane unless custom
-     * triggering and/or late data has been explicitly requested.
-     * See {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window}
-     * for more information.
-     */
-    public abstract PaneInfo pane();
-  }
-
-  /**
-   * Returns the allowed timestamp skew duration, which is the maximum
-   * duration that timestamps can be shifted backward in
-   * {@link DoFnWithContext.Context#outputWithTimestamp}.
-   *
-   * <p>The default value is {@code Duration.ZERO}, in which case
-   * timestamps can only be shifted forward to the future.  For infinite
-   * skew, return {@code Duration.millis(Long.MAX_VALUE)}.
-   */
-  public Duration getAllowedTimestampSkew() {
-    return Duration.ZERO;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  Map<String, DelegatingAggregator<?, ?>> aggregators = new HashMap<>();
-
-  /**
-   * Protects aggregators from being created after initialization.
-   */
-  private boolean aggregatorsAreFinal;
-
-  /**
-   * Returns a {@link TypeDescriptor} capturing what is known statically
-   * about the input type of this {@code DoFnWithContext} instance's most-derived
-   * class.
-   *
-   * <p>See {@link #getOutputTypeDescriptor} for more discussion.
-   */
-  protected TypeDescriptor<InputT> getInputTypeDescriptor() {
-    return new TypeDescriptor<InputT>(getClass()) {};
-  }
-
-  /**
-   * Returns a {@link TypeDescriptor} capturing what is known statically
-   * about the output type of this {@code DoFnWithContext} instance's
-   * most-derived class.
-   *
-   * <p>In the normal case of a concrete {@code DoFnWithContext} subclass with
-   * no generic type parameters of its own (including anonymous inner
-   * classes), this will be a complete non-generic type, which is good
-   * for choosing a default output {@code Coder<OutputT>} for the output
-   * {@code PCollection<OutputT>}.
-   */
-  protected TypeDescriptor<OutputT> getOutputTypeDescriptor() {
-    return new TypeDescriptor<OutputT>(getClass()) {};
-  }
-
-  /**
-   * Interface for runner implementors to provide implementations of extra context information.
-   *
-   * <p>The methods on this interface are called by {@link DoFnReflector} before invoking an
-   * annotated {@link StartBundle}, {@link ProcessElement} or {@link FinishBundle} method that
-   * has indicated it needs the given extra context.
-   *
-   * <p>In the case of {@link ProcessElement} it is called once per invocation of
-   * {@link ProcessElement}.
-   */
-  public interface ExtraContextFactory<InputT, OutputT> {
-    /**
-     * Construct the {@link BoundedWindow} to use within a {@link DoFnWithContext} that
-     * needs it. This is called if the {@link ProcessElement} method has a parameter of type
-     * {@link BoundedWindow}.
-     *
-     * @return {@link BoundedWindow} of the element currently being processed.
-     */
-    BoundedWindow window();
-
-    /**
-     * Construct the {@link WindowingInternals} to use within a {@link DoFnWithContext} that
-     * needs it. This is called if the {@link ProcessElement} method has a parameter of type
-     * {@link WindowingInternals}.
-     */
-    WindowingInternals<InputT, OutputT> windowingInternals();
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Annotation for the method to use to prepare an instance for processing a batch of elements.
-   * The method annotated with this must satisfy the following constraints:
-   * <ul>
-   *   <li>It must have at least one argument.
-   *   <li>Its first (and only) argument must be a {@link DoFnWithContext.Context}.
-   * </ul>
-   */
-  @Documented
-  @Retention(RetentionPolicy.RUNTIME)
-  @Target(ElementType.METHOD)
-  public @interface StartBundle {}
-
-  /**
-   * Annotation for the method to use for processing elements. A subclass of
-   * {@link DoFnWithContext} must have a method with this annotation satisfying
-   * the following constraints in order for it to be executable:
-   * <ul>
-   *   <li>It must have at least one argument.
-   *   <li>Its first argument must be a {@link DoFnWithContext.ProcessContext}.
-   *   <li>Its remaining arguments must be {@link BoundedWindow}, or
-   *   {@link WindowingInternals WindowingInternals&lt;InputT, OutputT&gt;}.
-   * </ul>
-   */
-  @Documented
-  @Retention(RetentionPolicy.RUNTIME)
-  @Target(ElementType.METHOD)
-  public @interface ProcessElement {}
-
-  /**
-   * Annotation for the method to use to finish processing a batch of elements.
-   * The method annotated with this must satisfy the following constraints:
-   * <ul>
-   *   <li>It must have exactly one argument.
-   *   <li>Its first (and only) argument must be a {@link DoFnWithContext.Context}.
-   * </ul>
-   */
-  @Documented
-  @Retention(RetentionPolicy.RUNTIME)
-  @Target(ElementType.METHOD)
-  public @interface FinishBundle {}
-
-  /**
-   * Returns an {@link Aggregator} with aggregation logic specified by the
-   * {@link CombineFn} argument. The name provided must be unique across
-   * {@link Aggregator}s created within the DoFn. Aggregators can only be created
-   * during pipeline construction.
-   *
-   * @param name the name of the aggregator
-   * @param combiner the {@link CombineFn} to use in the aggregator
-   * @return an aggregator for the provided name and combiner in the scope of
-   *         this DoFn
-   * @throws NullPointerException if the name or combiner is null
-   * @throws IllegalArgumentException if the given name collides with another
-   *         aggregator in this scope
-   * @throws IllegalStateException if called during pipeline execution.
-   */
-  public final <AggInputT, AggOutputT> Aggregator<AggInputT, AggOutputT>
-      createAggregator(String name, Combine.CombineFn<? super AggInputT, ?, AggOutputT> combiner) {
-    checkNotNull(name, "name cannot be null");
-    checkNotNull(combiner, "combiner cannot be null");
-    checkArgument(!aggregators.containsKey(name),
-        "Cannot create aggregator with name %s."
-        + " An Aggregator with that name already exists within this scope.",
-        name);
-    checkState(!aggregatorsAreFinal,
-        "Cannot create an aggregator during pipeline execution."
-        + " Aggregators should be registered during pipeline construction.");
-
-    DelegatingAggregator<AggInputT, AggOutputT> aggregator =
-        new DelegatingAggregator<>(name, combiner);
-    aggregators.put(name, aggregator);
-    return aggregator;
-  }
-
-  /**
-   * Returns an {@link Aggregator} with the aggregation logic specified by the
-   * {@link SerializableFunction} argument. The name provided must be unique
-   * across {@link Aggregator}s created within the DoFn. Aggregators can only be
-   * created during pipeline construction.
-   *
-   * @param name the name of the aggregator
-   * @param combiner the {@link SerializableFunction} to use in the aggregator
-   * @return an aggregator for the provided name and combiner in the scope of
-   *         this DoFn
-   * @throws NullPointerException if the name or combiner is null
-   * @throws IllegalArgumentException if the given name collides with another
-   *         aggregator in this scope
-   * @throws IllegalStateException if called during pipeline execution.
-   */
-  public final <AggInputT> Aggregator<AggInputT, AggInputT> createAggregator(
-      String name, SerializableFunction<Iterable<AggInputT>, AggInputT> combiner) {
-    checkNotNull(combiner, "combiner cannot be null.");
-    return createAggregator(name, Combine.IterableCombineFn.of(combiner));
-  }
-
-  /**
-   * Finalize the {@link DoFnWithContext} construction to prepare for processing.
-   * This method should be called by runners before any processing methods.
-   */
-  void prepareForProcessing() {
-    aggregatorsAreFinal = true;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java
deleted file mode 100644
index 9e123a1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Filter.java
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-/**
- * {@code PTransform}s for filtering from a {@code PCollection} the
- * elements satisfying a predicate, or satisfying an inequality with
- * a given value based on the elements' natural ordering.
- *
- * @param <T> the type of the values in the input {@code PCollection},
- * and the type of the elements in the output {@code PCollection}
- */
-public class Filter<T> extends PTransform<PCollection<T>, PCollection<T>> {
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<T>} and returns a {@code PCollection<T>} with
-   * elements that satisfy the given predicate.  The predicate must be
-   * a {@code SerializableFunction<T, Boolean>}.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<String> wordList = ...;
-   * PCollection<String> longWords =
-   *     wordList.apply(Filter.byPredicate(new MatchIfWordLengthGT(6)));
-   * } </pre>
-   *
-   * <p>See also {@link #lessThan}, {@link #lessThanEq},
-   * {@link #greaterThan}, {@link #greaterThanEq}, which return elements
-   * satisfying various inequalities with the specified value based on
-   * the elements' natural ordering.
-   */
-  public static <T, PredicateT extends SerializableFunction<T, Boolean>> Filter<T>
-  byPredicate(PredicateT predicate) {
-    return new Filter<T>("Filter", predicate);
-  }
-
-  /**
-   * @deprecated use {@link #byPredicate}, which returns a {@link Filter} transform instead of
-   * a {@link ParDo.Bound}.
-   */
-  @Deprecated
-  public static <T, PredicateT extends SerializableFunction<T, Boolean>> ParDo.Bound<T, T>
-  by(final PredicateT filterPred) {
-    return ParDo.named("Filter").of(new DoFn<T, T>() {
-      @Override
-      public void processElement(ProcessContext c) {
-        if (filterPred.apply(c.element()) == true) {
-          c.output(c.element());
-        }
-      }
-    });
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@link PCollection} and returns a {@link PCollection} with
-   * elements that are less than a given value, based on the
-   * elements' natural ordering. Elements must be {@code Comparable}.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<Integer> listOfNumbers = ...;
-   * PCollection<Integer> smallNumbers =
-   *     listOfNumbers.apply(Filter.lessThan(10));
-   * } </pre>
-   *
-   * <p>See also {@link #lessThanEq}, {@link #greaterThanEq},
-   * and {@link #greaterThan}, which return elements satisfying various
-   * inequalities with the specified value based on the elements'
-   * natural ordering.
-   *
-   * <p>See also {@link #byPredicate}, which returns elements
-   * that satisfy the given predicate.
-   */
-  public static <T extends Comparable<T>> ParDo.Bound<T, T> lessThan(final T value) {
-    return ParDo.named("Filter.lessThan").of(new DoFn<T, T>() {
-      @Override
-      public void processElement(ProcessContext c) {
-        if (c.element().compareTo(value) < 0) {
-          c.output(c.element());
-        }
-      }
-    });
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<T>} and returns a {@code PCollection<T>} with
-   * elements that are greater than a given value, based on the
-   * elements' natural ordering. Elements must be {@code Comparable}.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<Integer> listOfNumbers = ...;
-   * PCollection<Integer> largeNumbers =
-   *     listOfNumbers.apply(Filter.greaterThan(1000));
-   * } </pre>
-   *
-   * <p>See also {@link #greaterThanEq}, {@link #lessThan},
-   * and {@link #lessThanEq}, which return elements satisfying various
-   * inequalities with the specified value based on the elements'
-   * natural ordering.
-   *
-   * <p>See also {@link #byPredicate}, which returns elements
-   * that satisfy the given predicate.
-   */
-  public static <T extends Comparable<T>> ParDo.Bound<T, T> greaterThan(final T value) {
-    return ParDo.named("Filter.greaterThan").of(new DoFn<T, T>() {
-      @Override
-      public void processElement(ProcessContext c) {
-        if (c.element().compareTo(value) > 0) {
-          c.output(c.element());
-        }
-      }
-    });
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<T>} and returns a {@code PCollection<T>} with
-   * elements that are less than or equal to a given value, based on the
-   * elements' natural ordering. Elements must be {@code Comparable}.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<Integer> listOfNumbers = ...;
-   * PCollection<Integer> smallOrEqualNumbers =
-   *     listOfNumbers.apply(Filter.lessThanEq(10));
-   * } </pre>
-   *
-   * <p>See also {@link #lessThan}, {@link #greaterThanEq},
-   * and {@link #greaterThan}, which return elements satisfying various
-   * inequalities with the specified value based on the elements'
-   * natural ordering.
-   *
-   * <p>See also {@link #byPredicate}, which returns elements
-   * that satisfy the given predicate.
-   */
-  public static <T extends Comparable<T>> ParDo.Bound<T, T> lessThanEq(final T value) {
-    return ParDo.named("Filter.lessThanEq").of(new DoFn<T, T>() {
-      @Override
-      public void processElement(ProcessContext c) {
-        if (c.element().compareTo(value) <= 0) {
-          c.output(c.element());
-        }
-      }
-    });
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<T>} and returns a {@code PCollection<T>} with
-   * elements that are greater than or equal to a given value, based on
-   * the elements' natural ordering. Elements must be {@code Comparable}.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<Integer> listOfNumbers = ...;
-   * PCollection<Integer> largeOrEqualNumbers =
-   *     listOfNumbers.apply(Filter.greaterThanEq(1000));
-   * } </pre>
-   *
-   * <p>See also {@link #greaterThan}, {@link #lessThan},
-   * and {@link #lessThanEq}, which return elements satisfying various
-   * inequalities with the specified value based on the elements'
-   * natural ordering.
-   *
-   * <p>See also {@link #byPredicate}, which returns elements
-   * that satisfy the given predicate.
-   */
-  public static <T extends Comparable<T>> ParDo.Bound<T, T> greaterThanEq(final T value) {
-    return ParDo.named("Filter.greaterThanEq").of(new DoFn<T, T>() {
-      @Override
-      public void processElement(ProcessContext c) {
-        if (c.element().compareTo(value) >= 0) {
-          c.output(c.element());
-        }
-      }
-    });
-  }
-
-  ///////////////////////////////////////////////////////////////////////////////
-
-  private SerializableFunction<T, Boolean> predicate;
-
-  private Filter(SerializableFunction<T, Boolean> predicate) {
-    this.predicate = predicate;
-  }
-
-  private Filter(String name, SerializableFunction<T, Boolean> predicate) {
-    super(name);
-    this.predicate = predicate;
-  }
-
-  public Filter<T> named(String name) {
-    return new Filter<>(name, predicate);
-  }
-
-  @Override
-  public PCollection<T> apply(PCollection<T> input) {
-    PCollection<T> output = input.apply(ParDo.named("Filter").of(new DoFn<T, T>() {
-      @Override
-      public void processElement(ProcessContext c) {
-        if (predicate.apply(c.element()) == true) {
-          c.output(c.element());
-        }
-      }
-    }));
-    return output;
-  }
-
-  @Override
-  protected Coder<T> getDefaultOutputCoder(PCollection<T> input) {
-    return input.getCoder();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElements.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElements.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElements.java
deleted file mode 100644
index fbaad5b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/FlatMapElements.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import java.lang.reflect.ParameterizedType;
-
-/**
- * {@code PTransform}s for mapping a simple function that returns iterables over the elements of a
- * {@link PCollection} and merging the results.
- */
-public class FlatMapElements<InputT, OutputT>
-extends PTransform<PCollection<InputT>, PCollection<OutputT>> {
-  /**
-   * For a {@code SerializableFunction<InputT, ? extends Iterable<OutputT>>} {@code fn},
-   * returns a {@link PTransform} that applies {@code fn} to every element of the input
-   * {@code PCollection<InputT>} and outputs all of the elements to the output
-   * {@code PCollection<OutputT>}.
-   *
-   * <p>Example of use in Java 8:
-   * <pre>{@code
-   * PCollection<String> words = lines.apply(
-   *     FlatMapElements.via((String line) -> Arrays.asList(line.split(" ")))
-   *         .withOutputType(new TypeDescriptor<String>(){}));
-   * }</pre>
-   *
-   * <p>In Java 7, the overload {@link #via(SimpleFunction)} is more concise as the output type
-   * descriptor need not be provided.
-   */
-  public static <InputT, OutputT> MissingOutputTypeDescriptor<InputT, OutputT>
-  via(SerializableFunction<InputT, ? extends Iterable<OutputT>> fn) {
-    return new MissingOutputTypeDescriptor<>(fn);
-  }
-
-  /**
-   * For a {@code SimpleFunction<InputT, ? extends Iterable<OutputT>>} {@code fn},
-   * return a {@link PTransform} that applies {@code fn} to every element of the input
-   * {@code PCollection<InputT>} and outputs all of the elements to the output
-   * {@code PCollection<OutputT>}.
-   *
-   * <p>This overload is intended primarily for use in Java 7. In Java 8, the overload
-   * {@link #via(SerializableFunction)} supports use of lambda for greater concision.
-   *
-   * <p>Example of use in Java 7:
-   * <pre>{@code
-   * PCollection<String> lines = ...;
-   * PCollection<String> words = lines.apply(FlatMapElements.via(
-   *     new SimpleFunction<String, List<String>>() {
-   *       public List<String> apply(String line) {
-   *         return Arrays.asList(line.split(" "));
-   *       }
-   *     }));
-   * }</pre>
-   *
-   * <p>To use a Java 8 lambda, see {@link #via(SerializableFunction)}.
-   */
-  public static <InputT, OutputT> FlatMapElements<InputT, OutputT>
-  via(SimpleFunction<InputT, ? extends Iterable<OutputT>> fn) {
-
-    @SuppressWarnings({"rawtypes", "unchecked"}) // safe by static typing
-    TypeDescriptor<Iterable<?>> iterableType = (TypeDescriptor) fn.getOutputTypeDescriptor();
-
-    @SuppressWarnings("unchecked") // safe by correctness of getIterableElementType
-    TypeDescriptor<OutputT> outputType =
-        (TypeDescriptor<OutputT>) getIterableElementType(iterableType);
-
-    return new FlatMapElements<>(fn, outputType);
-  }
-
-  /**
-   * An intermediate builder for a {@link FlatMapElements} transform. To complete the transform,
-   * provide an output type descriptor to {@link MissingOutputTypeDescriptor#withOutputType}. See
-   * {@link #via(SerializableFunction)} for a full example of use.
-   */
-  public static final class MissingOutputTypeDescriptor<InputT, OutputT> {
-
-    private final SerializableFunction<InputT, ? extends Iterable<OutputT>> fn;
-
-    private MissingOutputTypeDescriptor(
-        SerializableFunction<InputT, ? extends Iterable<OutputT>> fn) {
-      this.fn = fn;
-    }
-
-    public FlatMapElements<InputT, OutputT> withOutputType(TypeDescriptor<OutputT> outputType) {
-      return new FlatMapElements<>(fn, outputType);
-    }
-  }
-
-  private static TypeDescriptor<?> getIterableElementType(
-      TypeDescriptor<Iterable<?>> iterableTypeDescriptor) {
-
-    // If a raw type was used, the type token may be for Object, not a subtype of Iterable.
-    // In this case, we rely on static typing of the function elsewhere to ensure it is
-    // at least some kind of iterable, and grossly overapproximate the element type to be Object.
-    if (!iterableTypeDescriptor.isSubtypeOf(new TypeDescriptor<Iterable<?>>() {})) {
-      return new TypeDescriptor<Object>() {};
-    }
-
-    // Otherwise we can do the proper thing and get the actual type parameter.
-    ParameterizedType iterableType =
-        (ParameterizedType) iterableTypeDescriptor.getSupertype(Iterable.class).getType();
-    return TypeDescriptor.of(iterableType.getActualTypeArguments()[0]);
-  }
-
-  //////////////////////////////////////////////////////////////////////////////////////////////////
-
-  private final SerializableFunction<InputT, ? extends Iterable<OutputT>> fn;
-  private final transient TypeDescriptor<OutputT> outputType;
-
-  private FlatMapElements(
-      SerializableFunction<InputT, ? extends Iterable<OutputT>> fn,
-      TypeDescriptor<OutputT> outputType) {
-    this.fn = fn;
-    this.outputType = outputType;
-  }
-
-  @Override
-  public PCollection<OutputT> apply(PCollection<InputT> input) {
-    return input.apply(ParDo.named("Map").of(new DoFn<InputT, OutputT>() {
-      private static final long serialVersionUID = 0L;
-      @Override
-      public void processElement(ProcessContext c) {
-        for (OutputT element : fn.apply(c.element())) {
-          c.output(element);
-        }
-      }
-    })).setTypeDescriptorInternal(outputType);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java
deleted file mode 100644
index de6add0..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Flatten.java
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.IterableLikeCoder;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * {@code Flatten<T>} takes multiple {@code PCollection<T>}s bundled
- * into a {@code PCollectionList<T>} and returns a single
- * {@code PCollection<T>} containing all the elements in all the input
- * {@code PCollection}s.  The name "Flatten" suggests taking a list of
- * lists and flattening them into a single list.
- *
- * <p>Example of use:
- * <pre> {@code
- * PCollection<String> pc1 = ...;
- * PCollection<String> pc2 = ...;
- * PCollection<String> pc3 = ...;
- * PCollectionList<String> pcs = PCollectionList.of(pc1).and(pc2).and(pc3);
- * PCollection<String> merged = pcs.apply(Flatten.<String>pCollections());
- * } </pre>
- *
- * <p>By default, the {@code Coder} of the output {@code PCollection}
- * is the same as the {@code Coder} of the first {@code PCollection}
- * in the input {@code PCollectionList} (if the
- * {@code PCollectionList} is non-empty).
- *
- */
-public class Flatten {
-
-  /**
-   * Returns a {@link PTransform} that flattens a {@link PCollectionList}
-   * into a {@link PCollection} containing all the elements of all
-   * the {@link PCollection}s in its input.
-   *
-   * <p>All inputs must have equal {@link WindowFn}s.
-   * The output elements of {@code Flatten<T>} are in the same windows and
-   * have the same timestamps as their corresponding input elements.  The output
-   * {@code PCollection} will have the same
-   * {@link WindowFn} as all of the inputs.
-   *
-   * @param <T> the type of the elements in the input and output
-   * {@code PCollection}s.
-   */
-  public static <T> FlattenPCollectionList<T> pCollections() {
-    return new FlattenPCollectionList<>();
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes a {@code PCollection<Iterable<T>>}
-   * and returns a {@code PCollection<T>} containing all the elements from
-   * all the {@code Iterable}s.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<Iterable<Integer>> pcOfIterables = ...;
-   * PCollection<Integer> pc = pcOfIterables.apply(Flatten.<Integer>iterables());
-   * } </pre>
-   *
-   * <p>By default, the output {@code PCollection} encodes its elements
-   * using the same {@code Coder} that the input uses for
-   * the elements in its {@code Iterable}.
-   *
-   * @param <T> the type of the elements of the input {@code Iterable} and
-   * the output {@code PCollection}
-   */
-  public static <T> FlattenIterables<T> iterables() {
-    return new FlattenIterables<>();
-  }
-
-  /**
-   * A {@link PTransform} that flattens a {@link PCollectionList}
-   * into a {@link PCollection} containing all the elements of all
-   * the {@link PCollection}s in its input.
-   * Implements {@link #pCollections}.
-   *
-   * @param <T> the type of the elements in the input and output
-   * {@code PCollection}s.
-   */
-  public static class FlattenPCollectionList<T>
-      extends PTransform<PCollectionList<T>, PCollection<T>> {
-
-    private FlattenPCollectionList() { }
-
-    @Override
-    public PCollection<T> apply(PCollectionList<T> inputs) {
-      WindowingStrategy<?, ?> windowingStrategy;
-      IsBounded isBounded = IsBounded.BOUNDED;
-      if (!inputs.getAll().isEmpty()) {
-        windowingStrategy = inputs.get(0).getWindowingStrategy();
-        for (PCollection<?> input : inputs.getAll()) {
-          WindowingStrategy<?, ?> other = input.getWindowingStrategy();
-          if (!windowingStrategy.getWindowFn().isCompatible(other.getWindowFn())) {
-            throw new IllegalStateException(
-                "Inputs to Flatten had incompatible window windowFns: "
-                + windowingStrategy.getWindowFn() + ", " + other.getWindowFn());
-          }
-
-          if (!windowingStrategy.getTrigger().getSpec()
-              .isCompatible(other.getTrigger().getSpec())) {
-            throw new IllegalStateException(
-                "Inputs to Flatten had incompatible triggers: "
-                + windowingStrategy.getTrigger() + ", " + other.getTrigger());
-          }
-          isBounded = isBounded.and(input.isBounded());
-        }
-      } else {
-        windowingStrategy = WindowingStrategy.globalDefault();
-      }
-
-      return PCollection.<T>createPrimitiveOutputInternal(
-          inputs.getPipeline(),
-          windowingStrategy,
-          isBounded);
-    }
-
-    @Override
-    protected Coder<?> getDefaultOutputCoder(PCollectionList<T> input)
-        throws CannotProvideCoderException {
-
-      // Take coder from first collection
-      for (PCollection<T> pCollection : input.getAll()) {
-        return pCollection.getCoder();
-      }
-
-      // No inputs
-      throw new CannotProvideCoderException(
-          this.getClass().getSimpleName() + " cannot provide a Coder for"
-          + " empty " + PCollectionList.class.getSimpleName());
-    }
-  }
-
-  /**
-   * {@code FlattenIterables<T>} takes a {@code PCollection<Iterable<T>>} and returns a
-   * {@code PCollection<T>} that contains all the elements from each iterable.
-   * Implements {@link #iterables}.
-   *
-   * @param <T> the type of the elements of the input {@code Iterable}s and
-   * the output {@code PCollection}
-   */
-  public static class FlattenIterables<T>
-      extends PTransform<PCollection<? extends Iterable<T>>, PCollection<T>> {
-
-    @Override
-    public PCollection<T> apply(PCollection<? extends Iterable<T>> in) {
-      Coder<? extends Iterable<T>> inCoder = in.getCoder();
-      if (!(inCoder instanceof IterableLikeCoder)) {
-        throw new IllegalArgumentException(
-            "expecting the input Coder<Iterable> to be an IterableLikeCoder");
-      }
-      @SuppressWarnings("unchecked")
-      Coder<T> elemCoder = ((IterableLikeCoder<T, ?>) inCoder).getElemCoder();
-
-      return in.apply(ParDo.named("FlattenIterables").of(
-          new DoFn<Iterable<T>, T>() {
-            @Override
-            public void processElement(ProcessContext c) {
-              for (T i : c.element()) {
-                c.output(i);
-              }
-            }
-          }))
-          .setCoder(elemCoder);
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  static {
-    DirectPipelineRunner.registerDefaultTransformEvaluator(
-        FlattenPCollectionList.class,
-        new DirectPipelineRunner.TransformEvaluator<FlattenPCollectionList>() {
-          @Override
-          public void evaluate(
-              FlattenPCollectionList transform,
-              DirectPipelineRunner.EvaluationContext context) {
-            evaluateHelper(transform, context);
-          }
-        });
-  }
-
-  private static <T> void evaluateHelper(
-      FlattenPCollectionList<T> transform,
-      DirectPipelineRunner.EvaluationContext context) {
-    List<DirectPipelineRunner.ValueWithMetadata<T>> outputElems = new ArrayList<>();
-    PCollectionList<T> inputs = context.getInput(transform);
-
-    for (PCollection<T> input : inputs.getAll()) {
-      outputElems.addAll(context.getPCollectionValuesWithMetadata(input));
-    }
-
-    context.setPCollectionValuesWithMetadata(context.getOutput(transform), outputElems);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java
deleted file mode 100644
index 8fde3e0..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/GroupByKey.java
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner.ValueWithMetadata;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.DefaultTrigger;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.InvalidWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsViaOutputBufferDoFn;
-import com.google.cloud.dataflow.sdk.util.ReifyTimestampAndWindowsDoFn;
-import com.google.cloud.dataflow.sdk.util.SystemReduceFn;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder;
-import com.google.cloud.dataflow.sdk.util.WindowedValue.WindowedValueCoder;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * {@code GroupByKey<K, V>} takes a {@code PCollection<KV<K, V>>},
- * groups the values by key and windows, and returns a
- * {@code PCollection<KV<K, Iterable<V>>>} representing a map from
- * each distinct key and window of the input {@code PCollection} to an
- * {@code Iterable} over all the values associated with that key in
- * the input per window.  Absent repeatedly-firing
- * {@link Window#triggering triggering}, each key in the output
- * {@code PCollection} is unique within each window.
- *
- * <p>{@code GroupByKey} is analogous to converting a multi-map into
- * a uni-map, and related to {@code GROUP BY} in SQL.  It corresponds
- * to the "shuffle" step between the Mapper and the Reducer in the
- * MapReduce framework.
- *
- * <p>Two keys of type {@code K} are compared for equality
- * <b>not</b> by regular Java {@link Object#equals}, but instead by
- * first encoding each of the keys using the {@code Coder} of the
- * keys of the input {@code PCollection}, and then comparing the
- * encoded bytes.  This admits efficient parallel evaluation.  Note that
- * this requires that the {@code Coder} of the keys be deterministic (see
- * {@link Coder#verifyDeterministic()}).  If the key {@code Coder} is not
- * deterministic, an exception is thrown at pipeline construction time.
- *
- * <p>By default, the {@code Coder} of the keys of the output
- * {@code PCollection} is the same as that of the keys of the input,
- * and the {@code Coder} of the elements of the {@code Iterable}
- * values of the output {@code PCollection} is the same as the
- * {@code Coder} of the values of the input.
- *
- * <p>Example of use:
- * <pre> {@code
- * PCollection<KV<String, Doc>> urlDocPairs = ...;
- * PCollection<KV<String, Iterable<Doc>>> urlToDocs =
- *     urlDocPairs.apply(GroupByKey.<String, Doc>create());
- * PCollection<R> results =
- *     urlToDocs.apply(ParDo.of(new DoFn<KV<String, Iterable<Doc>>, R>() {
- *       public void processElement(ProcessContext c) {
- *         String url = c.element().getKey();
- *         Iterable<Doc> docsWithThatUrl = c.element().getValue();
- *         ... process all docs having that url ...
- *       }}));
- * } </pre>
- *
- * <p>{@code GroupByKey} is a key primitive in data-parallel
- * processing, since it is the main way to efficiently bring
- * associated data together into one location.  It is also a key
- * determiner of the performance of a data-parallel pipeline.
- *
- * <p>See {@link com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey}
- * for a way to group multiple input PCollections by a common key at once.
- *
- * <p>See {@link Combine.PerKey} for a common pattern of
- * {@code GroupByKey} followed by {@link Combine.GroupedValues}.
- *
- * <p>When grouping, windows that can be merged according to the {@link WindowFn}
- * of the input {@code PCollection} will be merged together, and a window pane
- * corresponding to the new, merged window will be created. The items in this pane
- * will be emitted when a trigger fires. By default this will be when the input
- * sources estimate there will be no more data for the window. See
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark}
- * for details on the estimation.
- *
- * <p>The timestamp for each emitted pane is determined by the
- * {@link Window.Bound#withOutputTimeFn windowing operation}.
- * The output {@code PCollection} will have the same {@link WindowFn}
- * as the input.
- *
- * <p>If the input {@code PCollection} contains late data (see
- * {@link com.google.cloud.dataflow.sdk.io.PubsubIO.Read.Bound#timestampLabel}
- * for an example of how this can occur) or the
- * {@link Window#triggering requested TriggerFn} can fire before
- * the watermark, then there may be multiple elements
- * output by a {@code GroupByKey} that correspond to the same key and window.
- *
- * <p>If the {@link WindowFn} of the input requires merging, it is not
- * valid to apply another {@code GroupByKey} without first applying a new
- * {@link WindowFn} or applying {@link Window#remerge()}.
- *
- * @param <K> the type of the keys of the input and output
- * {@code PCollection}s
- * @param <V> the type of the values of the input {@code PCollection}
- * and the elements of the {@code Iterable}s in the output
- * {@code PCollection}
- */
-public class GroupByKey<K, V>
-    extends PTransform<PCollection<KV<K, V>>,
-                       PCollection<KV<K, Iterable<V>>>> {
-
-  private final boolean fewKeys;
-
-  private GroupByKey(boolean fewKeys) {
-    this.fewKeys = fewKeys;
-  }
-
-  /**
-   * Returns a {@code GroupByKey<K, V>} {@code PTransform}.
-   *
-   * @param <K> the type of the keys of the input and output
-   * {@code PCollection}s
-   * @param <V> the type of the values of the input {@code PCollection}
-   * and the elements of the {@code Iterable}s in the output
-   * {@code PCollection}
-   */
-  public static <K, V> GroupByKey<K, V> create() {
-    return new GroupByKey<>(false);
-  }
-
-  /**
-   * Returns a {@code GroupByKey<K, V>} {@code PTransform}.
-   *
-   * @param <K> the type of the keys of the input and output
-   * {@code PCollection}s
-   * @param <V> the type of the values of the input {@code PCollection}
-   * and the elements of the {@code Iterable}s in the output
-   * {@code PCollection}
-   * @param fewKeys whether it groups just a few keys.
-   */
-  static <K, V> GroupByKey<K, V> create(boolean fewKeys) {
-    return new GroupByKey<>(fewKeys);
-  }
-
-  /**
-   * Returns whether it groups just a few keys.
-   */
-  public boolean fewKeys() {
-    return fewKeys;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  public static void applicableTo(PCollection<?> input) {
-    WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
-    // Verify that the input PCollection is bounded, or that there is windowing/triggering being
-    // used. Without this, the watermark (at end of global window) will never be reached.
-    if (windowingStrategy.getWindowFn() instanceof GlobalWindows
-        && windowingStrategy.getTrigger().getSpec() instanceof DefaultTrigger
-        && input.isBounded() != IsBounded.BOUNDED) {
-      throw new IllegalStateException("GroupByKey cannot be applied to non-bounded PCollection in "
-          + "the GlobalWindow without a trigger. Use a Window.into or Window.triggering transform "
-          + "prior to GroupByKey.");
-    }
-
-    // Validate the window merge function.
-    if (windowingStrategy.getWindowFn() instanceof InvalidWindows) {
-      String cause = ((InvalidWindows<?>) windowingStrategy.getWindowFn()).getCause();
-      throw new IllegalStateException(
-          "GroupByKey must have a valid Window merge function.  "
-              + "Invalid because: " + cause);
-    }
-  }
-
-  @Override
-  public void validate(PCollection<KV<K, V>> input) {
-    applicableTo(input);
-
-    // Verify that the input Coder<KV<K, V>> is a KvCoder<K, V>, and that
-    // the key coder is deterministic.
-    Coder<K> keyCoder = getKeyCoder(input.getCoder());
-    try {
-      keyCoder.verifyDeterministic();
-    } catch (NonDeterministicException e) {
-      throw new IllegalStateException(
-          "the keyCoder of a GroupByKey must be deterministic", e);
-    }
-  }
-
-  public WindowingStrategy<?, ?> updateWindowingStrategy(WindowingStrategy<?, ?> inputStrategy) {
-    WindowFn<?, ?> inputWindowFn = inputStrategy.getWindowFn();
-    if (!inputWindowFn.isNonMerging()) {
-      // Prevent merging windows again, without explicit user
-      // involvement, e.g., by Window.into() or Window.remerge().
-      inputWindowFn = new InvalidWindows<>(
-          "WindowFn has already been consumed by previous GroupByKey", inputWindowFn);
-    }
-
-    // We also switch to the continuation trigger associated with the current trigger.
-    return inputStrategy
-        .withWindowFn(inputWindowFn)
-        .withTrigger(inputStrategy.getTrigger().getSpec().getContinuationTrigger());
-  }
-
-  @Override
-  public PCollection<KV<K, Iterable<V>>> apply(PCollection<KV<K, V>> input) {
-    // This operation groups by the combination of key and window,
-    // merging windows as needed, using the windows assigned to the
-    // key/value input elements and the window merge operation of the
-    // window function associated with the input PCollection.
-    WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
-
-    // By default, implement GroupByKey[AndWindow] via a series of lower-level
-    // operations.
-    return input
-        // Make each input element's timestamp and assigned windows
-        // explicit, in the value part.
-        .apply(new ReifyTimestampsAndWindows<K, V>())
-
-        // Group by just the key.
-        // Combiner lifting will not happen regardless of the disallowCombinerLifting value.
-        // There will be no combiners right after the GroupByKeyOnly because of the two ParDos
-        // introduced in here.
-        .apply(new GroupByKeyOnly<K, WindowedValue<V>>())
-
-        // Sort each key's values by timestamp. GroupAlsoByWindow requires
-        // its input to be sorted by timestamp.
-        .apply(new SortValuesByTimestamp<K, V>())
-
-        // Group each key's values by window, merging windows as needed.
-        .apply(new GroupAlsoByWindow<K, V>(windowingStrategy))
-
-        // And update the windowing strategy as appropriate.
-        .setWindowingStrategyInternal(updateWindowingStrategy(windowingStrategy));
-  }
-
-  @Override
-  protected Coder<KV<K, Iterable<V>>> getDefaultOutputCoder(PCollection<KV<K, V>> input) {
-    return getOutputKvCoder(input.getCoder());
-  }
-
-  /**
-   * Returns the {@code Coder} of the input to this transform, which
-   * should be a {@code KvCoder}.
-   */
-  @SuppressWarnings("unchecked")
-  static <K, V> KvCoder<K, V> getInputKvCoder(Coder<KV<K, V>> inputCoder) {
-    if (!(inputCoder instanceof KvCoder)) {
-      throw new IllegalStateException(
-          "GroupByKey requires its input to use KvCoder");
-    }
-    return (KvCoder<K, V>) inputCoder;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Returns the {@code Coder} of the keys of the input to this
-   * transform, which is also used as the {@code Coder} of the keys of
-   * the output of this transform.
-   */
-  static <K, V> Coder<K> getKeyCoder(Coder<KV<K, V>> inputCoder) {
-    return getInputKvCoder(inputCoder).getKeyCoder();
-  }
-
-  /**
-   * Returns the {@code Coder} of the values of the input to this transform.
-   */
-  public static <K, V> Coder<V> getInputValueCoder(Coder<KV<K, V>> inputCoder) {
-    return getInputKvCoder(inputCoder).getValueCoder();
-  }
-
-  /**
-   * Returns the {@code Coder} of the {@code Iterable} values of the
-   * output of this transform.
-   */
-  static <K, V> Coder<Iterable<V>> getOutputValueCoder(Coder<KV<K, V>> inputCoder) {
-    return IterableCoder.of(getInputValueCoder(inputCoder));
-  }
-
-  /**
-   * Returns the {@code Coder} of the output of this transform.
-   */
-  static <K, V> KvCoder<K, Iterable<V>> getOutputKvCoder(Coder<KV<K, V>> inputCoder) {
-    return KvCoder.of(getKeyCoder(inputCoder), getOutputValueCoder(inputCoder));
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Helper transform that makes timestamps and window assignments
-   * explicit in the value part of each key/value pair.
-   */
-  public static class ReifyTimestampsAndWindows<K, V>
-      extends PTransform<PCollection<KV<K, V>>,
-                         PCollection<KV<K, WindowedValue<V>>>> {
-    @Override
-    public PCollection<KV<K, WindowedValue<V>>> apply(
-        PCollection<KV<K, V>> input) {
-      @SuppressWarnings("unchecked")
-      KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) input.getCoder();
-      Coder<K> keyCoder = inputKvCoder.getKeyCoder();
-      Coder<V> inputValueCoder = inputKvCoder.getValueCoder();
-      Coder<WindowedValue<V>> outputValueCoder = FullWindowedValueCoder.of(
-          inputValueCoder, input.getWindowingStrategy().getWindowFn().windowCoder());
-      Coder<KV<K, WindowedValue<V>>> outputKvCoder =
-          KvCoder.of(keyCoder, outputValueCoder);
-      return input.apply(ParDo.of(new ReifyTimestampAndWindowsDoFn<K, V>()))
-          .setCoder(outputKvCoder);
-    }
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Helper transform that sorts the values associated with each key
-   * by timestamp.
-   */
-  public static class SortValuesByTimestamp<K, V>
-      extends PTransform<PCollection<KV<K, Iterable<WindowedValue<V>>>>,
-                         PCollection<KV<K, Iterable<WindowedValue<V>>>>> {
-    @Override
-    public PCollection<KV<K, Iterable<WindowedValue<V>>>> apply(
-        PCollection<KV<K, Iterable<WindowedValue<V>>>> input) {
-      return input.apply(ParDo.of(
-          new DoFn<KV<K, Iterable<WindowedValue<V>>>,
-                   KV<K, Iterable<WindowedValue<V>>>>() {
-            @Override
-            public void processElement(ProcessContext c) {
-              KV<K, Iterable<WindowedValue<V>>> kvs = c.element();
-              K key = kvs.getKey();
-              Iterable<WindowedValue<V>> unsortedValues = kvs.getValue();
-              List<WindowedValue<V>> sortedValues = new ArrayList<>();
-              for (WindowedValue<V> value : unsortedValues) {
-                sortedValues.add(value);
-              }
-              Collections.sort(sortedValues,
-                               new Comparator<WindowedValue<V>>() {
-                  @Override
-                  public int compare(WindowedValue<V> e1, WindowedValue<V> e2) {
-                    return e1.getTimestamp().compareTo(e2.getTimestamp());
-                  }
-                });
-              c.output(KV.<K, Iterable<WindowedValue<V>>>of(key, sortedValues));
-            }}))
-          .setCoder(input.getCoder());
-    }
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Helper transform that takes a collection of timestamp-ordered
-   * values associated with each key, groups the values by window,
-   * combines windows as needed, and for each window in each key,
-   * outputs a collection of key/value-list pairs implicitly assigned
-   * to the window and with the timestamp derived from that window.
-   */
-  public static class GroupAlsoByWindow<K, V>
-      extends PTransform<PCollection<KV<K, Iterable<WindowedValue<V>>>>,
-                         PCollection<KV<K, Iterable<V>>>> {
-    private final WindowingStrategy<?, ?> windowingStrategy;
-
-    public GroupAlsoByWindow(WindowingStrategy<?, ?> windowingStrategy) {
-      this.windowingStrategy = windowingStrategy;
-    }
-
-    @Override
-    @SuppressWarnings("unchecked")
-    public PCollection<KV<K, Iterable<V>>> apply(
-        PCollection<KV<K, Iterable<WindowedValue<V>>>> input) {
-      @SuppressWarnings("unchecked")
-      KvCoder<K, Iterable<WindowedValue<V>>> inputKvCoder =
-          (KvCoder<K, Iterable<WindowedValue<V>>>) input.getCoder();
-
-      Coder<K> keyCoder = inputKvCoder.getKeyCoder();
-      Coder<Iterable<WindowedValue<V>>> inputValueCoder =
-          inputKvCoder.getValueCoder();
-
-      IterableCoder<WindowedValue<V>> inputIterableValueCoder =
-          (IterableCoder<WindowedValue<V>>) inputValueCoder;
-      Coder<WindowedValue<V>> inputIterableElementCoder =
-          inputIterableValueCoder.getElemCoder();
-      WindowedValueCoder<V> inputIterableWindowedValueCoder =
-          (WindowedValueCoder<V>) inputIterableElementCoder;
-
-      Coder<V> inputIterableElementValueCoder =
-          inputIterableWindowedValueCoder.getValueCoder();
-      Coder<Iterable<V>> outputValueCoder =
-          IterableCoder.of(inputIterableElementValueCoder);
-      Coder<KV<K, Iterable<V>>> outputKvCoder = KvCoder.of(keyCoder, outputValueCoder);
-
-      return input
-          .apply(ParDo.of(groupAlsoByWindowsFn(windowingStrategy, inputIterableElementValueCoder)))
-          .setCoder(outputKvCoder);
-    }
-
-    private <W extends BoundedWindow> GroupAlsoByWindowsViaOutputBufferDoFn<K, V, Iterable<V>, W>
-        groupAlsoByWindowsFn(
-            WindowingStrategy<?, W> strategy, Coder<V> inputIterableElementValueCoder) {
-      return new GroupAlsoByWindowsViaOutputBufferDoFn<K, V, Iterable<V>, W>(
-          strategy, SystemReduceFn.<K, V, W>buffering(inputIterableElementValueCoder));
-    }
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Primitive helper transform that groups by key only, ignoring any
-   * window assignments.
-   */
-  public static class GroupByKeyOnly<K, V>
-      extends PTransform<PCollection<KV<K, V>>,
-                         PCollection<KV<K, Iterable<V>>>> {
-
-    @SuppressWarnings({"rawtypes", "unchecked"})
-    @Override
-    public PCollection<KV<K, Iterable<V>>> apply(PCollection<KV<K, V>> input) {
-      return PCollection.<KV<K, Iterable<V>>>createPrimitiveOutputInternal(
-          input.getPipeline(), input.getWindowingStrategy(), input.isBounded());
-    }
-
-    /**
-     * Returns the {@code Coder} of the input to this transform, which
-     * should be a {@code KvCoder}.
-     */
-    @SuppressWarnings("unchecked")
-    KvCoder<K, V> getInputKvCoder(Coder<KV<K, V>> inputCoder) {
-      if (!(inputCoder instanceof KvCoder)) {
-        throw new IllegalStateException(
-            "GroupByKey requires its input to use KvCoder");
-      }
-      return (KvCoder<K, V>) inputCoder;
-    }
-
-    @Override
-    protected Coder<KV<K, Iterable<V>>> getDefaultOutputCoder(PCollection<KV<K, V>> input) {
-      return GroupByKey.getOutputKvCoder(input.getCoder());
-    }
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  static {
-    registerWithDirectPipelineRunner();
-  }
-
-  @SuppressWarnings({"rawtypes", "unchecked"})
-  private static <K, V> void registerWithDirectPipelineRunner() {
-    DirectPipelineRunner.registerDefaultTransformEvaluator(
-        GroupByKeyOnly.class,
-        new DirectPipelineRunner.TransformEvaluator<GroupByKeyOnly>() {
-          @Override
-          public void evaluate(
-              GroupByKeyOnly transform,
-              DirectPipelineRunner.EvaluationContext context) {
-            evaluateHelper(transform, context);
-          }
-        });
-  }
-
-  private static <K, V> void evaluateHelper(
-      GroupByKeyOnly<K, V> transform,
-      DirectPipelineRunner.EvaluationContext context) {
-    PCollection<KV<K, V>> input = context.getInput(transform);
-
-    List<ValueWithMetadata<KV<K, V>>> inputElems =
-        context.getPCollectionValuesWithMetadata(input);
-
-    Coder<K> keyCoder = GroupByKey.getKeyCoder(input.getCoder());
-
-    Map<GroupingKey<K>, List<V>> groupingMap = new HashMap<>();
-
-    for (ValueWithMetadata<KV<K, V>> elem : inputElems) {
-      K key = elem.getValue().getKey();
-      V value = elem.getValue().getValue();
-      byte[] encodedKey;
-      try {
-        encodedKey = encodeToByteArray(keyCoder, key);
-      } catch (CoderException exn) {
-        // TODO: Put in better element printing:
-        // truncate if too long.
-        throw new IllegalArgumentException(
-            "unable to encode key " + key + " of input to " + transform +
-            " using " + keyCoder,
-            exn);
-      }
-      GroupingKey<K> groupingKey = new GroupingKey<>(key, encodedKey);
-      List<V> values = groupingMap.get(groupingKey);
-      if (values == null) {
-        values = new ArrayList<V>();
-        groupingMap.put(groupingKey, values);
-      }
-      values.add(value);
-    }
-
-    List<ValueWithMetadata<KV<K, Iterable<V>>>> outputElems =
-        new ArrayList<>();
-    for (Map.Entry<GroupingKey<K>, List<V>> entry : groupingMap.entrySet()) {
-      GroupingKey<K> groupingKey = entry.getKey();
-      K key = groupingKey.getKey();
-      List<V> values = entry.getValue();
-      values = context.randomizeIfUnordered(values, true /* inPlaceAllowed */);
-      outputElems.add(ValueWithMetadata
-                      .of(WindowedValue.valueInEmptyWindows(KV.<K, Iterable<V>>of(key, values)))
-                      .withKey(key));
-    }
-
-    context.setPCollectionValuesWithMetadata(context.getOutput(transform),
-                                             outputElems);
-  }
-
-  /** Map key that identifies a user key by its encoded bytes rather than by {@code equals}. */
-  private static class GroupingKey<K> {
-    private final K key;
-    private final byte[] encodedKey;
-
-    public GroupingKey(K key, byte[] encodedKey) {
-      this.key = key;
-      this.encodedKey = encodedKey;
-    }
-
-    /** Returns the original (decoded) key. */
-    public K getKey() {
-      return key;
-    }
-
-    /** Equal iff the encoded key bytes match; the decoded key object is not consulted. */
-    @Override
-    public boolean equals(Object o) {
-      return o instanceof GroupingKey
-          && Arrays.equals(this.encodedKey, ((GroupingKey<?>) o).encodedKey);
-    }
-
-    /** Hashes the encoded bytes so that it stays consistent with {@link #equals}. */
-    @Override
-    public int hashCode() {
-      return Arrays.hashCode(encodedKey);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelization.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelization.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelization.java
deleted file mode 100644
index b6497b7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/IntraBundleParallelization.java
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.options.GcsOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.util.WindowingInternals;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Throwables;
-
-import org.joda.time.Instant;
-
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Semaphore;
-import java.util.concurrent.atomic.AtomicReference;
-
-/**
- * Provides multi-threading of {@link DoFn}s, using threaded execution to
- * process multiple elements concurrently within a bundle.
- *
- * <p>Note that each Dataflow worker will already process multiple bundles
- * concurrently and usage of this class is meant only for cases where processing
- * elements from within a bundle is limited by blocking calls.
- *
- * <p>CPU intensive or IO intensive tasks are in general a poor fit for parallelization.
- * This is because a limited resource that is already maximally utilized does not
- * benefit from sub-division of work. The parallelization will increase the amount of time
- * to process each element yet the throughput for processing will remain relatively the same.
- * For example, if the local disk (an IO resource) has a maximum write rate of 10 MiB/s,
- * and processing each element requires writing 20 MiB to disk, then processing one element
- * will take 2 seconds. Yet processing 3 elements concurrently (each getting an equal
- * share of the maximum write rate) will take at least 6 seconds to complete (there is additional
- * overhead in the extra parallelization).
- *
- * <p>To parallelize a {@link DoFn} to 10 threads:
- * <pre>{@code
- * PCollection<T> data = ...;
- * data.apply(
- *   IntraBundleParallelization.of(new MyDoFn())
- *                             .withMaxParallelism(10));
- * }</pre>
- *
- * <p>An uncaught exception from the wrapped {@link DoFn} will result in the exception
- * being rethrown in later calls to {@link MultiThreadedIntraBundleProcessingDoFn#processElement}
- * or a call to {@link MultiThreadedIntraBundleProcessingDoFn#finishBundle}.
- */
-public class IntraBundleParallelization {
-  /**
-   * Creates an {@link IntraBundleParallelization} {@link PTransform} for the given
-   * {@link DoFn} that processes elements using multiple threads.
-   *
-   * <p>Note that the specified {@code doFn} needs to be thread safe.
-   */
-  public static <InputT, OutputT> Bound<InputT, OutputT> of(DoFn<InputT, OutputT> doFn) {
-    return new Unbound().of(doFn);
-  }
-
-  /**
-   * Creates an {@link IntraBundleParallelization} {@link PTransform} with the specified
-   * maximum concurrency level.
-   */
-  public static Unbound withMaxParallelism(int maxParallelism) {
-    return new Unbound().withMaxParallelism(maxParallelism);
-  }
-
-  /**
-   * An incomplete {@code IntraBundleParallelization} transform, with unbound input/output types.
-   *
-   * <p>Before being applied, {@link IntraBundleParallelization.Unbound#of} must be
-   * invoked to specify the {@link DoFn} to invoke, which will also
-   * bind the input/output types of this {@code PTransform}.
-   */
-  public static class Unbound {
-    private final int maxParallelism;
-
-    Unbound() {
-      this(DEFAULT_MAX_PARALLELISM);
-    }
-
-    Unbound(int maxParallelism) {
-      Preconditions.checkArgument(maxParallelism > 0,
-          "Expected parallelism factor greater than zero, received %s.", maxParallelism);
-      this.maxParallelism = maxParallelism;
-    }
-
-    /**
-     * Returns a new {@link IntraBundleParallelization} {@link PTransform} like this one
-     * with the specified maximum concurrency level.
-     */
-    public Unbound withMaxParallelism(int maxParallelism) {
-      return new Unbound(maxParallelism);
-    }
-
-    /**
-     * Returns a new {@link IntraBundleParallelization} {@link PTransform} like this one
-     * with the specified {@link DoFn}.
-     *
-     * <p>Note that the specified {@code doFn} needs to be thread safe.
-     */
-    public <InputT, OutputT> Bound<InputT, OutputT> of(DoFn<InputT, OutputT> doFn) {
-      return new Bound<>(doFn, maxParallelism);
-    }
-  }
-
-  /**
-   * A {@code PTransform} that, when applied to a {@code PCollection<InputT>},
-   * invokes a user-specified {@code DoFn<InputT, OutputT>} on all its elements,
-   * with all its outputs collected into an output
-   * {@code PCollection<OutputT>}.
-   *
-   * <p>Note that the specified {@code doFn} needs to be thread safe.
-   *
-   * @param <InputT> the type of the (main) input {@code PCollection} elements
-   * @param <OutputT> the type of the (main) output {@code PCollection} elements
-   */
-  public static class Bound<InputT, OutputT>
-      extends PTransform<PCollection<? extends InputT>, PCollection<OutputT>> {
-    private final DoFn<InputT, OutputT> doFn;
-    private final int maxParallelism;
-
-    Bound(DoFn<InputT, OutputT> doFn, int maxParallelism) {
-      Preconditions.checkArgument(maxParallelism > 0,
-          "Expected parallelism factor greater than zero, received %s.", maxParallelism);
-      this.doFn = doFn;
-      this.maxParallelism = maxParallelism;
-    }
-
-    /**
-     * Returns a new {@link IntraBundleParallelization} {@link PTransform} like this one
-     * with the specified maximum concurrency level.
-     */
-    public Bound<InputT, OutputT> withMaxParallelism(int maxParallelism) {
-      return new Bound<>(doFn, maxParallelism);
-    }
-
-    /**
-     * Returns a new {@link IntraBundleParallelization} {@link PTransform} like this one
-     * with the specified {@link DoFn}.
-     *
-     * <p>Note that the specified {@code doFn} needs to be thread safe.
-     */
-    public <NewInputT, NewOutputT> Bound<NewInputT, NewOutputT>
-        of(DoFn<NewInputT, NewOutputT> doFn) {
-      return new Bound<>(doFn, maxParallelism);
-    }
-
-    @Override
-    public PCollection<OutputT> apply(PCollection<? extends InputT> input) {
-      return input.apply(
-          ParDo.of(new MultiThreadedIntraBundleProcessingDoFn<>(doFn, maxParallelism)));
-    }
-  }
-
-  /**
-   * A multi-threaded {@code DoFn} wrapper.
-   *
-   * @see IntraBundleParallelization#of(DoFn)
-   *
-   * @param <InputT> the type of the (main) input elements
-   * @param <OutputT> the type of the (main) output elements
-   */
-  public static class MultiThreadedIntraBundleProcessingDoFn<InputT, OutputT>
-      extends DoFn<InputT, OutputT> {
-
-    public MultiThreadedIntraBundleProcessingDoFn(DoFn<InputT, OutputT> doFn, int maxParallelism) {
-      Preconditions.checkArgument(maxParallelism > 0,
-          "Expected parallelism factor greater than zero, received %s.", maxParallelism);
-      this.doFn = doFn;
-      this.maxParallelism = maxParallelism;
-    }
-
-    @Override
-    public void startBundle(Context c) throws Exception {
-      doFn.startBundle(c);
-
-      executor = c.getPipelineOptions().as(GcsOptions.class).getExecutorService();
-      workTickets = new Semaphore(maxParallelism);
-      failure = new AtomicReference<>();
-    }
-
-    @Override
-    public void processElement(final ProcessContext c) throws Exception {
-      try {
-        workTickets.acquire();
-      } catch (InterruptedException e) {
-        throw new RuntimeException("Interrupted while scheduling work", e);
-      }
-
-      if (failure.get() != null) {
-        throw Throwables.propagate(failure.get());
-      }
-
-      executor.submit(new Runnable() {
-        @Override
-        public void run() {
-          try {
-            doFn.processElement(new WrappedContext(c));
-          } catch (Throwable t) {
-            failure.compareAndSet(null, t);
-            Throwables.propagateIfPossible(t);
-            throw new AssertionError("Unexpected checked exception: " + t);
-          } finally {
-            workTickets.release();
-          }
-        }
-      });
-    }
-
-    @Override
-    public void finishBundle(Context c) throws Exception {
-      // Acquire all the work tickets to guarantee that all the previous
-      // processElement calls have finished.
-      workTickets.acquire(maxParallelism);
-      if (failure.get() != null) {
-        throw Throwables.propagate(failure.get());
-      }
-      doFn.finishBundle(c);
-    }
-
-    @Override
-    protected TypeDescriptor<InputT> getInputTypeDescriptor() {
-      return doFn.getInputTypeDescriptor();
-    }
-
-    @Override
-    protected TypeDescriptor<OutputT> getOutputTypeDescriptor() {
-      return doFn.getOutputTypeDescriptor();
-    }
-
-    /////////////////////////////////////////////////////////////////////////////
-
-    /**
-     * Wraps a DoFn context, forcing single-thread output so that threads don't
-     * propagate through to downstream functions.
-     */
-    private class WrappedContext extends ProcessContext {
-      private final ProcessContext context;
-
-      WrappedContext(ProcessContext context) {
-        this.context = context;
-      }
-
-      @Override
-      public InputT element() {
-        return context.element();
-      }
-
-      @Override
-      public PipelineOptions getPipelineOptions() {
-        return context.getPipelineOptions();
-      }
-
-      @Override
-      public <T> T sideInput(PCollectionView<T> view) {
-        return context.sideInput(view);
-      }
-
-      @Override
-      public void output(OutputT output) {
-        synchronized (MultiThreadedIntraBundleProcessingDoFn.this) {
-          context.output(output);
-        }
-      }
-
-      @Override
-      public void outputWithTimestamp(OutputT output, Instant timestamp) {
-        synchronized (MultiThreadedIntraBundleProcessingDoFn.this) {
-          context.outputWithTimestamp(output, timestamp);
-        }
-      }
-
-      @Override
-      public <T> void sideOutput(TupleTag<T> tag, T output) {
-        synchronized (MultiThreadedIntraBundleProcessingDoFn.this) {
-          context.sideOutput(tag, output);
-        }
-      }
-
-      @Override
-      public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
-        synchronized (MultiThreadedIntraBundleProcessingDoFn.this) {
-          context.sideOutputWithTimestamp(tag, output, timestamp);
-        }
-      }
-
-      @Override
-      public Instant timestamp() {
-        return context.timestamp();
-      }
-
-      @Override
-      public BoundedWindow window() {
-        return context.window();
-      }
-
-      @Override
-      public PaneInfo pane() {
-        return context.pane();
-      }
-
-      @Override
-      public WindowingInternals<InputT, OutputT> windowingInternals() {
-        return context.windowingInternals();
-      }
-
-      @Override
-      protected <AggInputT, AggOutputT> Aggregator<AggInputT, AggOutputT> createAggregatorInternal(
-          String name, CombineFn<AggInputT, ?, AggOutputT> combiner) {
-        return context.createAggregatorInternal(name, combiner);
-      }
-    }
-
-    private final DoFn<InputT, OutputT> doFn;
-    private int maxParallelism;
-
-    private transient ExecutorService executor;
-    private transient Semaphore workTickets;
-    private transient AtomicReference<Throwable> failure;
-  }
-
-  /**
-   * Default maximum for number of concurrent elements to process.
-   */
-  private static final int DEFAULT_MAX_PARALLELISM = 16;
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java
deleted file mode 100644
index 370d43d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Keys.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-/**
- * {@code Keys<K>} takes a {@code PCollection} of {@code KV<K, V>}s and
- * returns a {@code PCollection<K>} of the keys.
- *
- * <p>Example of use:
- * <pre> {@code
- * PCollection<KV<String, Long>> wordCounts = ...;
- * PCollection<String> words = wordCounts.apply(Keys.<String>create());
- * } </pre>
- *
- * <p>Each output element has the same timestamp and is in the same windows
- * as its corresponding input element, and the output {@code PCollection}
- * has the same
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
- * associated with it as the input.
- *
- * <p>See also {@link Values}.
- *
- * @param <K> the type of the keys in the input {@code PCollection},
- * and the type of the elements in the output {@code PCollection}
- */
-public class Keys<K> extends PTransform<PCollection<? extends KV<K, ?>>,
-                                        PCollection<K>> {
-  /**
-   * Returns a {@code Keys<K>} {@code PTransform}.
-   *
-   * @param <K> the type of the keys in the input {@code PCollection},
-   * and the type of the elements in the output {@code PCollection}
-   */
-  public static <K> Keys<K> create() {
-    return new Keys<>();
-  }
-
-  private Keys() { }
-
-  @Override
-  public PCollection<K> apply(PCollection<? extends KV<K, ?>> in) {
-    return
-        in.apply(ParDo.named("Keys")
-                 .of(new DoFn<KV<K, ?>, K>() {
-                     @Override
-                     public void processElement(ProcessContext c) {
-                       c.output(c.element().getKey());
-                     }
-                    }));
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java
deleted file mode 100644
index 5a9cc87..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/KvSwap.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-/**
- * {@code KvSwap<K, V>} takes a {@code PCollection<KV<K, V>>} and
- * returns a {@code PCollection<KV<V, K>>}, where all the keys and
- * values have been swapped.
- *
- * <p>Example of use:
- * <pre> {@code
- * PCollection<KV<String, Long>> wordsToCounts = ...;
- * PCollection<KV<Long, String>> countsToWords =
- *     wordsToCounts.apply(KvSwap.<String, Long>create());
- * } </pre>
- *
- * <p>Each output element has the same timestamp and is in the same windows
- * as its corresponding input element, and the output {@code PCollection}
- * has the same
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
- * associated with it as the input.
- *
- * @param <K> the type of the keys in the input {@code PCollection}
- * and the values in the output {@code PCollection}
- * @param <V> the type of the values in the input {@code PCollection}
- * and the keys in the output {@code PCollection}
- */
-public class KvSwap<K, V> extends PTransform<PCollection<KV<K, V>>,
-                                             PCollection<KV<V, K>>> {
-  /**
-   * Returns a {@code KvSwap<K, V>} {@code PTransform}.
-   *
-   * @param <K> the type of the keys in the input {@code PCollection}
-   * and the values in the output {@code PCollection}
-   * @param <V> the type of the values in the input {@code PCollection}
-   * and the keys in the output {@code PCollection}
-   */
-  public static <K, V> KvSwap<K, V> create() {
-    return new KvSwap<>();
-  }
-
-  private KvSwap() { }
-
-  @Override
-  public PCollection<KV<V, K>> apply(PCollection<KV<K, V>> in) {
-    return
-        in.apply(ParDo.named("KvSwap")
-                 .of(new DoFn<KV<K, V>, KV<V, K>>() {
-                     @Override
-                     public void processElement(ProcessContext c) {
-                       KV<K, V> e = c.element();
-                       c.output(KV.of(e.getValue(), e.getKey()));
-                     }
-                    }));
-  }
-}

[26/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java
deleted file mode 100644
index 57dd510..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateQuantiles.java
+++ /dev/null
@@ -1,766 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.coders.CustomCoder;
-import com.google.cloud.dataflow.sdk.coders.ListCoder;
-import com.google.cloud.dataflow.sdk.transforms.Combine.AccumulatingCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Combine.AccumulatingCombineFn.Accumulator;
-import com.google.cloud.dataflow.sdk.util.WeightedValue;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Iterators;
-import com.google.common.collect.Lists;
-import com.google.common.collect.UnmodifiableIterator;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-import java.util.PriorityQueue;
-
-import javax.annotation.Nullable;
-
-/**
- * {@code PTransform}s for getting an idea of a {@code PCollection}'s
- * data distribution using approximate {@code N}-tiles (e.g. quartiles,
- * percentiles, etc.), either globally or per-key.
- */
-public class ApproximateQuantiles {
-  private ApproximateQuantiles() {
-    // do not instantiate
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes a {@code PCollection<T>}
-   * and returns a {@code PCollection<List<T>>} whose single value is a
-   * {@code List} of the approximate {@code N}-tiles of the elements
-   * of the input {@code PCollection}.  This gives an idea of the
-   * distribution of the input elements.
-   *
-   * <p>The computed {@code List} is of size {@code numQuantiles},
-   * and contains the input elements' minimum value,
-   * {@code numQuantiles-2} intermediate values, and maximum value, in
-   * sorted order, using the given {@code Comparator} to order values.
-   * To compute traditional {@code N}-tiles, one should use
-   * {@code ApproximateQuantiles.globally(N+1, compareFn)}.
-   *
-   * <p>If there are fewer input elements than {@code numQuantiles},
-   * then the result {@code List} will contain all the input elements,
-   * in sorted order.
-   *
-   * <p>The argument {@code Comparator} must be {@code Serializable}.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<String> pc = ...;
-   * PCollection<List<String>> quantiles =
-   *     pc.apply(ApproximateQuantiles.globally(11, stringCompareFn));
-   * } </pre>
-   *
-   * @param <T> the type of the elements in the input {@code PCollection}
-   * @param numQuantiles the number of elements in the resulting
-   *        quantile values {@code List}
-   * @param compareFn the function to use to order the elements
-   */
-  public static <T, ComparatorT extends Comparator<T> & Serializable>
-  PTransform<PCollection<T>, PCollection<List<T>>> globally(
-      int numQuantiles, ComparatorT compareFn) {
-    return Combine.globally(
-        ApproximateQuantilesCombineFn.create(numQuantiles, compareFn));
-  }
-
-  /**
-   * Like {@link #globally(int, Comparator)}, but sorts using the
-   * elements' natural ordering.
-   *
-   * @param <T> the type of the elements in the input {@code PCollection}
-   * @param numQuantiles the number of elements in the resulting
-   *        quantile values {@code List}
-   */
-  public static <T extends Comparable<T>>
-      PTransform<PCollection<T>, PCollection<List<T>>> globally(int numQuantiles) {
-    return Combine.globally(
-        ApproximateQuantilesCombineFn.<T>create(numQuantiles));
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes a
-   * {@code PCollection<KV<K, V>>} and returns a
-   * {@code PCollection<KV<K, List<V>>>} that contains an output
-   * element mapping each distinct key in the input
-   * {@code PCollection} to a {@code List} of the approximate
-   * {@code N}-tiles of the values associated with that key in the
-   * input {@code PCollection}.  This gives an idea of the
-   * distribution of the input values for each key.
-   *
-   * <p>Each of the computed {@code List}s is of size {@code numQuantiles},
-   * and contains the input values' minimum value,
-   * {@code numQuantiles-2} intermediate values, and maximum value, in
-   * sorted order, using the given {@code Comparator} to order values.
-   * To compute traditional {@code N}-tiles, one should use
-   * {@code ApproximateQuantiles.perKey(N+1, compareFn)}.
-   *
-   * <p>If a key has fewer than {@code numQuantiles} values
-   * associated with it, then that key's output {@code List} will
-   * contain all the key's input values, in sorted order.
-   *
-   * <p>The argument {@code Comparator} must be {@code Serializable}.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<KV<Integer, String>> pc = ...;
-   * PCollection<KV<Integer, List<String>>> quantilesPerKey =
-   *     pc.apply(ApproximateQuantiles.<Integer, String>perKey(11, stringCompareFn));
-   * } </pre>
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   *
-   * @param <K> the type of the keys in the input and output
-   *        {@code PCollection}s
-   * @param <V> the type of the values in the input {@code PCollection}
-   * @param numQuantiles the number of elements in the resulting
-   *        quantile values {@code List}
-   * @param compareFn the function to use to order the elements
-   */
-  public static <K, V, ComparatorT extends Comparator<V> & Serializable>
-      PTransform<PCollection<KV<K, V>>, PCollection<KV<K, List<V>>>>
-      perKey(int numQuantiles, ComparatorT compareFn) {
-    return Combine.perKey(
-        ApproximateQuantilesCombineFn.create(numQuantiles, compareFn)
-        .<K>asKeyedFn());
-  }
-
-  /**
-   * Like {@link #perKey(int, Comparator)}, but sorts
-   * values using their natural ordering.
-   *
-   * @param <K> the type of the keys in the input and output
-   *        {@code PCollection}s
-   * @param <V> the type of the values in the input {@code PCollection}
-   * @param numQuantiles the number of elements in the resulting
-   *        quantile values {@code List}
-   */
-  public static <K, V extends Comparable<V>>
-      PTransform<PCollection<KV<K, V>>, PCollection<KV<K, List<V>>>>
-      perKey(int numQuantiles) {
-    return Combine.perKey(
-        ApproximateQuantilesCombineFn.<V>create(numQuantiles)
-        .<K>asKeyedFn());
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * The {@code ApproximateQuantilesCombineFn} combiner gives an idea
-   * of the distribution of a collection of values using approximate
-   * {@code N}-tiles.  The output of this combiner is a {@code List}
-   * of size {@code numQuantiles}, containing the input values'
-   * minimum value, {@code numQuantiles-2} intermediate values, and
-   * maximum value, in sorted order, so for traditional
-   * {@code N}-tiles, one should use
-   * {@code ApproximateQuantilesCombineFn#create(N+1)}.
-   *
-   * <p>If there are fewer values to combine than
-   * {@code numQuantiles}, then the result {@code List} will contain all the
-   * values being combined, in sorted order.
-   *
-   * <p>Values are ordered using either a specified
-   * {@code Comparator} or the values' natural ordering.
-   *
-   * <p>To evaluate the quantiles we use the "New Algorithm" described here:
-   * <pre>
-   *   [MRL98] Manku, Rajagopalan &amp; Lindsay, "Approximate Medians and other
-   *   Quantiles in One Pass and with Limited Memory", Proc. 1998 ACM
-   *   SIGMOD, Vol 27, No 2, p 426-435, June 1998.
-   *   http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.6.6513&amp;rep=rep1&amp;type=pdf
-   * </pre>
-   *
-   * <p>The default error bound is {@code 1 / N}, though in practice
-   * the accuracy tends to be much better.  See
-   * {@link #create(int, Comparator, long, double)} for
-   * more information about the meaning of {@code epsilon}, and
-   * {@link #withEpsilon} for a convenient way to adjust it.
-   *
-   * @param <T> the type of the values being combined
-   */
-  public static class ApproximateQuantilesCombineFn
-      <T, ComparatorT extends Comparator<T> & Serializable>
-      extends AccumulatingCombineFn<T, QuantileState<T, ComparatorT>, List<T>> {
-
-    /**
-     * The cost (in time and space) to compute quantiles to a given
-     * accuracy is a function of the total number of elements in the
-     * data set.  If an estimate is not known or specified, we use
-     * this as an upper bound.  If this is too low, errors may exceed
-     * the requested tolerance; if too high, efficiency may be
-     * non-optimal.  The impact is logarithmic with respect to this
-     * value, so this default should be fine for most uses.
-     */
-    public static final long DEFAULT_MAX_NUM_ELEMENTS = (long) 1e9;
-
-    /** The comparison function to use. */
-    private final ComparatorT compareFn;
-
-    /**
-     * Number of quantiles to produce.  The size of the final output
-     * list, including the minimum and maximum, is numQuantiles.
-     */
-    private final int numQuantiles;
-
-    /** The size of the buffers, corresponding to k in the referenced paper. */
-    private final int bufferSize;
-
-    /**  The number of buffers, corresponding to b in the referenced paper. */
-    private final int numBuffers;
-
-    private final long maxNumElements;
-
-    private ApproximateQuantilesCombineFn(
-        int numQuantiles,
-        ComparatorT compareFn,
-        int bufferSize,
-        int numBuffers,
-        long maxNumElements) {
-      Preconditions.checkArgument(numQuantiles >= 2);
-      Preconditions.checkArgument(bufferSize >= 2);
-      Preconditions.checkArgument(numBuffers >= 2);
-      this.numQuantiles = numQuantiles;
-      this.compareFn = compareFn;
-      this.bufferSize = bufferSize;
-      this.numBuffers = numBuffers;
-      this.maxNumElements = maxNumElements;
-    }
-
-    /**
-     * Returns an approximate quantiles combiner with the given
-     * {@code compareFn} and desired number of quantiles.  A total of
-     * {@code numQuantiles} elements will appear in the output list,
-     * including the minimum and maximum.
-     *
-     * <p>The {@code Comparator} must be {@code Serializable}.
-     *
-     * <p>The default error bound is {@code 1 / numQuantiles}, which
-     * holds as long as the number of elements is less than
-     * {@link #DEFAULT_MAX_NUM_ELEMENTS}.
-     */
-    public static <T, ComparatorT extends Comparator<T> & Serializable>
-        ApproximateQuantilesCombineFn<T, ComparatorT> create(
-            int numQuantiles, ComparatorT compareFn) {
-      return create(
-          numQuantiles, compareFn, DEFAULT_MAX_NUM_ELEMENTS, 1.0 / numQuantiles);
-    }
-
-    /**
-     * Like {@link #create(int, Comparator)}, but sorts values using their natural ordering.
-     */
-    public static <T extends Comparable<T>>
-        ApproximateQuantilesCombineFn<T, Top.Largest<T>> create(int numQuantiles) {
-      return create(numQuantiles, new Top.Largest<T>());
-    }
-
-    /**
-     * Returns an {@code ApproximateQuantilesCombineFn} that's like
-     * this one except that it uses the specified {@code epsilon}
-     * value.  Does not modify this combiner.
-     *
-     * <p>See {@link #create(int, Comparator, long,
-     * double)} for more information about the meaning of
-     * {@code epsilon}.
-     */
-    public ApproximateQuantilesCombineFn<T, ComparatorT> withEpsilon(double epsilon) {
-      return create(numQuantiles, compareFn, maxNumElements, epsilon);
-    }
-
-    /**
-     * Returns an {@code ApproximateQuantilesCombineFn} that's like
-     * this one except that it uses the specified {@code maxNumElements}
-     * value.  Does not modify this combiner.
-     *
-     * <p>See {@link #create(int, Comparator, long, double)} for more
-     * information about the meaning of {@code maxNumElements}.
-     */
-    public ApproximateQuantilesCombineFn<T, ComparatorT> withMaxInputSize(
-        long maxNumElements) {
-      return create(numQuantiles, compareFn, maxNumElements, maxNumElements);
-    }
-
-    /**
-     * Creates an approximate quantiles combiner with the given
-     * {@code compareFn} and desired number of quantiles.  A total of
-     * {@code numQuantiles} elements will appear in the output list,
-     * including the minimum and maximum.
-     *
-     * <p>The {@code Comparator} must be {@code Serializable}.
-     *
-     * <p>The default error bound is {@code epsilon}, which holds as long
-     * as the number of elements is less than {@code maxNumElements}.
-     * Specifically, if one considers the input as a sorted list x_1, ..., x_N,
-     * then the distance between each exact quantile x_c and its
-     * approximation x_c' is bounded by {@code |c - c'| < epsilon * N}.
-     * Note that these errors are worst-case scenarios; in practice the accuracy
-     * tends to be much better.
-     */
-    public static <T, ComparatorT extends Comparator<T> & Serializable>
-        ApproximateQuantilesCombineFn<T, ComparatorT> create(
-            int numQuantiles,
-            ComparatorT compareFn,
-            long maxNumElements,
-            double epsilon) {
-      // Compute optimal b and k.
-      int b = 2;
-      while ((b - 2) * (1 << (b - 2)) < epsilon * maxNumElements) {
-        b++;
-      }
-      b--;
-      int k = Math.max(2, (int) Math.ceil(maxNumElements / (1 << (b - 1))));
-      return new ApproximateQuantilesCombineFn<T, ComparatorT>(
-          numQuantiles, compareFn, k, b, maxNumElements);
-    }
-
-    @Override
-    public QuantileState<T, ComparatorT> createAccumulator() {
-      return QuantileState.empty(compareFn, numQuantiles, numBuffers, bufferSize);
-    }
-
-    @Override
-    public Coder<QuantileState<T, ComparatorT>> getAccumulatorCoder(
-        CoderRegistry registry, Coder<T> elementCoder) {
-      return new QuantileStateCoder<>(compareFn, elementCoder);
-    }
-  }
-
-  /**
-   * Compact summarization of a collection on which quantiles can be estimated.
-   */
-  static class QuantileState<T, ComparatorT extends Comparator<T> & Serializable>
-      implements Accumulator<T, QuantileState<T, ComparatorT>, List<T>> {
-
-    private ComparatorT compareFn;
-    private int numQuantiles;
-    private int numBuffers;
-    private int bufferSize;
-
-    @Nullable
-    private T min;
-
-    @Nullable
-    private T max;
-
-    /**
-     * The set of buffers, ordered by level from smallest to largest.
-     */
-    private PriorityQueue<QuantileBuffer<T>> buffers;
-
-    /**
-     * The algorithm requires that the manipulated buffers always be filled
-     * to capacity to perform the collapse operation.  This operation can
-     * be extended to buffers of varying sizes by introducing the notion of
-     * fractional weights, but it's easier to simply combine the remainders
-     * from all shards into new, full buffers and then take them into account
-     * when computing the final output.
-     */
-    private List<T> unbufferedElements = Lists.newArrayList();
-
-    private QuantileState(
-        ComparatorT compareFn,
-        int numQuantiles,
-        @Nullable T min,
-        @Nullable T max,
-        int numBuffers,
-        int bufferSize,
-        Collection<T> unbufferedElements,
-        Collection<QuantileBuffer<T>> buffers) {
-      this.compareFn = compareFn;
-      this.numQuantiles = numQuantiles;
-      this.numBuffers = numBuffers;
-      this.bufferSize = bufferSize;
-      this.buffers = new PriorityQueue<>(numBuffers + 1);
-      this.min = min;
-      this.max = max;
-      this.unbufferedElements.addAll(unbufferedElements);
-      this.buffers.addAll(buffers);
-    }
-
-    public static <T, ComparatorT extends Comparator<T> & Serializable>
-        QuantileState<T, ComparatorT> empty(
-            ComparatorT compareFn, int numQuantiles, int numBuffers, int bufferSize) {
-      return new QuantileState<T, ComparatorT>(
-          compareFn,
-          numQuantiles,
-          null, /* min */
-          null, /* max */
-          numBuffers,
-          bufferSize,
-          Collections.<T>emptyList(),
-          Collections.<QuantileBuffer<T>>emptyList());
-    }
-
-    public static <T, ComparatorT extends Comparator<T> & Serializable>
-        QuantileState<T, ComparatorT> singleton(
-            ComparatorT compareFn, int numQuantiles, T elem, int numBuffers, int bufferSize) {
-      return new QuantileState<T, ComparatorT>(
-          compareFn,
-          numQuantiles,
-          elem, /* min */
-          elem, /* max */
-          numBuffers,
-          bufferSize,
-          Collections.singletonList(elem),
-          Collections.<QuantileBuffer<T>>emptyList());
-    }
-
-    /**
-     * Add a new element to the collection being summarized by this state.
-     */
-    @Override
-    public void addInput(T elem) {
-      if (isEmpty()) {
-        min = max = elem;
-      } else if (compareFn.compare(elem, min) < 0) {
-        min = elem;
-      } else if (compareFn.compare(elem, max) > 0) {
-        max = elem;
-      }
-      addUnbuffered(elem);
-    }
-
-    /**
-     * Add a new element to the unbuffered list, creating a new buffer and
-     * collapsing if needed.
-     */
-    private void addUnbuffered(T elem) {
-      unbufferedElements.add(elem);
-      if (unbufferedElements.size() == bufferSize) {
-        Collections.sort(unbufferedElements, compareFn);
-        buffers.add(new QuantileBuffer<T>(unbufferedElements));
-        unbufferedElements = Lists.newArrayListWithCapacity(bufferSize);
-        collapseIfNeeded();
-      }
-    }
-
-    /**
-     * Updates this as if adding all elements seen by other.
-     *
-     * <p>Note that this ignores the {@code Comparator} of the other {@link QuantileState}. In
-     * practice, they should generally be equal, but this method tolerates a mismatch.
-     */
-    @Override
-    public void mergeAccumulator(QuantileState<T, ComparatorT> other) {
-      if (other.isEmpty()) {
-        return;
-      }
-      if (min == null || compareFn.compare(other.min, min) < 0) {
-        min = other.min;
-      }
-      if (max == null || compareFn.compare(other.max, max) > 0) {
-        max = other.max;
-      }
-      for (T elem : other.unbufferedElements) {
-        addUnbuffered(elem);
-      }
-      buffers.addAll(other.buffers);
-      collapseIfNeeded();
-    }
-
-    public boolean isEmpty() {
-      return unbufferedElements.size() == 0 && buffers.size() == 0;
-    }
-
-    private void collapseIfNeeded() {
-      while (buffers.size() > numBuffers) {
-        List<QuantileBuffer<T>> toCollapse = Lists.newArrayList();
-        toCollapse.add(buffers.poll());
-        toCollapse.add(buffers.poll());
-        int minLevel = toCollapse.get(1).level;
-        while (!buffers.isEmpty() && buffers.peek().level == minLevel) {
-          toCollapse.add(buffers.poll());
-        }
-        buffers.add(collapse(toCollapse));
-      }
-    }
-
-    private QuantileBuffer<T> collapse(
-        Iterable<QuantileBuffer<T>> buffers) {
-      int newLevel = 0;
-      long newWeight = 0;
-      for (QuantileBuffer<T> buffer : buffers) {
-        // As presented in the paper, there should always be at least two
-        // buffers of the same (minimal) level to collapse, but it is possible
-        // to violate this condition when combining buffers from independently
-        // computed shards.  If they differ we take the max.
-        newLevel = Math.max(newLevel, buffer.level + 1);
-        newWeight += buffer.weight;
-      }
-      List<T> newElements =
-          interpolate(buffers, bufferSize, newWeight, offset(newWeight));
-      return new QuantileBuffer<>(newLevel, newWeight, newElements);
-    }
-
-    /**
-     * If the weight is even, we must round up or down.  Alternate between these two options to
-     * avoid a bias.
-     */
-    private long offset(long newWeight) {
-      if (newWeight % 2 == 1) {
-        return (newWeight + 1) / 2;
-      } else {
-        offsetJitter = 2 - offsetJitter;
-        return (newWeight + offsetJitter) / 2;
-      }
-    }
-
-    /** For alternating between biasing up and down in the above even weight collapse operation. */
-    private int offsetJitter = 0;
-
-
-    /**
-     * Emulates taking the ordered union of all elements in buffers, repeated
-     * according to their weight, and picking out the (k * step + offset)-th
-     * elements of this list for {@code 0 <= k < count}.
-     */
-    private List<T> interpolate(Iterable<QuantileBuffer<T>> buffers,
-                                int count, double step, double offset) {
-      List<Iterator<WeightedValue<T>>> iterators = Lists.newArrayList();
-      for (QuantileBuffer<T> buffer : buffers) {
-        iterators.add(buffer.sizedIterator());
-      }
-      // Each of the buffers is already sorted by element.
-      Iterator<WeightedValue<T>> sorted = Iterators.mergeSorted(
-          iterators,
-          new Comparator<WeightedValue<T>>() {
-            @Override
-            public int compare(WeightedValue<T> a, WeightedValue<T> b) {
-              return compareFn.compare(a.getValue(), b.getValue());
-            }
-          });
-
-      List<T> newElements = Lists.newArrayListWithCapacity(count);
-      WeightedValue<T> weightedElement = sorted.next();
-      double current = weightedElement.getWeight();
-      for (int j = 0; j < count; j++) {
-        double target = j * step + offset;
-        while (current <= target && sorted.hasNext()) {
-          weightedElement = sorted.next();
-          current += weightedElement.getWeight();
-        }
-        newElements.add(weightedElement.getValue());
-      }
-      return newElements;
-    }
-
-    /**
-     * Outputs numQuantiles elements consisting of the minimum, maximum, and
-     * numQuantiles - 2 evenly spaced intermediate elements.
-     *
-     * <p>Returns the empty list if no elements have been added.
-     */
-    @Override
-    public List<T> extractOutput() {
-      if (isEmpty()) {
-        return Lists.newArrayList();
-      }
-      long totalCount = unbufferedElements.size();
-      for (QuantileBuffer<T> buffer : buffers) {
-        totalCount += bufferSize * buffer.weight;
-      }
-      List<QuantileBuffer<T>> all = Lists.newArrayList(buffers);
-      if (!unbufferedElements.isEmpty()) {
-        Collections.sort(unbufferedElements, compareFn);
-        all.add(new QuantileBuffer<>(unbufferedElements));
-      }
-      double step = 1.0 * totalCount / (numQuantiles - 1);
-      double offset = (1.0 * totalCount - 1) / (numQuantiles - 1);
-      List<T> quantiles = interpolate(all, numQuantiles - 2, step, offset);
-      quantiles.add(0, min);
-      quantiles.add(max);
-      return quantiles;
-    }
-  }
-
-  /**
-   * A single buffer in the sense of the referenced algorithm.
-   */
-  private static class QuantileBuffer<T> implements Comparable<QuantileBuffer<T>> {
-    private int level;
-    private long weight;
-    private List<T> elements;
-
-    public QuantileBuffer(List<T> elements) {
-      this(0, 1, elements);
-    }
-
-    public QuantileBuffer(int level, long weight, List<T> elements) {
-      this.level = level;
-      this.weight = weight;
-      this.elements = elements;
-    }
-
-    @Override
-    public int compareTo(QuantileBuffer<T> other) {
-      return Integer.compare(level, other.level); // Orders by level; no subtraction overflow.
-    }
-
-    @Override
-    public String toString() {
-      return "QuantileBuffer["
-          + "level=" + level
-          + ", weight="
-          + weight + ", elements=" + elements + "]";
-    }
-
-    public Iterator<WeightedValue<T>> sizedIterator() {
-      return new UnmodifiableIterator<WeightedValue<T>>() {
-        Iterator<T> iter = elements.iterator();
-        @Override
-        public boolean hasNext() {
-          return iter.hasNext();
-        }
-        @Override public WeightedValue<T> next() {
-          return WeightedValue.of(iter.next(), weight);
-        }
-      };
-    }
-  }
-
-  /**
-   * Coder for QuantileState.
-   */
-  private static class QuantileStateCoder<T, ComparatorT extends Comparator<T> & Serializable>
-      extends CustomCoder<QuantileState<T, ComparatorT>> {
-    private final ComparatorT compareFn;
-    private final Coder<T> elementCoder;
-    private final Coder<List<T>> elementListCoder;
-    private final Coder<Integer> intCoder = BigEndianIntegerCoder.of();
-
-    public QuantileStateCoder(ComparatorT compareFn, Coder<T> elementCoder) {
-      this.compareFn = compareFn;
-      this.elementCoder = elementCoder;
-      this.elementListCoder = ListCoder.of(elementCoder);
-    }
-
-    @Override
-    public void encode(
-        QuantileState<T, ComparatorT> state, OutputStream outStream, Coder.Context context)
-        throws CoderException, IOException {
-      Coder.Context nestedContext = context.nested();
-      intCoder.encode(state.numQuantiles, outStream, nestedContext);
-      intCoder.encode(state.bufferSize, outStream, nestedContext);
-      elementCoder.encode(state.min, outStream, nestedContext);
-      elementCoder.encode(state.max, outStream, nestedContext);
-      elementListCoder.encode(
-          state.unbufferedElements, outStream, nestedContext);
-      BigEndianIntegerCoder.of().encode(
-          state.buffers.size(), outStream, nestedContext);
-      for (QuantileBuffer<T> buffer : state.buffers) {
-        encodeBuffer(buffer, outStream, nestedContext);
-      }
-    }
-
-    @Override
-    public QuantileState<T, ComparatorT> decode(InputStream inStream, Coder.Context context)
-        throws CoderException, IOException {
-      Coder.Context nestedContext = context.nested();
-      int numQuantiles = intCoder.decode(inStream, nestedContext);
-      int bufferSize = intCoder.decode(inStream, nestedContext);
-      T min = elementCoder.decode(inStream, nestedContext);
-      T max = elementCoder.decode(inStream, nestedContext);
-      List<T> unbufferedElements =
-          elementListCoder.decode(inStream, nestedContext);
-      int numBuffers =
-          BigEndianIntegerCoder.of().decode(inStream, nestedContext);
-      List<QuantileBuffer<T>> buffers = new ArrayList<>(numBuffers);
-      for (int i = 0; i < numBuffers; i++) {
-        buffers.add(decodeBuffer(inStream, nestedContext));
-      }
-      return new QuantileState<T, ComparatorT>(
-          compareFn, numQuantiles, min, max, numBuffers, bufferSize, unbufferedElements, buffers);
-    }
-
-    private void encodeBuffer(
-        QuantileBuffer<T> buffer, OutputStream outStream, Coder.Context context)
-        throws CoderException, IOException {
-      DataOutputStream outData = new DataOutputStream(outStream);
-      outData.writeInt(buffer.level);
-      outData.writeLong(buffer.weight);
-      elementListCoder.encode(buffer.elements, outStream, context);
-    }
-
-    private QuantileBuffer<T> decodeBuffer(
-        InputStream inStream, Coder.Context context)
-        throws IOException, CoderException {
-      DataInputStream inData = new DataInputStream(inStream);
-      return new QuantileBuffer<>(
-          inData.readInt(),
-          inData.readLong(),
-          elementListCoder.decode(inStream, context));
-    }
-
-    /**
-     * Notifies ElementByteSizeObserver about the byte size of the
-     * encoded value using this coder.
-     */
-    @Override
-    public void registerByteSizeObserver(
-        QuantileState<T, ComparatorT> state,
-        ElementByteSizeObserver observer,
-        Coder.Context context)
-        throws Exception {
-      Coder.Context nestedContext = context.nested();
-      // encode() writes numQuantiles and bufferSize first, so they must be counted as well.
-      intCoder.registerByteSizeObserver(state.numQuantiles, observer, nestedContext);
-      intCoder.registerByteSizeObserver(state.bufferSize, observer, nestedContext);
-      elementCoder.registerByteSizeObserver(state.min, observer, nestedContext);
-      elementCoder.registerByteSizeObserver(state.max, observer, nestedContext);
-      elementListCoder.registerByteSizeObserver(
-          state.unbufferedElements, observer, nestedContext);
-
-      intCoder.registerByteSizeObserver(state.buffers.size(), observer, nestedContext);
-      for (QuantileBuffer<T> buffer : state.buffers) {
-        // encodeBuffer() writes a 4-byte level and an 8-byte weight ahead of the element list.
-        observer.update(4L + 8);
-        elementListCoder.registerByteSizeObserver(
-            buffer.elements, observer, nestedContext);
-      }
-    }
-
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      verifyDeterministic(
-          "QuantileState.ElementCoder must be deterministic",
-          elementCoder);
-      verifyDeterministic(
-          "QuantileState.ElementListCoder must be deterministic",
-          elementListCoder);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java
deleted file mode 100644
index 3c936a2..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/ApproximateUnique.java
+++ /dev/null
@@ -1,419 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Coder.Context;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.coders.SerializableCoder;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.hash.Hashing;
-import com.google.common.hash.HashingOutputStream;
-import com.google.common.io.ByteStreams;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.PriorityQueue;
-
-/**
- * {@code PTransform}s for estimating the number of distinct elements
- * in a {@code PCollection}, or the number of distinct values
- * associated with each key in a {@code PCollection} of {@code KV}s.
- */
-public class ApproximateUnique {
-
-  /**
-   * Returns a {@code PTransform} that takes a {@code PCollection<T>}
-   * and returns a {@code PCollection<Long>} containing a single value
-   * that is an estimate of the number of distinct elements in the
-   * input {@code PCollection}.
-   *
-   * <p>The {@code sampleSize} parameter controls the estimation
-   * error.  The error is about {@code 2 / sqrt(sampleSize)}, so for
-   * {@code ApproximateUnique.globally(10000)} the estimation error is
-   * about 2%.  Similarly, for {@code ApproximateUnique.globally(16)} the
-   * estimation error is about 50%.  If there are fewer than
-   * {@code sampleSize} distinct elements then the returned result
-   * will be exact with extremely high probability (the chance of a
-   * hash collision is about {@code sampleSize^2 / 2^65}).
-   *
-   * <p>This transform approximates the number of elements in a set
-   * by computing the top {@code sampleSize} hash values, and using
-   * that to extrapolate the size of the entire set of hash values by
-   * assuming the rest of the hash values are as densely distributed
-   * as the top {@code sampleSize}.
-   *
-   * <p>See also {@link #globally(double)}.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<String> pc = ...;
-   * PCollection<Long> approxNumDistinct =
-   *     pc.apply(ApproximateUnique.<String>globally(1000));
-   * } </pre>
-   *
-   * @param <T> the type of the elements in the input {@code PCollection}
-   * @param sampleSize the number of entries in the statistical
-   *        sample; the higher this number, the more accurate the
-   *        estimate will be; should be {@code >= 16}
-   * @throws IllegalArgumentException if the {@code sampleSize}
-   *         argument is too small
-   */
-  public static <T> Globally<T> globally(int sampleSize) {
-    return new Globally<>(sampleSize);
-  }
-
-  /**
-   * Like {@link #globally(int)}, but specifies the desired maximum
-   * estimation error instead of the sample size.
-   *
-   * @param <T> the type of the elements in the input {@code PCollection}
-   * @param maximumEstimationError the maximum estimation error, which
-   *        should be in the range {@code [0.01, 0.5]}
-   * @throws IllegalArgumentException if the
-   *         {@code maximumEstimationError} argument is out of range
-   */
-  public static <T> Globally<T> globally(double maximumEstimationError) {
-    return new Globally<>(maximumEstimationError);
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes a
-   * {@code PCollection<KV<K, V>>} and returns a
-   * {@code PCollection<KV<K, Long>>} that contains an output element
-   * mapping each distinct key in the input {@code PCollection} to an
-   * estimate of the number of distinct values associated with that
-   * key in the input {@code PCollection}.
-   *
-   * <p>See {@link #globally(int)} for an explanation of the
-   * {@code sampleSize} parameter.  A separate sampling is computed
-   * for each distinct key of the input.
-   *
-   * <p>See also {@link #perKey(double)}.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<KV<Integer, String>> pc = ...;
-   * PCollection<KV<Integer, Long>> approxNumDistinctPerKey =
-   *     pc.apply(ApproximateUnique.<Integer, String>perKey(1000));
-   * } </pre>
-   *
-   * @param <K> the type of the keys in the input and output
-   *        {@code PCollection}s
-   * @param <V> the type of the values in the input {@code PCollection}
-   * @param sampleSize the number of entries in the statistical
-   *        sample; the higher this number, the more accurate the
-   *        estimate will be; should be {@code >= 16}
-   * @throws IllegalArgumentException if the {@code sampleSize}
-   *         argument is too small
-   */
-  public static <K, V> PerKey<K, V> perKey(int sampleSize) {
-    return new PerKey<>(sampleSize);
-  }
-
-  /**
-   * Like {@link #perKey(int)}, but specifies the desired maximum
-   * estimation error instead of the sample size.
-   *
-   * @param <K> the type of the keys in the input and output
-   *        {@code PCollection}s
-   * @param <V> the type of the values in the input {@code PCollection}
-   * @param maximumEstimationError the maximum estimation error, which
-   *        should be in the range {@code [0.01, 0.5]}
-   * @throws IllegalArgumentException if the
-   *         {@code maximumEstimationError} argument is out of range
-   */
-  public static <K, V> PerKey<K, V> perKey(double maximumEstimationError) {
-    return new PerKey<>(maximumEstimationError);
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * {@code PTransform} for estimating the number of distinct elements
-   * in a {@code PCollection}.
-   *
-   * @param <T> the type of the elements in the input {@code PCollection}
-   */
-  static class Globally<T> extends PTransform<PCollection<T>, PCollection<Long>> {
-
-    /**
-     * The number of entries in the statistical sample; the higher this number,
-     * the more accurate the estimate will be.
-     */
-    private final long sampleSize;
-
-    /**
-     * @see ApproximateUnique#globally(int)
-     */
-    public Globally(int sampleSize) {
-      if (sampleSize < 16) {
-        throw new IllegalArgumentException(
-            "ApproximateUnique needs a sampleSize "
-            + ">= 16 for an estimation error <= 50%.  "
-            + "In general, the estimation "
-            + "error is about 2 / sqrt(sampleSize).");
-      }
-      this.sampleSize = sampleSize;
-    }
-
-    /**
-     * @see ApproximateUnique#globally(double)
-     */
-    public Globally(double maximumEstimationError) {
-      if (maximumEstimationError < 0.01 || maximumEstimationError > 0.5) {
-        throw new IllegalArgumentException(
-            "ApproximateUnique needs an "
-            + "estimation error between 1% (0.01) and 50% (0.5).");
-      }
-      this.sampleSize = sampleSizeFromEstimationError(maximumEstimationError);
-    }
-
-    @Override
-    public PCollection<Long> apply(PCollection<T> input) {
-      Coder<T> coder = input.getCoder();
-      return input.apply(
-          Combine.globally(
-              new ApproximateUniqueCombineFn<>(sampleSize, coder)));
-    }
-  }
-
-  /**
-   * {@code PTransform} for estimating the number of distinct values
-   * associated with each key in a {@code PCollection} of {@code KV}s.
-   *
-   * @param <K> the type of the keys in the input and output
-   *        {@code PCollection}s
-   * @param <V> the type of the values in the input {@code PCollection}
-   */
-  static class PerKey<K, V>
-      extends PTransform<PCollection<KV<K, V>>, PCollection<KV<K, Long>>> {
-
-    private final long sampleSize;
-
-    /**
-     * @see ApproximateUnique#perKey(int)
-     */
-    public PerKey(int sampleSize) {
-      if (sampleSize < 16) {
-        throw new IllegalArgumentException(
-            "ApproximateUnique needs a "
-            + "sampleSize >= 16 for an estimation error <= 50%.  In general, "
-            + "the estimation error is about 2 / sqrt(sampleSize).");
-      }
-      this.sampleSize = sampleSize;
-    }
-
-    /**
-     * @see ApproximateUnique#perKey(double)
-     */
-    public PerKey(double estimationError) {
-      if (estimationError < 0.01 || estimationError > 0.5) {
-        throw new IllegalArgumentException(
-            "ApproximateUnique.PerKey needs an "
-            + "estimation error between 1% (0.01) and 50% (0.5).");
-      }
-      this.sampleSize = sampleSizeFromEstimationError(estimationError);
-    }
-
-    @Override
-    public PCollection<KV<K, Long>> apply(PCollection<KV<K, V>> input) {
-      Coder<KV<K, V>> inputCoder = input.getCoder();
-      if (!(inputCoder instanceof KvCoder)) {
-        throw new IllegalStateException(
-            "ApproximateUnique.PerKey requires its input to use KvCoder");
-      }
-      @SuppressWarnings("unchecked")
-      final Coder<V> coder = ((KvCoder<K, V>) inputCoder).getValueCoder();
-
-      return input.apply(
-          Combine.perKey(new ApproximateUniqueCombineFn<>(
-              sampleSize, coder).<K>asKeyedFn()));
-    }
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * {@code CombineFn} that computes an estimate of the number of
-   * distinct values that were combined.
-   *
-   * <p>Hashes input elements, computes the top {@code sampleSize}
-   * hash values, and uses those to extrapolate the size of the entire
-   * set of hash values by assuming the rest of the hash values are as
-   * densely distributed as the top {@code sampleSize}.
-   *
-   * <p>Used to implement
-   * {@link #globally(int) ApproximateUnique.globally(...)} and
-   * {@link #perKey(int) ApproximateUnique.perKey(...)}.
-   *
-   * @param <T> the type of the values being combined
-   */
-  public static class ApproximateUniqueCombineFn<T> extends
-      CombineFn<T, ApproximateUniqueCombineFn.LargestUnique, Long> {
-
-    /**
-     * The size of the space of hashes returned by the hash function.
-     */
-    static final double HASH_SPACE_SIZE =
-        Long.MAX_VALUE - (double) Long.MIN_VALUE;
-
-    /**
-     * A heap utility class to efficiently track the largest added elements.
-     */
-    public static class LargestUnique implements Serializable {
-      private PriorityQueue<Long> heap = new PriorityQueue<>();
-      private final long sampleSize;
-
-      /**
-       * Creates a heap to track the largest {@code sampleSize} elements.
-       *
-       * @param sampleSize the size of the heap
-       */
-      public LargestUnique(long sampleSize) {
-        this.sampleSize = sampleSize;
-      }
-
-      /**
-       * Adds a value to the heap, returning whether the value is (large enough
-       * to be) in the heap.
-       */
-      public boolean add(Long value) {
-        if (heap.contains(value)) {
-          return true;
-        } else if (heap.size() < sampleSize) {
-          heap.add(value);
-          return true;
-        } else if (value > heap.element()) {
-          heap.remove();
-          heap.add(value);
-          return true;
-        } else {
-          return false;
-        }
-      }
-
-      /**
-       * Returns the values in the heap, ordered largest to smallest.
-       */
-      public List<Long> extractOrderedList() {
-        // Drain a copy (smallest first) back to front, leaving the heap itself intact.
-        PriorityQueue<Long> copy = new PriorityQueue<>(heap);
-        Long[] array = new Long[copy.size()];
-        for (int i = array.length - 1; i >= 0; i--) {
-          array[i] = copy.remove();
-        }
-        return Arrays.asList(array);
-      }
-    }
-
-    private final long sampleSize;
-    private final Coder<T> coder;
-
-    public ApproximateUniqueCombineFn(long sampleSize, Coder<T> coder) {
-      this.sampleSize = sampleSize;
-      this.coder = coder;
-    }
-
-    @Override
-    public LargestUnique createAccumulator() {
-      return new LargestUnique(sampleSize);
-    }
-
-    @Override
-    public LargestUnique addInput(LargestUnique heap, T input) {
-      try {
-        heap.add(hash(input, coder));
-      } catch (IOException e) {
-        // Only encoding failures are wrapped; Errors and unchecked exceptions propagate as-is.
-        throw new RuntimeException(e);
-      }
-      return heap;
-    }
-
-    @Override
-    public LargestUnique mergeAccumulators(Iterable<LargestUnique> heaps) {
-      Iterator<LargestUnique> iterator = heaps.iterator();
-      LargestUnique heap = iterator.next();
-      while (iterator.hasNext()) {
-        List<Long> largestHashes = iterator.next().extractOrderedList();
-        for (long hash : largestHashes) {
-          if (!heap.add(hash)) {
-            break; // The remainder of this list is all smaller.
-          }
-        }
-      }
-      return heap;
-    }
-
-    @Override
-    public Long extractOutput(LargestUnique heap) {
-      List<Long> largestHashes = heap.extractOrderedList();
-      if (largestHashes.size() < sampleSize) {
-        return (long) largestHashes.size();
-      } else {
-        long smallestSampleHash = largestHashes.get(largestHashes.size() - 1);
-        double sampleSpaceSize = Long.MAX_VALUE - (double) smallestSampleHash;
-        // This formula takes into account the possibility of hash collisions,
-        // which become more likely than not for 2^32 distinct elements.
-        // Note that log(1+x) ~ x for small x, so for sampleSize << maxHash
-        // log(1 - sampleSize/sampleSpace) / log(1 - 1/sampleSpace) ~ sampleSize
-        // and hence estimate ~ sampleSize * HASH_SPACE_SIZE / sampleSpace
-        // as one would expect.
-        double estimate = Math.log1p(-sampleSize / sampleSpaceSize)
-            / Math.log1p(-1 / sampleSpaceSize)
-            * HASH_SPACE_SIZE / sampleSpaceSize;
-        return Math.round(estimate);
-      }
-    }
-
-    @Override
-    public Coder<LargestUnique> getAccumulatorCoder(CoderRegistry registry,
-        Coder<T> inputCoder) {
-      return SerializableCoder.of(LargestUnique.class);
-    }
-
-    /**
-     * Encodes the given element using the given coder and hashes the encoding.
-     */
-    static <T> long hash(T element, Coder<T> coder) throws CoderException, IOException {
-      try (HashingOutputStream stream =
-              new HashingOutputStream(Hashing.murmur3_128(), ByteStreams.nullOutputStream())) {
-        coder.encode(element, stream, Context.OUTER);
-        return stream.hash().asLong();
-      }
-    }
-  }
-
-  /**
-   * Computes the sampleSize based on the desired estimation error.
-   *
-   * @param estimationError should be bounded by [0.01, 0.5]
-   * @return the sample size needed for the desired estimation error
-   */
-  static long sampleSizeFromEstimationError(double estimationError) {
-    return Math.round(Math.ceil(4.0 / Math.pow(estimationError, 2.0)));
-  }
-}

[21/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/MapElements.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/MapElements.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/MapElements.java
deleted file mode 100644
index 8997050..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/MapElements.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-/**
- * {@code PTransform}s for mapping a simple function over the elements of a {@link PCollection}.
- */
-public class MapElements<InputT, OutputT>
-extends PTransform<PCollection<InputT>, PCollection<OutputT>> {
-
-  /**
-   * For a {@code SerializableFunction<InputT, OutputT>} {@code fn} and output type descriptor,
-   * returns a {@code PTransform} that takes an input {@code PCollection<InputT>} and returns
-   * a {@code PCollection<OutputT>} containing {@code fn.apply(v)} for every element {@code v} in
-   * the input.
-   *
-   * <p>Example of use in Java 8:
-   * <pre>{@code
-   * PCollection<Integer> wordLengths = words.apply(
-   *     MapElements.via((String word) -> word.length())
-   *         .withOutputType(new TypeDescriptor<Integer>() {}));
-   * }</pre>
-   *
-   * <p>In Java 7, the overload {@link #via(SimpleFunction)} is more concise as the output type
-   * descriptor need not be provided.
-   */
-  public static <InputT, OutputT> MissingOutputTypeDescriptor<InputT, OutputT>
-  via(SerializableFunction<InputT, OutputT> fn) {
-    return new MissingOutputTypeDescriptor<>(fn);
-  }
-
-  /**
-   * For a {@code SimpleFunction<InputT, OutputT>} {@code fn}, returns a {@code PTransform} that
-   * takes an input {@code PCollection<InputT>} and returns a {@code PCollection<OutputT>}
-   * containing {@code fn.apply(v)} for every element {@code v} in the input.
-   *
-   * <p>This overload is intended primarily for use in Java 7. In Java 8, the overload
-   * {@link #via(SerializableFunction)} supports use of lambda for greater concision.
-   *
-   * <p>Example of use in Java 7:
-   * <pre>{@code
-   * PCollection<String> words = ...;
-   * PCollection<Integer> wordsPerLine = words.apply(MapElements.via(
-   *     new SimpleFunction<String, Integer>() {
-   *       public Integer apply(String word) {
-   *         return word.length();
-   *       }
-   *     }));
-   * }</pre>
-   */
-  public static <InputT, OutputT> MapElements<InputT, OutputT>
-  via(final SimpleFunction<InputT, OutputT> fn) {
-    return new MapElements<>(fn, fn.getOutputTypeDescriptor());
-  }
-
-  /**
-   * An intermediate builder for a {@link MapElements} transform. To complete the transform, provide
-   * an output type descriptor to {@link MissingOutputTypeDescriptor#withOutputType}. See
-   * {@link #via(SerializableFunction)} for a full example of use.
-   */
-  public static final class MissingOutputTypeDescriptor<InputT, OutputT> {
-
-    private final SerializableFunction<InputT, OutputT> fn;
-
-    private MissingOutputTypeDescriptor(SerializableFunction<InputT, OutputT> fn) {
-      this.fn = fn;
-    }
-
-    public MapElements<InputT, OutputT> withOutputType(TypeDescriptor<OutputT> outputType) {
-      return new MapElements<>(fn, outputType);
-    }
-  }
-
-  ///////////////////////////////////////////////////////////////////
-
-  private final SerializableFunction<InputT, OutputT> fn;
-  private final transient TypeDescriptor<OutputT> outputType;
-
-  private MapElements(
-      SerializableFunction<InputT, OutputT> fn,
-      TypeDescriptor<OutputT> outputType) {
-    this.fn = fn;
-    this.outputType = outputType;
-  }
-
-  @Override
-  public PCollection<OutputT> apply(PCollection<InputT> input) {
-    return input.apply(ParDo.named("Map").of(new DoFn<InputT, OutputT>() {
-      @Override
-      public void processElement(ProcessContext c) {
-        c.output(fn.apply(c.element()));
-      }
-    })).setTypeDescriptorInternal(outputType);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java
deleted file mode 100644
index 8678e4f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Max.java
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.transforms.Combine.BinaryCombineFn;
-import com.google.cloud.dataflow.sdk.util.common.Counter;
-import com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind;
-import com.google.cloud.dataflow.sdk.util.common.CounterProvider;
-
-import java.io.Serializable;
-import java.util.Comparator;
-
-/**
- * {@code PTransform}s for computing the maximum of the elements in a {@code PCollection}, or the
- * maximum of the values associated with each key in a {@code PCollection} of {@code KV}s.
- *
- * <p>Example 1: get the maximum of a {@code PCollection} of {@code Double}s.
- * <pre> {@code
- * PCollection<Double> input = ...;
- * PCollection<Double> max = input.apply(Max.doublesGlobally());
- * } </pre>
- *
- * <p>Example 2: calculate the maximum of the {@code Integer}s
- * associated with each unique key (which is of type {@code String}).
- * <pre> {@code
- * PCollection<KV<String, Integer>> input = ...;
- * PCollection<KV<String, Integer>> maxPerKey = input
- *     .apply(Max.<String>integersPerKey());
- * } </pre>
- */
-public class Max {
-
-  private Max() {
-    // do not instantiate
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<Integer>} and returns a
-   * {@code PCollection<Integer>} whose contents is the maximum of the input {@code PCollection}'s
-   * elements, or {@code Integer.MIN_VALUE} if there are no elements.
-   */
-  public static Combine.Globally<Integer, Integer> integersGlobally() {
-    return Combine.globally(new MaxIntegerFn()).named("Max.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Integer>>} and
-   * returns a {@code PCollection<KV<K, Integer>>} that contains an output element mapping each
-   * distinct key in the input {@code PCollection} to the maximum of the values associated with that
-   * key in the input {@code PCollection}.
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   */
-  public static <K> Combine.PerKey<K, Integer, Integer> integersPerKey() {
-    return Combine.<K, Integer, Integer>perKey(new MaxIntegerFn()).named("Max.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<Long>} and returns a {@code
-   * PCollection<Long>} whose contents is the maximum of the input {@code PCollection}'s elements,
-   * or {@code Long.MIN_VALUE} if there are no elements.
-   */
-  public static Combine.Globally<Long, Long> longsGlobally() {
-    return Combine.globally(new MaxLongFn()).named("Max.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Long>>} and returns a
-   * {@code PCollection<KV<K, Long>>} that contains an output element mapping each distinct key in
-   * the input {@code PCollection} to the maximum of the values associated with that key in the
-   * input {@code PCollection}.
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   */
-  public static <K> Combine.PerKey<K, Long, Long> longsPerKey() {
-    return Combine.<K, Long, Long>perKey(new MaxLongFn()).named("Max.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<Double>} and returns a
-   * {@code PCollection<Double>} whose contents is the maximum of the input {@code PCollection}'s
-   * elements, or {@code Double.NEGATIVE_INFINITY} if there are no elements.
-   */
-  public static Combine.Globally<Double, Double> doublesGlobally() {
-    return Combine.globally(new MaxDoubleFn()).named("Max.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Double>>} and returns
-   * a {@code PCollection<KV<K, Double>>} that contains an output element mapping each distinct key
-   * in the input {@code PCollection} to the maximum of the values associated with that key in the
-   * input {@code PCollection}.
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   */
-  public static <K> Combine.PerKey<K, Double, Double> doublesPerKey() {
-    return Combine.<K, Double, Double>perKey(new MaxDoubleFn()).named("Max.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<T>} and returns a {@code
-   * PCollection<T>} whose contents is the maximum according to the natural ordering of {@code T}
-   * of the input {@code PCollection}'s elements, or {@code null} if there are no elements.
-   */
-  public static <T extends Comparable<? super T>>
-  Combine.Globally<T, T> globally() {
-    return Combine.<T, T>globally(MaxFn.<T>naturalOrder()).named("Max.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, T>>} and returns a
-   * {@code PCollection<KV<K, T>>} that contains an output element mapping each distinct key in the
-   * input {@code PCollection} to the maximum according to the natural ordering of {@code T} of the
-   * values associated with that key in the input {@code PCollection}.
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   */
-  public static <K, T extends Comparable<? super T>>
-  Combine.PerKey<K, T, T> perKey() {
-    return Combine.<K, T, T>perKey(MaxFn.<T>naturalOrder()).named("Max.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<T>} and returns a {@code
-   * PCollection<T>} whose contents is the maximum of the input {@code PCollection}'s elements, or
-   * {@code null} if there are no elements.
-   */
-  public static <T, ComparatorT extends Comparator<? super T> & Serializable>
-  Combine.Globally<T, T> globally(ComparatorT comparator) {
-    return Combine.<T, T>globally(MaxFn.of(comparator)).named("Max.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, T>>} and returns a
-   * {@code PCollection<KV<K, T>>} that contains one output element per key, mapping each key
-   * to the maximum of the values associated with that key in the input {@code PCollection}.
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   */
-  public static <K, T, ComparatorT extends Comparator<? super T> & Serializable>
-  Combine.PerKey<K, T, T> perKey(ComparatorT comparator) {
-    return Combine.<K, T, T>perKey(MaxFn.of(comparator)).named("Max.PerKey");
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@code CombineFn} that computes the maximum of a collection of elements of type {@code T}
-   * using an arbitrary {@link Comparator}, useful as an argument to {@link Combine#globally} or
-   * {@link Combine#perKey}.
-   *
-   * @param <T> the type of the values being compared
-   */
-  public static class MaxFn<T> extends BinaryCombineFn<T> {
-
-    private final T identity;
-    private final Comparator<? super T> comparator;
-
-    private <ComparatorT extends Comparator<? super T> & Serializable> MaxFn(
-        T identity, ComparatorT comparator) {
-      this.identity = identity;
-      this.comparator = comparator;
-    }
-
-    public static <T, ComparatorT extends Comparator<? super T> & Serializable>
-    MaxFn<T> of(T identity, ComparatorT comparator) {
-      return new MaxFn<T>(identity, comparator);
-    }
-
-    public static <T, ComparatorT extends Comparator<? super T> & Serializable>
-    MaxFn<T> of(ComparatorT comparator) {
-      return new MaxFn<T>(null, comparator);
-    }
-
-    public static <T extends Comparable<? super T>> MaxFn<T> naturalOrder(T identity) {
-      return new MaxFn<T>(identity, new Top.Largest<T>());
-    }
-
-    public static <T extends Comparable<? super T>> MaxFn<T> naturalOrder() {
-      return new MaxFn<T>(null, new Top.Largest<T>());
-    }
-
-    @Override
-    public T identity() {
-      return identity;
-    }
-
-    @Override
-    public T apply(T left, T right) {
-      return comparator.compare(left, right) >= 0 ? left : right;
-    }
-  }
-
-  /**
-   * A {@code CombineFn} that computes the maximum of a collection of {@code Integer}s, useful as an
-   * argument to {@link Combine#globally} or {@link Combine#perKey}.
-   */
-  public static class MaxIntegerFn extends MaxFn<Integer> implements
-      CounterProvider<Integer> {
-    public MaxIntegerFn() {
-      super(Integer.MIN_VALUE, new Top.Largest<Integer>());
-    }
-
-    @Override
-    public Counter<Integer> getCounter(String name) {
-      return Counter.ints(name, AggregationKind.MAX);
-    }
-  }
-
-  /**
-   * A {@code CombineFn} that computes the maximum of a collection of {@code Long}s, useful as an
-   * argument to {@link Combine#globally} or {@link Combine#perKey}.
-   */
-  public static class MaxLongFn extends MaxFn<Long> implements
-      CounterProvider<Long> {
-    public MaxLongFn() {
-      super(Long.MIN_VALUE, new Top.Largest<Long>());
-    }
-
-    @Override
-    public Counter<Long> getCounter(String name) {
-      return Counter.longs(name, AggregationKind.MAX);
-    }
-  }
-
-  /**
-   * A {@code CombineFn} that computes the maximum of a collection of {@code Double}s, useful as an
-   * argument to {@link Combine#globally} or {@link Combine#perKey}.
-   */
-  public static class MaxDoubleFn extends MaxFn<Double> implements
-      CounterProvider<Double> {
-    public MaxDoubleFn() {
-      super(Double.NEGATIVE_INFINITY, new Top.Largest<Double>());
-    }
-
-    @Override
-    public Counter<Double> getCounter(String name) {
-      return Counter.doubles(name, AggregationKind.MAX);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java
deleted file mode 100644
index 7dccfb6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Mean.java
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
-import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.coders.DoubleCoder;
-import com.google.cloud.dataflow.sdk.transforms.Combine.AccumulatingCombineFn.Accumulator;
-import com.google.common.base.MoreObjects;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Objects;
-
-/**
- * {@code PTransform}s for computing the arithmetic mean
- * (a.k.a. average) of the elements in a {@code PCollection}, or the
- * mean of the values associated with each key in a
- * {@code PCollection} of {@code KV}s.
- *
- * <p>Example 1: get the mean of a {@code PCollection} of {@code Long}s.
- * <pre> {@code
- * PCollection<Long> input = ...;
- * PCollection<Double> mean = input.apply(Mean.<Long>globally());
- * } </pre>
- *
- * <p>Example 2: calculate the mean of the {@code Integer}s
- * associated with each unique key (which is of type {@code String}).
- * <pre> {@code
- * PCollection<KV<String, Integer>> input = ...;
- * PCollection<KV<String, Double>> meanPerKey =
- *     input.apply(Mean.<String, Integer>perKey());
- * } </pre>
- */
-public class Mean {
-
-  private Mean() { } // Namespace only
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<NumT>} and returns a
-   * {@code PCollection<Double>} whose contents is the mean of the
-   * input {@code PCollection}'s elements, or
-   * {@code Double.NaN} if there are no elements.
-   *
-   * @param <NumT> the type of the {@code Number}s being combined
-   */
-  public static <NumT extends Number> Combine.Globally<NumT, Double> globally() {
-    return Combine.<NumT, Double>globally(new MeanFn<>()).named("Mean.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input
-   * {@code PCollection<KV<K, NumT>>} and returns a
-   * {@code PCollection<KV<K, Double>>} that contains an output
-   * element mapping each distinct key in the input
-   * {@code PCollection} to the mean of the values associated with
-   * that key in the input {@code PCollection}.
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   *
-   * @param <K> the type of the keys
-   * @param <NumT> the type of the {@code Number}s being combined
-   */
-  public static <K, NumT extends Number> Combine.PerKey<K, NumT, Double> perKey() {
-    return Combine.<K, NumT, Double>perKey(new MeanFn<>()).named("Mean.PerKey");
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@code Combine.CombineFn} that computes the arithmetic mean
-   * (a.k.a. average) of an {@code Iterable} of numbers of type
-   * {@code NumT}, useful as an argument to {@link Combine#globally} or
-   * {@link Combine#perKey}.
-   *
-   * <p>Returns {@code Double.NaN} if combining zero elements.
-   *
-   * @param <NumT> the type of the {@code Number}s being combined
-   */
-  static class MeanFn<NumT extends Number>
-  extends Combine.AccumulatingCombineFn<NumT, CountSum<NumT>, Double> {
-    /**
-     * Constructs a combining function that computes the mean over
-     * a collection of values of type {@code NumT}.
-     */
-    public MeanFn() {}
-
-    @Override
-    public CountSum<NumT> createAccumulator() {
-      return new CountSum<>();
-    }
-
-    @Override
-    public Coder<CountSum<NumT>> getAccumulatorCoder(
-        CoderRegistry registry, Coder<NumT> inputCoder) {
-      return new CountSumCoder<>();
-    }
-  }
-
-  /**
-   * Accumulator class for {@link MeanFn}.
-   */
-  static class CountSum<NumT extends Number>
-  implements Accumulator<NumT, CountSum<NumT>, Double> {
-
-    long count = 0;
-    double sum = 0.0;
-
-    public CountSum() {
-      this(0, 0);
-    }
-
-    public CountSum(long count, double sum) {
-      this.count = count;
-      this.sum = sum;
-    }
-
-    @Override
-    public void addInput(NumT element) {
-      count++;
-      sum += element.doubleValue();
-    }
-
-    @Override
-    public void mergeAccumulator(CountSum<NumT> accumulator) {
-      count += accumulator.count;
-      sum += accumulator.sum;
-    }
-
-    @Override
-    public Double extractOutput() {
-      return count == 0 ? Double.NaN : sum / count;
-    }
-
-    @Override
-    public boolean equals(Object other) {
-      if (!(other instanceof CountSum)) {
-        return false;
-      }
-      @SuppressWarnings("unchecked")
-      CountSum<?> otherCountSum = (CountSum<?>) other;
-      return (count == otherCountSum.count)
-          && (sum == otherCountSum.sum);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(count, sum);
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(this)
-          .add("count", count)
-          .add("sum", sum)
-          .toString();
-    }
-  }
-
-  static class CountSumCoder<NumT extends Number>
-  extends AtomicCoder<CountSum<NumT>> {
-     private static final Coder<Long> LONG_CODER = BigEndianLongCoder.of();
-     private static final Coder<Double> DOUBLE_CODER = DoubleCoder.of();
-
-     @Override
-     public void encode(CountSum<NumT> value, OutputStream outStream, Coder.Context context)
-         throws CoderException, IOException {
-       Coder.Context nestedContext = context.nested();
-       LONG_CODER.encode(value.count, outStream, nestedContext);
-       DOUBLE_CODER.encode(value.sum, outStream, nestedContext);
-     }
-
-     @Override
-     public CountSum<NumT> decode(InputStream inStream, Coder.Context context)
-         throws CoderException, IOException {
-       Coder.Context nestedContext = context.nested();
-       return new CountSum<>(
-           LONG_CODER.decode(inStream, nestedContext),
-           DOUBLE_CODER.decode(inStream, nestedContext));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java
deleted file mode 100644
index 47ab3a0..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Min.java
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.transforms.Combine.BinaryCombineFn;
-import com.google.cloud.dataflow.sdk.util.common.Counter;
-import com.google.cloud.dataflow.sdk.util.common.Counter.AggregationKind;
-import com.google.cloud.dataflow.sdk.util.common.CounterProvider;
-
-import java.io.Serializable;
-import java.util.Comparator;
-
-/**
- * {@code PTransform}s for computing the minimum of the elements in a {@code PCollection}, or the
- * minimum of the values associated with each key in a {@code PCollection} of {@code KV}s.
- *
- * <p>Example 1: get the minimum of a {@code PCollection} of {@code Double}s.
- * <pre> {@code
- * PCollection<Double> input = ...;
- * PCollection<Double> min = input.apply(Min.doublesGlobally());
- * } </pre>
- *
- * <p>Example 2: calculate the minimum of the {@code Integer}s
- * associated with each unique key (which is of type {@code String}).
- * <pre> {@code
- * PCollection<KV<String, Integer>> input = ...;
- * PCollection<KV<String, Integer>> minPerKey = input
- *     .apply(Min.<String>integersPerKey());
- * } </pre>
- */
-public class Min {
-
-  private Min() {
-    // do not instantiate
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<Integer>} and returns a
-   * {@code PCollection<Integer>} whose contents is a single value that is the minimum of the input
-   * {@code PCollection}'s elements, or {@code Integer.MAX_VALUE} if there are no elements.
-   */
-  public static Combine.Globally<Integer, Integer> integersGlobally() {
-    return Combine.globally(new MinIntegerFn()).named("Min.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Integer>>} and
-   * returns a {@code PCollection<KV<K, Integer>>} that contains an output element mapping each
-   * distinct key in the input {@code PCollection} to the minimum of the values associated with that
-   * key in the input {@code PCollection}.
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   */
-  public static <K> Combine.PerKey<K, Integer, Integer> integersPerKey() {
-    return Combine.<K, Integer, Integer>perKey(new MinIntegerFn()).named("Min.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<Long>} and returns a {@code
-   * PCollection<Long>} whose contents is the minimum of the input {@code PCollection}'s elements,
-   * or {@code Long.MAX_VALUE} if there are no elements.
-   */
-  public static Combine.Globally<Long, Long> longsGlobally() {
-    return Combine.globally(new MinLongFn()).named("Min.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Long>>} and returns a
-   * {@code PCollection<KV<K, Long>>} that contains an output element mapping each distinct key in
-   * the input {@code PCollection} to the minimum of the values associated with that key in the
-   * input {@code PCollection}.
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   */
-  public static <K> Combine.PerKey<K, Long, Long> longsPerKey() {
-   return Combine.<K, Long, Long>perKey(new MinLongFn()).named("Min.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<Double>} and returns a
-   * {@code PCollection<Double>} whose contents is the minimum of the input {@code PCollection}'s
-   * elements, or {@code Double.POSITIVE_INFINITY} if there are no elements.
-   */
-  public static Combine.Globally<Double, Double> doublesGlobally() {
-    return Combine.globally(new MinDoubleFn()).named("Min.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Double>>} and returns
-   * a {@code PCollection<KV<K, Double>>} that contains an output element mapping each distinct key
-   * in the input {@code PCollection} to the minimum of the values associated with that key in the
-   * input {@code PCollection}.
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   */
-  public static <K> Combine.PerKey<K, Double, Double> doublesPerKey() {
-    return Combine.<K, Double, Double>perKey(new MinDoubleFn()).named("Min.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<T>} and returns a {@code
-   * PCollection<T>} whose contents is the minimum according to the natural ordering of {@code T}
-   * of the input {@code PCollection}'s elements, or {@code null} if there are no elements.
-   */
-  public static <T extends Comparable<? super T>>
-  Combine.Globally<T, T> globally() {
-    return Combine.<T, T>globally(MinFn.<T>naturalOrder()).named("Min.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, T>>} and returns a
-   * {@code PCollection<KV<K, T>>} that contains an output element mapping each distinct key in the
-   * input {@code PCollection} to the minimum according to the natural ordering of {@code T} of the
-   * values associated with that key in the input {@code PCollection}.
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   */
-  public static <K, T extends Comparable<? super T>>
-  Combine.PerKey<K, T, T> perKey() {
-    return Combine.<K, T, T>perKey(MinFn.<T>naturalOrder()).named("Min.PerKey");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<T>} and returns a {@code
-   * PCollection<T>} whose contents is the minimum of the input {@code PCollection}'s elements, or
-   * {@code null} if there are no elements.
-   */
-  public static <T, ComparatorT extends Comparator<? super T> & Serializable>
-  Combine.Globally<T, T> globally(ComparatorT comparator) {
-    return Combine.<T, T>globally(MinFn.of(comparator)).named("Min.Globally");
-  }
-
-  /**
-   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, T>>} and returns a
-   * {@code PCollection<KV<K, T>>} that contains one output element per key, mapping each key
-   * to the minimum of the values associated with that key in the input {@code PCollection}.
-   *
-   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
-   */
-  public static <K, T, ComparatorT extends Comparator<? super T> & Serializable>
-  Combine.PerKey<K, T, T> perKey(ComparatorT comparator) {
-    return Combine.<K, T, T>perKey(MinFn.of(comparator)).named("Min.PerKey");
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@code CombineFn} that computes the minimum of a collection of elements of type {@code T}
-   * using an arbitrary {@link Comparator}, useful as an argument to {@link Combine#globally} or
-   * {@link Combine#perKey}.
-   *
-   * @param <T> the type of the values being compared
-   */
-  public static class MinFn<T> extends BinaryCombineFn<T> {
-
-    private final T identity;
-    private final Comparator<? super T> comparator;
-
-    private <ComparatorT extends Comparator<? super T> & Serializable> MinFn(
-        T identity, ComparatorT comparator) {
-      this.identity = identity;
-      this.comparator = comparator;
-    }
-
-    public static <T, ComparatorT extends Comparator<? super T> & Serializable>
-    MinFn<T> of(T identity, ComparatorT comparator) {
-      return new MinFn<T>(identity, comparator);
-    }
-
-    public static <T, ComparatorT extends Comparator<? super T> & Serializable>
-    MinFn<T> of(ComparatorT comparator) {
-      return new MinFn<T>(null, comparator);
-    }
-
-    public static <T extends Comparable<? super T>> MinFn<T> naturalOrder(T identity) {
-      return new MinFn<T>(identity, new Top.Largest<T>());
-    }
-
-    public static <T extends Comparable<? super T>> MinFn<T> naturalOrder() {
-      return new MinFn<T>(null, new Top.Largest<T>());
-    }
-
-    @Override
-    public T identity() {
-      return identity;
-    }
-
-    @Override
-    public T apply(T left, T right) {
-      return comparator.compare(left, right) <= 0 ? left : right;
-    }
-  }
-
-  /**
-   * A {@code CombineFn} that computes the minimum of a collection of {@code Integer}s, useful as an
-   * argument to {@link Combine#globally} or {@link Combine#perKey}.
-   */
-  public static class MinIntegerFn extends MinFn<Integer> implements
-      CounterProvider<Integer> {
-    public MinIntegerFn() {
-      super(Integer.MAX_VALUE, new Top.Largest<Integer>());
-    }
-
-    @Override
-    public Counter<Integer> getCounter(String name) {
-      return Counter.ints(name, AggregationKind.MIN);
-    }
-  }
-
-  /**
-   * A {@code CombineFn} that computes the minimum of a collection of {@code Long}s, useful as an
-   * argument to {@link Combine#globally} or {@link Combine#perKey}.
-   */
-  public static class MinLongFn extends MinFn<Long> implements
-      CounterProvider<Long> {
-    public MinLongFn() {
-      super(Long.MAX_VALUE, new Top.Largest<Long>());
-    }
-
-    @Override
-    public Counter<Long> getCounter(String name) {
-      return Counter.longs(name, AggregationKind.MIN);
-    }
-  }
-
-  /**
-   * A {@code CombineFn} that computes the minimum of a collection of {@code Double}s, useful as an
-   * argument to {@link Combine#globally} or {@link Combine#perKey}.
-   */
-  public static class MinDoubleFn extends MinFn<Double> implements
-      CounterProvider<Double> {
-    public MinDoubleFn() {
-      super(Double.POSITIVE_INFINITY, new Top.Largest<Double>());
-    }
-
-    @Override
-    public Counter<Double> getCounter(String name) {
-      return Counter.doubles(name, AggregationKind.MIN);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java
deleted file mode 100644
index d4496b8..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/PTransform.java
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.display.DisplayData.Builder;
-import com.google.cloud.dataflow.sdk.transforms.display.HasDisplayData;
-import com.google.cloud.dataflow.sdk.util.StringUtils;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.TypedPValue;
-
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.Serializable;
-
-/**
- * A {@code PTransform<InputT, OutputT>} is an operation that takes an
- * {@code InputT} (some subtype of {@link PInput}) and produces an
- * {@code OutputT} (some subtype of {@link POutput}).
- *
- * <p>Common PTransforms include root PTransforms like
- * {@link com.google.cloud.dataflow.sdk.io.TextIO.Read},
- * {@link Create}, processing and
- * conversion operations like {@link ParDo},
- * {@link GroupByKey},
- * {@link com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey},
- * {@link Combine}, and {@link Count}, and outputting
- * PTransforms like
- * {@link com.google.cloud.dataflow.sdk.io.TextIO.Write}.  Users also
- * define their own application-specific composite PTransforms.
- *
- * <p>Each {@code PTransform<InputT, OutputT>} has a single
- * {@code InputT} type and a single {@code OutputT} type.  Many
- * PTransforms conceptually transform one input value to one output
- * value, and in this case {@code InputT} and {@code OutputT} are
- * typically instances of
- * {@link com.google.cloud.dataflow.sdk.values.PCollection}.
- * A root
- * PTransform conceptually has no input; in this case, conventionally
- * a {@link com.google.cloud.dataflow.sdk.values.PBegin} object
- * produced by calling {@link Pipeline#begin} is used as the input.
- * An outputting PTransform conceptually has no output; in this case,
- * conventionally {@link com.google.cloud.dataflow.sdk.values.PDone}
- * is used as its output type.  Some PTransforms conceptually have
- * multiple inputs and/or outputs; in these cases special "bundling"
- * classes like
- * {@link com.google.cloud.dataflow.sdk.values.PCollectionList},
- * {@link com.google.cloud.dataflow.sdk.values.PCollectionTuple}
- * are used
- * to combine multiple values into a single bundle for passing into or
- * returning from the PTransform.
- *
- * <p>A {@code PTransform<InputT, OutputT>} is invoked by calling
- * {@code apply()} on its {@code InputT}, returning its {@code OutputT}.
- * Calls can be chained to concisely create linear pipeline segments.
- * For example:
- *
- * <pre> {@code
- * PCollection<T1> pc1 = ...;
- * PCollection<T2> pc2 =
- *     pc1.apply(ParDo.of(new MyDoFn<T1,KV<K,V>>()))
- *        .apply(GroupByKey.<K, V>create())
- *        .apply(Combine.perKey(new MyKeyedCombineFn<K,V>()))
- *        .apply(ParDo.of(new MyDoFn2<KV<K,V>,T2>()));
- * } </pre>
- *
- * <p>PTransform operations have unique names, which are used by the
- * system when explaining what's going on during optimization and
- * execution.  Each PTransform gets a system-provided default name,
- * but it's a good practice to specify an explicit name, where
- * possible, using the {@code named()} method offered by some
- * PTransforms such as {@link ParDo}.  For example:
- *
- * <pre> {@code
- * ...
- * .apply(ParDo.named("Step1").of(new MyDoFn3()))
- * ...
- * } </pre>
- *
- * <p>Each PCollection output produced by a PTransform,
- * either directly or within a "bundling" class, automatically gets
- * its own name derived from the name of its producing PTransform.
- *
- * <p>Each PCollection output produced by a PTransform
- * also records a {@link com.google.cloud.dataflow.sdk.coders.Coder}
- * that specifies how the elements of that PCollection
- * are to be encoded as a byte string, if necessary.  The
- * PTransform may provide a default Coder for any of its outputs, for
- * instance by deriving it from the PTransform input's Coder.  If the
- * PTransform does not specify the Coder for an output PCollection,
- * the system will attempt to infer a Coder for it, based on
- * what's known at run-time about the Java type of the output's
- * elements.  The enclosing {@link Pipeline}'s
- * {@link com.google.cloud.dataflow.sdk.coders.CoderRegistry}
- * (accessible via {@link Pipeline#getCoderRegistry}) defines the
- * mapping from Java types to the default Coder to use, for a standard
- * set of Java types; users can extend this mapping for additional
- * types, via
- * {@link com.google.cloud.dataflow.sdk.coders.CoderRegistry#registerCoder}.
- * If this inference process fails, either because the Java type was
- * not known at run-time (e.g., due to Java's "erasure" of generic
- * types) or there was no default Coder registered, then the Coder
- * should be specified manually by calling
- * {@link com.google.cloud.dataflow.sdk.values.TypedPValue#setCoder}
- * on the output PCollection.  The Coder of every output
- * PCollection must be determined one way or another
- * before that output is used as an input to another PTransform, or
- * before the enclosing Pipeline is run.
- *
- * <p>A small number of PTransforms are implemented natively by the
- * Google Cloud Dataflow SDK; such PTransforms simply return an
- * output value as their apply implementation.
- * The majority of PTransforms are
- * implemented as composites of other PTransforms.  Such a PTransform
- * subclass typically just implements {@link #apply}, computing its
- * {@code OutputT} value from its {@code InputT} value.  User programs are encouraged to
- * use this mechanism to modularize their own code.  Such composite
- * abstractions get their own name, and navigating through the
- * composition hierarchy of PTransforms is supported by the monitoring
- * interface.  Examples of composite PTransforms can be found in this
- * directory and in examples.  From the caller's point of view, there
- * is no distinction between a PTransform implemented natively and one
- * implemented in terms of other PTransforms; both kinds of PTransform
- * are invoked in the same way, using {@code apply()}.
- *
- * <h3>Note on Serialization</h3>
- *
- * <p>{@code PTransform} doesn't actually support serialization, despite
- * implementing {@code Serializable}.
- *
- * <p>{@code PTransform} is marked {@code Serializable} solely
- * because it is common for an anonymous {@code DoFn}
- * instance to be created within an
- * {@code apply()} method of a composite {@code PTransform}.
- *
- * <p>Each of those {@code *Fn}s is {@code Serializable}, but
- * unfortunately its instance state will contain a reference to the
- * enclosing {@code PTransform} instance, and so attempt to serialize
- * the {@code PTransform} instance, even though the {@code *Fn}
- * instance never references anything about the enclosing
- * {@code PTransform}.
- *
- * <p>To allow such anonymous {@code *Fn}s to be written
- * conveniently, {@code PTransform} is marked as {@code Serializable},
- * and includes dummy {@code writeObject()} and {@code readObject()}
- * operations that do not save or restore any state.
- *
- * @see <a href=
- * "https://cloud.google.com/dataflow/java-sdk/applying-transforms"
- * >Applying Transformations</a>
- *
- * @param <InputT> the type of the input to this PTransform
- * @param <OutputT> the type of the output of this PTransform
- */
-public abstract class PTransform<InputT extends PInput, OutputT extends POutput>
-    implements Serializable /* See the note above */, HasDisplayData {
-  /**
-   * Applies this {@code PTransform} on the given {@code InputT}, and returns its
-   * {@code OutputT}.
-   *
-   * <p>Composite transforms, which are defined in terms of other transforms,
-   * should return the output of one of the composed transforms.  Non-composite
-   * transforms, which do not apply any transforms internally, should return
-   * a new unbound output and register evaluators (via backend-specific
-   * registration methods).
-   *
-   * <p>The default implementation throws an exception.  A derived class must
-   * either implement apply, or else each runner must supply a custom
-   * implementation via
-   * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner#apply}.
-   */
-  public OutputT apply(InputT input) {
-    throw new IllegalArgumentException(
-        "Runner " + input.getPipeline().getRunner()
-            + " has not registered an implementation for the required primitive operation "
-            + this);
-  }
-
-  /**
-   * Called before invoking apply (which may be intercepted by the runner) to
-   * verify this transform is fully specified and applicable to the specified
-   * input.
-   *
-   * <p>By default, does nothing.
-   */
-  public void validate(InputT input) { }
-
-  /**
-   * Returns the transform name.
-   *
-   * <p>This name is provided by the transform creator and is not required to be unique.
-   */
-  public String getName() {
-    return name != null ? name : getKindString();
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  // See the note above about PTransform's fake Serializability, to
-  // understand why all of its instance state is transient.
-
-  /**
-   * The base name of this {@code PTransform}, e.g., from
-   * {@link ParDo#named(String)}, or from defaults, or {@code null} if not
-   * yet assigned.
-   */
-  protected final transient String name;
-
-  protected PTransform() {
-    this.name = null;
-  }
-
-  protected PTransform(String name) {
-    this.name = name;
-  }
-
-  @Override
-  public String toString() {
-    if (name == null) {
-      return getKindString();
-    } else {
-      return getName() + " [" + getKindString() + "]";
-    }
-  }
-
-  /**
-   * Returns the name to use by default for this {@code PTransform}
-   * (not including the names of any enclosing {@code PTransform}s).
-   *
-   * <p>By default, returns the base name of this {@code PTransform}'s class.
-   *
-   * <p>The caller is responsible for ensuring that names of applied
-   * {@code PTransform}s are unique, e.g., by adding a uniquifying
-   * suffix when needed.
-   */
-  protected String getKindString() {
-    if (getClass().isAnonymousClass()) {
-      return "AnonymousTransform";
-    } else {
-      return StringUtils.approximatePTransformName(getClass());
-    }
-  }
-
-  private void writeObject(ObjectOutputStream oos) {
-    // We don't really want to be serializing this object, but we
-    // often have serializable anonymous DoFns nested within a
-    // PTransform.
-  }
-
-  private void readObject(ObjectInputStream oos) {
-    // We don't really want to be serializing this object, but we
-    // often have serializable anonymous DoFns nested within a
-    // PTransform.
-  }
-
-  /**
-   * Returns the default {@code Coder} to use for the output of this
-   * single-output {@code PTransform}.
-   *
-   * <p>By default, always throws.
-   *
-   * @throws CannotProvideCoderException if no coder can be inferred
-   */
-  protected Coder<?> getDefaultOutputCoder() throws CannotProvideCoderException {
-    throw new CannotProvideCoderException(
-      "PTransform.getDefaultOutputCoder called.");
-  }
-
-  /**
-   * Returns the default {@code Coder} to use for the output of this
-   * single-output {@code PTransform} when applied to the given input.
-   *
-   * @throws CannotProvideCoderException if none can be inferred.
-   *
-   * <p>By default, always throws.
-   */
-  protected Coder<?> getDefaultOutputCoder(@SuppressWarnings("unused") InputT input)
-      throws CannotProvideCoderException {
-    return getDefaultOutputCoder();
-  }
-
-  /**
-   * Returns the default {@code Coder} to use for the given output of
-   * this single-output {@code PTransform} when applied to the given input.
-   *
-   * @throws CannotProvideCoderException if none can be inferred.
-   *
-   * <p>By default, always throws.
-   */
-  public <T> Coder<T> getDefaultOutputCoder(
-      InputT input, @SuppressWarnings("unused") TypedPValue<T> output)
-      throws CannotProvideCoderException {
-    @SuppressWarnings("unchecked")
-    Coder<T> defaultOutputCoder = (Coder<T>) getDefaultOutputCoder(input);
-    return defaultOutputCoder;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * <p>By default, does not register any display data. Implementors may override this method
-   * to provide their own display metadata.
-   */
-  @Override
-  public void populateDisplayData(Builder builder) {
-  }
-}

[63/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

Directory reorganization

Move Java 8 examples from "java8examples/" into "examples/java8/".


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/11bb9e0e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/11bb9e0e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/11bb9e0e

Branch: refs/heads/master
Commit: 11bb9e0e61f8b15ce81e5181baa5458bb715a059
Parents: 2eaa709
Author: Davor Bonaci <da...@google.com>
Authored: Wed Mar 23 17:16:47 2016 -0700
Committer: Davor Bonaci <da...@google.com>
Committed: Wed Mar 23 18:33:33 2016 -0700

----------------------------------------------------------------------
 examples/java8/pom.xml                          | 279 +++++++++++++
 .../examples/MinimalWordCountJava8.java         |  68 +++
 .../examples/complete/game/GameStats.java       | 339 +++++++++++++++
 .../examples/complete/game/HourlyTeamScore.java | 193 +++++++++
 .../examples/complete/game/LeaderBoard.java     | 237 +++++++++++
 .../dataflow/examples/complete/game/README.md   | 113 +++++
 .../examples/complete/game/UserScore.java       | 239 +++++++++++
 .../complete/game/injector/Injector.java        | 415 +++++++++++++++++++
 .../complete/game/injector/InjectorUtils.java   | 101 +++++
 .../injector/RetryHttpInitializerWrapper.java   | 126 ++++++
 .../complete/game/utils/WriteToBigQuery.java    | 134 ++++++
 .../game/utils/WriteWindowedToBigQuery.java     |  76 ++++
 .../examples/MinimalWordCountJava8Test.java     | 103 +++++
 .../examples/complete/game/GameStatsTest.java   |  76 ++++
 .../complete/game/HourlyTeamScoreTest.java      | 111 +++++
 .../examples/complete/game/UserScoreTest.java   | 154 +++++++
 java8examples/pom.xml                           | 279 -------------
 .../examples/MinimalWordCountJava8.java         |  68 ---
 .../examples/complete/game/GameStats.java       | 339 ---------------
 .../examples/complete/game/HourlyTeamScore.java | 193 ---------
 .../examples/complete/game/LeaderBoard.java     | 237 -----------
 .../dataflow/examples/complete/game/README.md   | 113 -----
 .../examples/complete/game/UserScore.java       | 239 -----------
 .../complete/game/injector/Injector.java        | 415 -------------------
 .../complete/game/injector/InjectorUtils.java   | 101 -----
 .../injector/RetryHttpInitializerWrapper.java   | 126 ------
 .../complete/game/utils/WriteToBigQuery.java    | 134 ------
 .../game/utils/WriteWindowedToBigQuery.java     |  76 ----
 .../examples/MinimalWordCountJava8Test.java     | 103 -----
 .../examples/complete/game/GameStatsTest.java   |  76 ----
 .../complete/game/HourlyTeamScoreTest.java      | 111 -----
 .../examples/complete/game/UserScoreTest.java   | 154 -------
 pom.xml                                         |   2 +-
 33 files changed, 2765 insertions(+), 2765 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java8/pom.xml b/examples/java8/pom.xml
new file mode 100644
index 0000000..7d55c31
--- /dev/null
+++ b/examples/java8/pom.xml
@@ -0,0 +1,279 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.beam</groupId>
+    <artifactId>parent</artifactId>
+    <version>0.1.0-incubating-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>java8examples-all</artifactId>
+  <name>Apache Beam :: Examples :: Java 8 All</name>
+  <description>Apache Beam Java SDK provides a simple, Java-based
+    interface for processing virtually any size data.
+    This artifact includes examples of the SDK from a Java 8
+    user.</description>
+
+  <packaging>jar</packaging>
+
+  <profiles>
+    <profile>
+      <id>DataflowPipelineTests</id>
+      <properties>
+        <runIntegrationTestOnService>true</runIntegrationTestOnService>
+        <testGroups>com.google.cloud.dataflow.sdk.testing.RunnableOnService</testGroups>
+        <testParallelValue>both</testParallelValue>
+      </properties>
+    </profile>
+  </profiles>
+
+  <build>
+    <plugins>
+      <plugin>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <configuration>
+          <source>1.8</source>
+          <target>1.8</target>
+          <testSource>1.8</testSource>
+          <testTarget>1.8</testTarget>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals><goal>analyze-only</goal></goals>
+            <configuration>
+              <failOnWarning>true</failOnWarning>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <version>2.12</version>
+        <dependencies>
+          <dependency>
+            <groupId>com.puppycrawl.tools</groupId>
+            <artifactId>checkstyle</artifactId>
+            <version>6.6</version>
+          </dependency>
+        </dependencies>
+        <configuration>
+          <configLocation>../../checkstyle.xml</configLocation>
+          <consoleOutput>true</consoleOutput>
+          <failOnViolation>true</failOnViolation>
+          <includeTestSourceDirectory>true</includeTestSourceDirectory>
+          <includeResources>false</includeResources>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>check</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!-- Source plugin for generating source and test-source JARs. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-source-plugin</artifactId>
+        <version>2.4</version>
+        <executions>
+          <execution>
+            <id>attach-sources</id>
+            <phase>compile</phase>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>attach-test-sources</id>
+            <phase>test-compile</phase>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>default-jar</id>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>default-test-jar</id>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!-- Coverage analysis for unit tests. -->
+      <plugin>
+        <groupId>org.jacoco</groupId>
+        <artifactId>jacoco-maven-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>java-sdk-all</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>java-examples-all</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>${guava.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>${slf4j.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>${avro.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>joda-time</groupId>
+      <artifactId>joda-time</artifactId>
+      <version>${joda.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.hamcrest</groupId>
+      <artifactId>hamcrest-all</artifactId>
+      <version>${hamcrest.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <version>1.10.19</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-bigquery</artifactId>
+      <version>${bigquery.version}</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.http-client</groupId>
+      <artifactId>google-http-client</artifactId>
+      <version>${google-clients.version}</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.oauth-client</groupId>
+      <artifactId>google-oauth-client</artifactId>
+      <version>${google-clients.version}</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.apis</groupId>
+      <artifactId>google-api-services-pubsub</artifactId>
+      <version>${pubsub.version}</version>
+      <exclusions>
+        <!-- Exclude an old version of guava that is being pulled
+             in by a transitive dependency of google-api-client -->
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.api-client</groupId>
+      <artifactId>google-api-client</artifactId>
+      <version>${google-clients.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava-jdk5</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8.java b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8.java
new file mode 100644
index 0000000..c115ea0
--- /dev/null
+++ b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner;
+import com.google.cloud.dataflow.sdk.transforms.Count;
+import com.google.cloud.dataflow.sdk.transforms.Filter;
+import com.google.cloud.dataflow.sdk.transforms.FlatMapElements;
+import com.google.cloud.dataflow.sdk.transforms.MapElements;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+
+import java.util.Arrays;
+
+/**
+ * An example that counts words in Shakespeare, using Java 8 language features.
+ *
+ * <p>See {@link MinimalWordCount} for a comprehensive explanation.
+ */
+public class MinimalWordCountJava8 {
+
+  public static void main(String[] args) {
+    DataflowPipelineOptions options = PipelineOptionsFactory.create()
+        .as(DataflowPipelineOptions.class);
+
+    options.setRunner(BlockingDataflowPipelineRunner.class);
+
+    // CHANGE 1 of 3: Your project ID is required in order to run your pipeline on the Google Cloud.
+    options.setProject("SET_YOUR_PROJECT_ID_HERE");
+
+    // CHANGE 2 of 3: Your Google Cloud Storage path is required for staging local files.
+    options.setStagingLocation("gs://SET_YOUR_BUCKET_NAME_HERE/AND_STAGING_DIRECTORY");
+
+    Pipeline p = Pipeline.create(options);
+
+    p.apply(TextIO.Read.from("gs://dataflow-samples/shakespeare/*"))
+     .apply(FlatMapElements.via((String word) -> Arrays.asList(word.split("[^a-zA-Z']+")))
+         .withOutputType(new TypeDescriptor<String>() {}))
+     .apply(Filter.byPredicate((String word) -> !word.isEmpty()))
+     .apply(Count.<String>perElement())
+     .apply(MapElements
+         .via((KV<String, Long> wordCount) -> wordCount.getKey() + ": " + wordCount.getValue())
+         .withOutputType(new TypeDescriptor<String>() {}))
+
+     // CHANGE 3 of 3: The Google Cloud Storage path is required for outputting the results to.
+     .apply(TextIO.Write.to("gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX"));
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/GameStats.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/GameStats.java b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/GameStats.java
new file mode 100644
index 0000000..7c67d10
--- /dev/null
+++ b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/GameStats.java
@@ -0,0 +1,339 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game;
+
+import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
+import com.google.cloud.dataflow.examples.complete.game.utils.WriteWindowedToBigQuery;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.io.PubsubIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
+import com.google.cloud.dataflow.sdk.transforms.Aggregator;
+import com.google.cloud.dataflow.sdk.transforms.Combine;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
+import com.google.cloud.dataflow.sdk.transforms.MapElements;
+import com.google.cloud.dataflow.sdk.transforms.Mean;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.Sum;
+import com.google.cloud.dataflow.sdk.transforms.Values;
+import com.google.cloud.dataflow.sdk.transforms.View;
+import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
+import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Sessions;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.PCollectionView;
+import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+
+import org.joda.time.DateTimeZone;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+/**
+ * This class is the fourth in a series of four pipelines that tell a story in a 'gaming'
+ * domain, following {@link UserScore}, {@link HourlyTeamScore}, and {@link LeaderBoard}.
+ * New concepts: session windows and finding session duration; use of both
+ * singleton and non-singleton side inputs.
+ *
+ * <p> This pipeline builds on the {@link LeaderBoard} functionality, and adds some "business
+ * intelligence" analysis: abuse detection and usage patterns. The pipeline derives the Mean user
+ * score sum for a window, and uses that information to identify likely spammers/robots. (The robots
+ * have a higher click rate than the human users). The 'robot' users are then filtered out when
+ * calculating the team scores.
+ *
+ * <p> Additionally, user sessions are tracked: that is, we find bursts of user activity using
+ * session windows. Then, the mean session duration information is recorded in the context of
+ * subsequent fixed windowing. (This could be used to tell us what games are giving us greater
+ * user retention).
+ *
+ * <p> Run {@code com.google.cloud.dataflow.examples.complete.game.injector.Injector} to generate
+ * pubsub data for this pipeline. The {@code Injector} documentation provides more detail.
+ *
+ * <p> To execute this pipeline using the Dataflow service, specify the pipeline configuration
+ * like this:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ *   --dataset=YOUR-DATASET
+ *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
+ * }
+ * </pre>
+ * where the BigQuery dataset you specify must already exist. The PubSub topic you specify should
+ * be the same topic to which the Injector is publishing.
+ */
+public class GameStats extends LeaderBoard {
+
+  private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms";
+
+  private static DateTimeFormatter fmt =
+      DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS")
+          .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
+
+  /**
+   * Filter out all but those users with a high clickrate, which we will consider as 'spammy' users.
+   * We do this by finding the mean total score per user, then using that information as a side
+   * input to filter out all but those user scores that are {@code > (mean * SCORE_WEIGHT)}.
+   */
+  // [START DocInclude_AbuseDetect]
+  public static class CalculateSpammyUsers
+      extends PTransform<PCollection<KV<String, Integer>>, PCollection<KV<String, Integer>>> {
+    private static final Logger LOG = LoggerFactory.getLogger(CalculateSpammyUsers.class);
+    private static final double SCORE_WEIGHT = 2.5;
+
+    @Override
+    public PCollection<KV<String, Integer>> apply(PCollection<KV<String, Integer>> userScores) {
+
+      // Get the sum of scores for each user.
+      PCollection<KV<String, Integer>> sumScores = userScores
+          .apply("UserSum", Sum.<String>integersPerKey());
+
+      // Extract the score from each element, and use it to find the global mean.
+      final PCollectionView<Double> globalMeanScore = sumScores.apply(Values.<Integer>create())
+          .apply(Mean.<Integer>globally().asSingletonView());
+
+      // Filter the user sums using the global mean.
+      PCollection<KV<String, Integer>> filtered = sumScores
+          .apply(ParDo
+              .named("ProcessAndFilter")
+              // use the derived mean total score as a side input
+              .withSideInputs(globalMeanScore)
+              .of(new DoFn<KV<String, Integer>, KV<String, Integer>>() {
+                private final Aggregator<Long, Long> numSpammerUsers =
+                  createAggregator("SpammerUsers", new Sum.SumLongFn());
+                @Override
+                public void processElement(ProcessContext c) {
+                  Integer score = c.element().getValue();
+                  Double gmc = c.sideInput(globalMeanScore);
+                  if (score > (gmc * SCORE_WEIGHT)) {
+                    LOG.info("user {} spammer score {} with mean {}",
+                        c.element().getKey(), score, gmc);
+                    numSpammerUsers.addValue(1L);
+                    c.output(c.element());
+                  }
+                }
+              }));
+      return filtered;
+    }
+  }
+  // [END DocInclude_AbuseDetect]
+
+  /**
+   * Output, for each element, the length in whole minutes of the window that contains it.
+   */
+  private static class UserSessionInfoFn extends DoFn<KV<String, Integer>, Integer>
+      implements RequiresWindowAccess {
+    // The unconditional cast below assumes the input is windowed into IntervalWindows.
+    @Override
+    public void processElement(ProcessContext c) {
+      IntervalWindow w = (IntervalWindow) c.window();
+      int duration = new Duration(
+          w.start(), w.end()).toPeriod().toStandardMinutes().getMinutes();
+      c.output(duration);
+    }
+  }
+
+
+  /**
+   * Options supported by {@link GameStats}, on top of those of {@link LeaderBoard.Options}.
+   */
+  static interface Options extends LeaderBoard.Options {
+    @Description("Numeric value of fixed window duration for user analysis, in minutes")
+    @Default.Integer(60)
+    Integer getFixedWindowDuration();
+    void setFixedWindowDuration(Integer value);
+
+    @Description("Numeric value of gap between user sessions, in minutes")
+    @Default.Integer(5)
+    Integer getSessionGap();
+    void setSessionGap(Integer value);
+
+    @Description("Numeric value of fixed window for finding mean of user session duration, "
+        + "in minutes")
+    @Default.Integer(30)
+    Integer getUserActivityWindowDuration();
+    void setUserActivityWindowDuration(Integer value);
+
+    @Description("Prefix used for the BigQuery table names")
+    @Default.String("game_stats")
+    String getTablePrefix();
+    void setTablePrefix(String value);
+  }
+
+
+  /**
+   * Describe how team score sums are written to BigQuery: each entry maps a field name to a
+   * {@code FieldInfo} holding a type name and a function that computes the field's value.
+   */
+  protected static Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>>
+      configureWindowedWrite() {
+    Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>> columns = new HashMap<>();
+    columns.put("team",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "STRING", ctx -> ctx.element().getKey()));
+    columns.put("total_score",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "INTEGER", ctx -> ctx.element().getValue()));
+    // The start of the element's window, rendered with the class-level formatter.
+    columns.put("window_start",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "STRING", ctx -> fmt.print(((IntervalWindow) ctx.window()).start())));
+    // The wall-clock time at which the field function is evaluated.
+    columns.put("processing_time",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "STRING", ctx -> fmt.print(Instant.now())));
+    return columns;
+  }
+
+  /**
+   * Describe how mean user session durations are written to BigQuery: each entry maps a field
+   * name to a {@code FieldInfo} holding a type name and a function that computes the value.
+   */
+  protected static Map<String, WriteWindowedToBigQuery.FieldInfo<Double>>
+      configureSessionWindowWrite() {
+    Map<String, WriteWindowedToBigQuery.FieldInfo<Double>> columns = new HashMap<>();
+    // The start of the element's window, rendered with the class-level formatter.
+    columns.put("window_start",
+        new WriteWindowedToBigQuery.FieldInfo<Double>(
+            "STRING", ctx -> fmt.print(((IntervalWindow) ctx.window()).start())));
+    // The element itself is the mean duration.
+    columns.put("mean_duration",
+        new WriteWindowedToBigQuery.FieldInfo<Double>(
+            "FLOAT", ctx -> ctx.element()));
+    return columns;
+  }
+
+
+
+  public static void main(String[] args) throws Exception {
+    // Builds and runs the pipeline: spammer detection, per-team sums, and mean session length.
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    // Enforce that this pipeline is always run in streaming mode.
+    options.setStreaming(true);
+    // Always use DataflowPipelineRunner, overriding any runner given on the command line.
+    options.setRunner(DataflowPipelineRunner.class);
+    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
+    Pipeline pipeline = Pipeline.create(options);
+
+    // Read events from Pub/Sub using custom timestamps (the TIMESTAMP_ATTRIBUTE label).
+    PCollection<GameActionInfo> rawEvents = pipeline
+        .apply(PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).topic(options.getTopic()))
+        .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()));
+
+    // Extract username/score pairs from the event stream.
+    PCollection<KV<String, Integer>> userEvents =
+        rawEvents.apply("ExtractUserScore",
+          MapElements.via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
+            .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));
+
+    // Window the user scores into fixed windows, then derive a view of the likely spammers.
+    // No trigger is configured on this windowing step.
+    final PCollectionView<Map<String, Integer>> spammersView = userEvents
+      .apply(Window.named("FixedWindowsUser")
+          .<KV<String, Integer>>into(FixedWindows.of(
+              Duration.standardMinutes(options.getFixedWindowDuration())))
+          )
+
+      // Keep only the users whose score sum exceeds (SCORE_WEIGHT * mean of the score sums).
+      // These might be robots/spammers.
+      .apply("CalculateSpammyUsers", new CalculateSpammyUsers())
+      // Derive a view from the collection of spammer users. It will be used as a side input
+      // in calculating the team score sums, below.
+      .apply("CreateSpammersView", View.<String, Integer>asMap());
+
+    // [START DocInclude_FilterAndCalc]
+    // Calculate the total score per team over fixed windows.
+    // Uses the side input derived above-- the map of suspected robots-- to leave the scores
+    // of those users out of the sum.
+    // Write the results to BigQuery.
+    rawEvents
+      .apply(Window.named("WindowIntoFixedWindows")
+          .<GameActionInfo>into(FixedWindows.of(
+              Duration.standardMinutes(options.getFixedWindowDuration())))
+          )
+      // Filter out the detected spammer users, using the side input derived above.
+      .apply(ParDo.named("FilterOutSpammers")
+              .withSideInputs(spammersView)
+              .of(new DoFn<GameActionInfo, GameActionInfo>() {
+                @Override
+                public void processElement(ProcessContext c) {
+                  // If the user is not in the spammers Map, output the data element.
+                  if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) {
+                    c.output(c.element());
+                  }
+                }
+              }))
+      // Extract and sum teamname/score pairs from the event data.
+      .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
+      // [END DocInclude_FilterAndCalc]
+      // Write the result to BigQuery
+      .apply("WriteTeamSums",
+             new WriteWindowedToBigQuery<KV<String, Integer>>(
+                options.getTablePrefix() + "_team", configureWindowedWrite()));
+
+
+    // [START DocInclude_SessionCalc]
+    // Detect user sessions-- that is, a burst of activity separated by a gap from further
+    // activity. Find and record the mean session lengths.
+    // This information could help the game designers track the changing user engagement
+    // as their set of games changes.
+    userEvents
+      .apply(Window.named("WindowIntoSessions")
+            .<KV<String, Integer>>into(
+                  Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap())))
+        .withOutputTimeFn(OutputTimeFns.outputAtEndOfWindow()))
+      // For this use, we care only about the existence of the session, not any particular
+      // information aggregated over it, so the following is an efficient way to do that.
+      .apply(Combine.perKey(x -> 0))
+      // Get the duration per session.
+      .apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn()))
+      // [END DocInclude_SessionCalc]
+      // [START DocInclude_Rewindow]
+      // Re-window to process groups of session durations according to when the sessions complete.
+      .apply(Window.named("WindowToExtractSessionMean")
+            .<Integer>into(
+                FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration()))))
+      // Find the mean session duration in each window.
+      .apply(Mean.<Integer>globally().withoutDefaults())
+      // Write this info to a BigQuery table.
+      .apply("WriteAvgSessionLength",
+             new WriteWindowedToBigQuery<Double>(
+                options.getTablePrefix() + "_sessions", configureSessionWindowWrite()));
+    // [END DocInclude_Rewindow]
+
+
+    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
+    // command line.
+    PipelineResult result = pipeline.run();
+    dataflowUtils.waitToFinish(result);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java
new file mode 100644
index 0000000..481b9df
--- /dev/null
+++ b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScore.java
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game;
+
+import com.google.cloud.dataflow.examples.complete.game.utils.WriteWindowedToBigQuery;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.Filter;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.WithTimestamps;
+import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+
+import org.joda.time.DateTimeZone;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+/**
+ * This class is the second in a series of four pipelines that tell a story in a 'gaming'
+ * domain, following {@link UserScore}. In addition to the concepts introduced in {@link UserScore},
+ * new concepts include: windowing and element timestamps; use of {@code Filter.byPredicate()}.
+ *
+ * <p> This pipeline processes data collected from gaming events in batch, building on {@link
+ * UserScore} but using fixed windows. It calculates the sum of scores per team, for each window,
+ * optionally allowing specification of two timestamps before and after which data is filtered out.
+ * This allows a model where late data collected after the intended analysis window can be included,
+ * and any late-arriving data prior to the beginning of the analysis window can be removed as well.
+ * By using windowing and adding element timestamps, we can do finer-grained analysis than with the
+ * {@link UserScore} pipeline. However, our batch processing is high-latency, in that we don't get
+ * results from plays at the beginning of the batch's time period until the batch is processed.
+ *
+ * <p> To execute this pipeline using the Dataflow service, specify the pipeline configuration
+ * like this:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ *   --dataset=YOUR-DATASET
+ * }
+ * </pre>
+ * where the BigQuery dataset you specify must already exist.
+ *
+ * <p> Optionally include {@code --input} to specify the batch input file path.
+ * To indicate a time after which the data should be filtered out, include the
+ * {@code --stopMin} arg. E.g., {@code --stopMin=2015-10-18-23-59} indicates that any data
+ * timestamped after 23:59 PST on 2015-10-18 should not be included in the analysis.
+ * To indicate a time before which data should be filtered out, include the {@code --startMin} arg.
+ * If you're using the default input specified in {@link UserScore},
+ * "gs://dataflow-samples/game/gaming_data*.csv", then
+ * {@code --startMin=2015-11-16-16-10 --stopMin=2015-11-17-16-10} are good values.
+ */
+public class HourlyTeamScore extends UserScore {
+
+  private static DateTimeFormatter fmt =
+      DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS")
+          .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
+  private static DateTimeFormatter minFmt =
+      DateTimeFormat.forPattern("yyyy-MM-dd-HH-mm")
+          .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
+
+
+  /**
+   * Options supported by {@link HourlyTeamScore}, on top of those of {@link UserScore.Options}.
+   */
+  static interface Options extends UserScore.Options {
+
+    @Description("Numeric value of fixed window duration, in minutes")
+    @Default.Integer(60)
+    Integer getWindowDuration();
+    void setWindowDuration(Integer value);
+
+    @Description("String representation of the first minute after which to generate results,"
+        + " in the format: yyyy-MM-dd-HH-mm . This time should be in PST."
+        + " Any input data timestamped prior to that minute won't be included in the sums.")
+    @Default.String("1970-01-01-00-00")
+    String getStartMin();
+    void setStartMin(String value);
+
+    @Description("String representation of the first minute for which to not generate results,"
+        + " in the format: yyyy-MM-dd-HH-mm . This time should be in PST."
+        + " Any input data timestamped after that minute won't be included in the sums.")
+    @Default.String("2100-01-01-00-00")
+    String getStopMin();
+    void setStopMin(String value);
+
+    @Description("The BigQuery table name. Should not already exist.")
+    @Default.String("hourly_team_score")
+    String getTableName();
+    void setTableName(String value);
+  }
+
+  /**
+   * Describe how team score sums, together with the start time of the window they belong to, are
+   * written to BigQuery. The returned map is handed to the {@link WriteWindowedToBigQuery}
+   * constructor in {@code main}.
+   */
+  protected static Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>>
+      configureWindowedTableWrite() {
+    Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>> columns = new HashMap<>();
+    // The team name and its summed score are the key and value of the KV element.
+    columns.put("team",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "STRING", ctx -> ctx.element().getKey()));
+    columns.put("total_score",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "INTEGER", ctx -> ctx.element().getValue()));
+    // The start of the element's window, rendered with the class-level formatter.
+    columns.put("window_start",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "STRING", ctx -> fmt.print(((IntervalWindow) ctx.window()).start())));
+    return columns;
+  }
+
+
+  /**
+   * Run a batch pipeline to do windowed analysis of the data.
+   */
+  // [START DocInclude_HTSMain]
+  public static void main(String[] args) throws Exception {
+    // Begin constructing a pipeline configured by commandline flags.
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    Pipeline pipeline = Pipeline.create(options);
+
+    final Instant stopMinTimestamp = new Instant(minFmt.parseMillis(options.getStopMin()));
+    final Instant startMinTimestamp = new Instant(minFmt.parseMillis(options.getStartMin()));
+
+    // Read 'gaming' events from a text file.
+    pipeline.apply(TextIO.Read.from(options.getInput()))
+      // Parse the incoming data.
+      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
+
+      // Keep only events in the half-open range [startMin, stopMin). As we collect data in
+      // batches (say, by day), the batch for the day that we want to analyze could include some
+      // late-arriving data from the previous day, which we weed out. Similarly, if we include
+      // data from the following day (to scoop up late-arriving events from the day we're
+      // analyzing), we weed out events that fall after the time period we want to analyze. An
+      // event stamped exactly at startMin is not "prior to" it, so it is kept.
+      // [START DocInclude_HTSFilters]
+      .apply("FilterStartTime", Filter.byPredicate(
+          (GameActionInfo gInfo)
+              -> gInfo.getTimestamp() >= startMinTimestamp.getMillis()))
+      .apply("FilterEndTime", Filter.byPredicate(
+          (GameActionInfo gInfo)
+              -> gInfo.getTimestamp() < stopMinTimestamp.getMillis()))
+      // [END DocInclude_HTSFilters]
+
+      // [START DocInclude_HTSAddTsAndWindow]
+      // Add an element timestamp based on the event log, and apply fixed windowing.
+      .apply("AddEventTimestamps",
+             WithTimestamps.of((GameActionInfo i) -> new Instant(i.getTimestamp())))
+      .apply(Window.named("FixedWindowsTeam")
+          .<GameActionInfo>into(FixedWindows.of(
+                Duration.standardMinutes(options.getWindowDuration()))))
+      // [END DocInclude_HTSAddTsAndWindow]
+
+      // Extract and sum teamname/score pairs from the event data.
+      .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
+      .apply("WriteTeamScoreSums",
+        new WriteWindowedToBigQuery<KV<String, Integer>>(options.getTableName(),
+            configureWindowedTableWrite()));
+
+
+    pipeline.run();
+  }
+  // [END DocInclude_HTSMain]
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java
new file mode 100644
index 0000000..4185376
--- /dev/null
+++ b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java
@@ -0,0 +1,237 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game;
+
+import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
+import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
+import com.google.cloud.dataflow.examples.complete.game.utils.WriteToBigQuery;
+import com.google.cloud.dataflow.examples.complete.game.utils.WriteWindowedToBigQuery;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.io.PubsubIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.windowing.AfterProcessingTime;
+import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark;
+import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Repeatedly;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.joda.time.DateTimeZone;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TimeZone;
+
+/**
+ * This class is the third in a series of four pipelines that tell a story in a 'gaming' domain,
+ * following {@link UserScore} and {@link HourlyTeamScore}. Concepts include: processing unbounded
+ * data using fixed windows; use of custom timestamps and event-time processing; generation of
+ * early/speculative results; using .accumulatingFiredPanes() to do cumulative processing of late-
+ * arriving data.
+ *
+ * <p> This pipeline processes an unbounded stream of 'game events'. The calculation of the team
+ * scores uses fixed windowing based on event time (the time of the game play event), not
+ * processing time (the time that an event is processed by the pipeline). The pipeline calculates
+ * the sum of scores per team, for each window. By default, the team scores are calculated using
+ * one-hour windows.
+ *
+ * <p> In contrast-- to demo another windowing option-- the user scores are calculated using a
+ * global window, which periodically (every ten minutes) emits cumulative user score sums.
+ *
+ * <p> In contrast to the previous pipelines in the series, which used static, finite input data,
+ * here we're using an unbounded data source, which lets us provide speculative results, and allows
+ * handling of late data, at much lower latency. We can use the early/speculative results to keep a
+ * 'leaderboard' updated in near-realtime. Our handling of late data lets us generate correct
+ * results, e.g. for 'team prizes'. We're now outputting window results as they're
+ * calculated, giving us much lower latency than with the previous batch examples.
+ *
+ * <p> Run {@link com.google.cloud.dataflow.examples.complete.game.injector.Injector} to generate
+ * pubsub data for this pipeline. The Injector documentation provides more detail on how to do
+ * this.
+ *
+ * <p> To execute this pipeline using the Dataflow service, specify the pipeline configuration
+ * like this:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ *   --dataset=YOUR-DATASET
+ *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
+ * }
+ * </pre>
+ * where the BigQuery dataset you specify must already exist.
+ * The PubSub topic you specify should be the same topic to which the Injector is publishing.
+ */
+public class LeaderBoard extends HourlyTeamScore {
+
+  /**
+   * Name of the Pub/Sub message attribute that {@code main} hands to
+   * {@code PubsubIO.Read.timestampLabel}.
+   */
+  private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms";
+
+  /**
+   * Formatter for the timestamp strings written to BigQuery. The zone is named by its full region
+   * ID instead of the three-letter abbreviation "PST": three-letter IDs are deprecated by
+   * {@link TimeZone}, and Joda-Time already converts "PST" to this same region, so the formatted
+   * output does not change.
+   */
+  private static DateTimeFormatter fmt =
+      DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS")
+          .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/Los_Angeles")));
+  /** Delay applied to the early (speculative) firings of the team-score trigger. */
+  static final Duration FIVE_MINUTES = Duration.standardMinutes(5);
+  /** Delay applied to the late firings of the team-score trigger and to the user-score trigger. */
+  static final Duration TEN_MINUTES = Duration.standardMinutes(10);
+
+
+  /**
+   * Options supported by {@link LeaderBoard}.
+   */
+  static interface Options extends HourlyTeamScore.Options, DataflowExampleOptions {
+
+    // Required (no default). main() passes this value to PubsubIO.Read.topic().
+    @Description("Pub/Sub topic to read from")
+    @Validation.Required
+    String getTopic();
+    void setTopic(String value);
+
+    // Size, in minutes, of the fixed windows used for the team scores; main() converts it with
+    // Duration.standardMinutes(). Defaults to 60.
+    @Description("Numeric value of fixed window duration for team analysis, in minutes")
+    @Default.Integer(60)
+    Integer getTeamWindowDuration();
+    void setTeamWindowDuration(Integer value);
+
+    // Allowed lateness, in minutes, applied by main() to both the team and the user windowing.
+    // Defaults to 120.
+    @Description("Numeric value of allowed data lateness, in minutes")
+    @Default.Integer(120)
+    Integer getAllowedLateness();
+    void setAllowedLateness(Integer value);
+
+    // Table-name prefix: main() appends "_team" and "_user" to it to name the two output tables.
+    @Description("Prefix used for the BigQuery table names")
+    @Default.String("leaderboard")
+    String getTableName();
+    void setTableName(String value);
+  }
+
+  /**
+   * Create a map of information that describes how to write pipeline output to BigQuery. This map
+   * is used to write team score sums and includes event timing information.
+   */
+  protected static Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>>
+      configureWindowedTableWrite() {
+    // Column name -> (BigQuery type name, function that computes the cell from the context).
+    Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>> columns = new HashMap<>();
+
+    // Key and value of the summed KV element.
+    columns.put(
+        "team",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "STRING", c -> c.element().getKey()));
+    columns.put(
+        "total_score",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "INTEGER", c -> c.element().getValue()));
+
+    // Formatted start of the element's window; the window is cast to IntervalWindow to read it.
+    columns.put(
+        "window_start",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "STRING", c -> fmt.print(((IntervalWindow) c.window()).start())));
+
+    // Formatted current time when the row is built, and the timing of the pane that produced it.
+    columns.put(
+        "processing_time",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "STRING", c -> fmt.print(Instant.now())));
+    columns.put(
+        "timing",
+        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
+            "STRING", c -> c.pane().getTiming().toString()));
+
+    return columns;
+  }
+
+  /**
+   * Create a map of information that describes how to write pipeline output to BigQuery. This map
+   * is used to write user score sums.
+   */
+  protected static Map<String, WriteToBigQuery.FieldInfo<KV<String, Integer>>>
+      configureGlobalWindowBigQueryWrite() {
+    // Begin with the columns returned by configureBigQueryWrite(), then add one more column
+    // holding the formatted current time at which each row is built.
+    Map<String, WriteToBigQuery.FieldInfo<KV<String, Integer>>> userColumns =
+        configureBigQueryWrite();
+    WriteToBigQuery.FieldInfo<KV<String, Integer>> processingTime =
+        new WriteToBigQuery.FieldInfo<KV<String, Integer>>(
+            "STRING", c -> fmt.print(Instant.now()));
+    userColumns.put("processing_time", processingTime);
+    return userColumns;
+  }
+
+
+  /**
+   * Builds and runs the streaming leaderboard pipeline. Game events are read from the Pub/Sub
+   * topic given by the {@code topic} option and parsed; the resulting collection then feeds two
+   * independent branches: windowed team score sums written to the table
+   * {@code <tableName>_team}, and periodically emitted user score sums written to
+   * {@code <tableName>_user}.
+   *
+   * @param args command-line flags, parsed and validated into {@link Options}
+   */
+  public static void main(String[] args) throws Exception {
+
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    // Enforce that this pipeline is always run in streaming mode.
+    options.setStreaming(true);
+    // For example purposes, allow the pipeline to be easily cancelled instead of running
+    // continuously. Note that this call comes after the flags were parsed, so it overrides any
+    // --runner value given on the command line; waiting for the job is done explicitly through
+    // dataflowUtils.waitToFinish() at the end of this method.
+    options.setRunner(DataflowPipelineRunner.class);
+    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
+    Pipeline pipeline = Pipeline.create(options);
+
+    // Read game events from Pub/Sub using custom timestamps, which are extracted from the pubsub
+    // data elements, and parse the data.
+    PCollection<GameActionInfo> gameEvents = pipeline
+        .apply(PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).topic(options.getTopic()))
+        .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()));
+
+    // [START DocInclude_WindowAndTrigger]
+    // Extract team/score pairs from the event stream, using hour-long windows by default.
+    gameEvents
+        .apply(Window.named("LeaderboardTeamFixedWindows")
+          .<GameActionInfo>into(FixedWindows.of(
+              Duration.standardMinutes(options.getTeamWindowDuration())))
+          // We will get early (speculative) results as well as cumulative
+          // processing of late data. Early firings use a FIVE_MINUTES processing-time delay
+          // after the first element in a pane; late firings use a TEN_MINUTES delay.
+          .triggering(
+            AfterWatermark.pastEndOfWindow()
+            .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
+                  .plusDelayOf(FIVE_MINUTES))
+            .withLateFirings(AfterProcessingTime.pastFirstElementInPane()
+                  .plusDelayOf(TEN_MINUTES)))
+          .withAllowedLateness(Duration.standardMinutes(options.getAllowedLateness()))
+          .accumulatingFiredPanes())
+        // Extract and sum teamname/score pairs from the event data.
+        .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
+        // Write the results to BigQuery, into the table named <tableName>_team.
+        .apply("WriteTeamScoreSums",
+               new WriteWindowedToBigQuery<KV<String, Integer>>(
+                  options.getTableName() + "_team", configureWindowedTableWrite()));
+    // [END DocInclude_WindowAndTrigger]
+
+    // [START DocInclude_ProcTimeTrigger]
+    // Extract user/score pairs from the event stream using processing time, via global windowing.
+    // Get periodic updates on all users' running scores.
+    gameEvents
+        .apply(Window.named("LeaderboardUserGlobalWindow")
+          .<GameActionInfo>into(new GlobalWindows())
+          // Get periodic results every ten minutes.
+              .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
+                  .plusDelayOf(TEN_MINUTES)))
+              .accumulatingFiredPanes()
+              .withAllowedLateness(Duration.standardMinutes(options.getAllowedLateness())))
+        // Extract and sum username/score pairs from the event data.
+        .apply("ExtractUserScore", new ExtractAndSumScore("user"))
+        // Write the results to BigQuery, into the table named <tableName>_user.
+        .apply("WriteUserScoreSums",
+               new WriteToBigQuery<KV<String, Integer>>(
+                  options.getTableName() + "_user", configureGlobalWindowBigQueryWrite()));
+    // [END DocInclude_ProcTimeTrigger]
+
+    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
+    // command line.
+    PipelineResult result = pipeline.run();
+    dataflowUtils.waitToFinish(result);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/README.md
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/README.md b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/README.md
new file mode 100644
index 0000000..79b55ce
--- /dev/null
+++ b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/README.md
@@ -0,0 +1,113 @@
+
+# 'Gaming' examples
+
+
+This directory holds a series of example Dataflow pipelines in a simple 'mobile
+gaming' domain. They all require Java 8.  Each pipeline successively introduces
+new concepts, and gives some examples of using Java 8 syntax in constructing
+Dataflow pipelines. Other than usage of Java 8 lambda expressions, the concepts
+that are used apply equally well in Java 7.
+
+In the gaming scenario, many users play, as members of different teams, over
+the course of a day, and their actions are logged for processing. Some of the
+logged game events may be late-arriving, if users play on mobile devices and go
+transiently offline for a period.
+
+The scenario includes not only "regular" users, but "robot users", which have a
+higher click rate than the regular users, and may move from team to team.
+
+The first two pipelines in the series use pre-generated batch data samples. The
+second two pipelines read from a [PubSub](https://cloud.google.com/pubsub/)
+topic input.  For these examples, you will also need to run the
+`injector.Injector` program, which generates and publishes the gaming data to
+PubSub. The javadocs for each pipeline have more detailed information on how to
+run that pipeline.
+
+All of these pipelines write their results to BigQuery table(s).
+
+
+## The pipelines in the 'gaming' series
+
+### UserScore
+
+The first pipeline in the series is `UserScore`. This pipeline does batch
+processing of data collected from gaming events. It calculates the sum of
+scores per user, over an entire batch of gaming data (collected, say, for each
+day). The batch processing will not include any late data that arrives after
+the day's cutoff point.
+
+### HourlyTeamScore
+
+The next pipeline in the series is `HourlyTeamScore`. This pipeline also
+processes data collected from gaming events in batch. It builds on `UserScore`,
+but uses [fixed windows](https://cloud.google.com/dataflow/model/windowing), by
+default an hour in duration. It calculates the sum of scores per team, for each
+window, optionally allowing specification of two timestamps before and after
+which data is filtered out. This allows a model where late data collected after
+the intended analysis window can be included in the analysis, and any late-
+arriving data prior to the beginning of the analysis window can be removed as
+well.
+
+By using windowing and adding element timestamps, we can do finer-grained
+analysis than with the `UserScore` pipeline — we're now tracking scores for
+each hour rather than over the course of a whole day. However, our batch
+processing is high-latency, in that we don't get results from plays at the
+beginning of the batch's time period until the complete batch is processed.
+
+### LeaderBoard
+
+The third pipeline in the series is `LeaderBoard`. This pipeline processes an
+unbounded stream of 'game events' from a PubSub topic. The calculation of the
+team scores uses fixed windowing based on event time (the time of the game play
+event), not processing time (the time that an event is processed by the
+pipeline). The pipeline calculates the sum of scores per team, for each window.
+By default, the team scores are calculated using one-hour windows.
+
+In contrast — to demo another windowing option — the user scores are calculated
+using a global window, which periodically (every ten minutes) emits cumulative
+user score sums.
+
+In contrast to the previous pipelines in the series, which used static, finite
+input data, here we're using an unbounded data source, which lets us provide
+_speculative_ results, and allows handling of late data, at much lower latency.
+E.g., we could use the early/speculative results to keep a 'leaderboard'
+updated in near-realtime. Our handling of late data lets us generate correct
+results, e.g. for 'team prizes'. We're now outputting window results as they're
+calculated, giving us much lower latency than with the previous batch examples.
+
+### GameStats
+
+The fourth pipeline in the series is `GameStats`. This pipeline builds
+on the `LeaderBoard` functionality — supporting output of speculative and late
+data — and adds some "business intelligence" analysis: abuse detection. The
+pipeline derives the mean user score sum for a window, and uses
+that information to identify likely spammers/robots. (The injector is designed
+so that the "robots" have a higher click rate than the "real" users). The robot
+users are then filtered out when calculating the team scores.
+
+Additionally, user sessions are tracked: that is, we find bursts of user
+activity using session windows. Then, the mean session duration information is
+recorded in the context of subsequent fixed windowing. (This could be used to
+tell us what games are giving us greater user retention).
+
+### Running the PubSub Injector
+
+The `LeaderBoard` and `GameStats` example pipelines read unbounded data
+from a PubSub topic.
+
+Use the `injector.Injector` program to generate this data and publish to a
+PubSub topic. See the `Injector` javadocs for more information on how to run the
+injector. Set up the injector before you start one of these pipelines. Then,
+when you start the pipeline, pass as an argument the name of that PubSub topic.
+See the pipeline javadocs for the details.
+
+## Viewing the results in BigQuery
+
+All of the pipelines write their results to BigQuery.  `UserScore` and
+`HourlyTeamScore` each write one table, and `LeaderBoard` and
+`GameStats` each write two. The pipelines have default table names that
+you can override when you start up the pipeline if those tables already exist.
+
+Depending on the windowing intervals defined in a given pipeline, you may have
+to wait for a while (more than an hour) before you start to see results written
+to the BigQuery tables.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/UserScore.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/UserScore.java b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/UserScore.java
new file mode 100644
index 0000000..de06ce3
--- /dev/null
+++ b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/UserScore.java
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game;
+
+import com.google.cloud.dataflow.examples.complete.game.utils.WriteToBigQuery;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.AvroCoder;
+import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.Aggregator;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.MapElements;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.Sum;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
+
+import org.apache.avro.reflect.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * This class is the first in a series of four pipelines that tell a story in a 'gaming' domain.
+ * Concepts: batch processing; reading input from Google Cloud Storage and writing output to
+ * BigQuery; using standalone DoFns; use of the sum by key transform; examples of
+ * Java 8 lambda syntax.
+ *
+ * <p> In this gaming scenario, many users play, as members of different teams, over the course of a
+ * day, and their actions are logged for processing.  Some of the logged game events may be late-
+ * arriving, if users play on mobile devices and go transiently offline for a period.
+ *
+ * <p> This pipeline does batch processing of data collected from gaming events. It calculates the
+ * sum of scores per user, over an entire batch of gaming data (collected, say, for each day). The
+ * batch processing will not include any late data that arrives after the day's cutoff point.
+ *
+ * <p> To execute this pipeline using the Dataflow service and static example input data, specify
+ * the pipeline configuration like this:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ *   --dataset=YOUR-DATASET
+ * }
+ * </pre>
+ * where the BigQuery dataset you specify must already exist.
+ *
+ * <p> Optionally include the --input argument to specify a batch input file.
+ * See the --input default value for an example batch data file, or use
+ * {@link com.google.cloud.dataflow.examples.complete.game.injector.Injector} to generate your own
+ * batch data.
+ */
+public class UserScore {
+
+  /**
+   * Class to hold info about a game event.
+   */
+  @DefaultCoder(AvroCoder.class)
+  static class GameActionInfo {
+    // One parsed game event. Every field is declared @Nullable.
+    @Nullable String user;
+    @Nullable String team;
+    @Nullable Integer score;
+    // Event time as parsed from the timestamp_in_ms input field.
+    @Nullable Long timestamp;
+
+    // NOTE(review): the no-arg constructor is presumably needed by AvroCoder -- confirm before
+    // removing it.
+    public GameActionInfo() {}
+
+    /** Creates an event with all four fields set. */
+    public GameActionInfo(String user, String team, Integer score, Long timestamp) {
+      this.user = user;
+      this.team = team;
+      this.score = score;
+      this.timestamp = timestamp;
+    }
+
+    /** Returns the name of the user who generated the event. */
+    public String getUser() {
+      return this.user;
+    }
+
+    /** Returns the name of the user's team. */
+    public String getTeam() {
+      return this.team;
+    }
+
+    /** Returns the score recorded for the event. */
+    public Integer getScore() {
+      return this.score;
+    }
+
+    /**
+     * Returns the grouping key selected by {@code keyname}: the team name when it is
+     * {@code "team"}, and the user name for any other value, including {@code null}.
+     *
+     * @param keyname which field to use as the key
+     * @return the team name or the user name
+     */
+    public String getKey(String keyname) {
+      // Constant-first comparison: a null keyname falls through to the user-name default rather
+      // than throwing a NullPointerException.
+      return "team".equals(keyname) ? this.team : this.user;
+    }
+
+    /** Returns the event timestamp. */
+    public Long getTimestamp() {
+      return this.timestamp;
+    }
+  }
+
+
+  /**
+   * Parses the raw game event info into GameActionInfo objects. Each event line has the following
+   * format: username,teamname,score,timestamp_in_ms,readable_time
+   * e.g.:
+   * user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224
+   * The human-readable time string is not used here.
+   */
+  static class ParseEventFn extends DoFn<String, GameActionInfo> {
+
+    // Log and count parse errors.
+    private static final Logger LOG = LoggerFactory.getLogger(ParseEventFn.class);
+    private final Aggregator<Long, Long> numParseErrors =
+        createAggregator("ParseErrors", new Sum.SumLongFn());
+
+    /**
+     * Splits the input line on commas and outputs a {@link GameActionInfo} built from the first
+     * four fields (user, team, score, timestamp). Any further fields are ignored. A line with
+     * fewer than four fields, or with a non-numeric score or timestamp, produces no output: the
+     * "ParseErrors" aggregator is incremented and the failure is logged at INFO level.
+     */
+    @Override
+    public void processElement(ProcessContext c) {
+      String[] components = c.element().split(",");
+      try {
+        String user = components[0].trim();
+        String team = components[1].trim();
+        Integer score = Integer.parseInt(components[2].trim());
+        Long timestamp = Long.parseLong(components[3].trim());
+        c.output(new GameActionInfo(user, team, score, timestamp));
+      } catch (ArrayIndexOutOfBoundsException | NumberFormatException e) {
+        numParseErrors.addValue(1L);
+        // Parameterized logging: the message text is the same as before, but it is only
+        // assembled when INFO logging is enabled.
+        LOG.info("Parse error on {}, {}", c.element(), e.getMessage());
+      }
+    }
+  }
+
+  /**
+   * A transform to extract key/score information from GameActionInfo, and sum the scores. The
+   * constructor arg determines whether 'team' or 'user' info is extracted.
+   */
+  // [START DocInclude_USExtractXform]
+  public static class ExtractAndSumScore
+      extends PTransform<PCollection<GameActionInfo>, PCollection<KV<String, Integer>>> {
+
+    // Name of the field used as the grouping key; it is handed to GameActionInfo.getKey().
+    private final String field;
+
+    ExtractAndSumScore(String field) {
+      this.field = field;
+    }
+
+    @Override
+    public PCollection<KV<String, Integer>> apply(
+        PCollection<GameActionInfo> gameInfo) {
+      // Step 1: turn every event into a (key, score) pair.
+      PCollection<KV<String, Integer>> keyedScores = gameInfo.apply(
+          MapElements
+              .via((GameActionInfo event) -> KV.of(event.getKey(field), event.getScore()))
+              .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));
+
+      // Step 2: add up the scores of each key.
+      return keyedScores.apply(Sum.<String>integersPerKey());
+    }
+  }
+  // [END DocInclude_USExtractXform]
+
+
+  /**
+   * Options supported by {@link UserScore}.
+   */
+  public static interface Options extends PipelineOptions {
+
+    // Input location; main() passes this value to TextIO.Read.from().
+    @Description("Path to the data file(s) containing game data.")
+    // The default maps to two large Google Cloud Storage files (each ~12GB) holding two
+    // consecutive days' worth (roughly) of data.
+    @Default.String("gs://dataflow-samples/game/gaming_data*.csv")
+    String getInput();
+    void setInput(String value);
+
+    // Required (no default).
+    @Description("BigQuery Dataset to write tables to. Must already exist.")
+    @Validation.Required
+    String getDataset();
+    void setDataset(String value);
+
+    // Name of the output table; main() passes this value to the WriteToBigQuery constructor.
+    @Description("The BigQuery table name. Should not already exist.")
+    @Default.String("user_score")
+    String getTableName();
+    void setTableName(String value);
+  }
+
+  /**
+   * Create a map of information that describes how to write pipeline output to BigQuery. This map
+   * is passed to the {@link WriteToBigQuery} constructor to write user score sums.
+   */
+  protected static Map<String, WriteToBigQuery.FieldInfo<KV<String, Integer>>>
+    configureBigQueryWrite() {
+    // One descriptor per output column: the key of the summed KV element becomes the "user"
+    // column, and its value becomes the "total_score" column.
+    WriteToBigQuery.FieldInfo<KV<String, Integer>> userColumn =
+        new WriteToBigQuery.FieldInfo<KV<String, Integer>>("STRING", c -> c.element().getKey());
+    WriteToBigQuery.FieldInfo<KV<String, Integer>> scoreColumn =
+        new WriteToBigQuery.FieldInfo<KV<String, Integer>>("INTEGER", c -> c.element().getValue());
+
+    // A plain HashMap is returned, so callers are able to add further columns to it.
+    Map<String, WriteToBigQuery.FieldInfo<KV<String, Integer>>> columns = new HashMap<>();
+    columns.put("user", userColumn);
+    columns.put("total_score", scoreColumn);
+    return columns;
+  }
+
+
+  /**
+   * Run a batch pipeline.
+   */
+ // [START DocInclude_USMain]
+  public static void main(String[] args) throws Exception {
+    // Parse and validate the command-line flags, then create the pipeline they configure.
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    Pipeline pipeline = Pipeline.create(options);
+
+    // Stage 1: read the raw text lines and parse each of them into a GameActionInfo.
+    PCollection<GameActionInfo> gameEvents = pipeline
+        .apply(TextIO.Read.from(options.getInput()))
+        .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()));
+
+    // Stage 2: sum the scores per user name.
+    PCollection<KV<String, Integer>> userScores =
+        gameEvents.apply("ExtractUserScore", new ExtractAndSumScore("user"));
+
+    // Stage 3: write the per-user sums to the configured BigQuery table.
+    userScores.apply("WriteUserScoreSums",
+        new WriteToBigQuery<KV<String, Integer>>(options.getTableName(),
+                                                 configureBigQueryWrite()));
+
+    // Launch the batch pipeline.
+    pipeline.run();
+  }
+  // [END DocInclude_USMain]
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java
new file mode 100644
index 0000000..1691c54
--- /dev/null
+++ b/examples/java8/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java
@@ -0,0 +1,415 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete.game.injector;
+
+import com.google.api.services.pubsub.Pubsub;
+import com.google.api.services.pubsub.model.PublishRequest;
+import com.google.api.services.pubsub.model.PubsubMessage;
+import com.google.common.collect.ImmutableMap;
+
+import org.joda.time.DateTimeZone;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+
+import java.io.BufferedOutputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.TimeZone;
+
+
+/**
+ * This is a generator that simulates usage data from a mobile game, and either publishes the data
+ * to a pubsub topic or writes it to a file.
+ *
+ * <p> The general model used by the generator is the following. There is a set of teams with team
+ * members. Each member is scoring points for their team. After some period, a team will dissolve
+ * and a new one will be created in its place. There is also a set of 'Robots', or spammer users.
+ * They hop from team to team. The robots are set to have a higher 'click rate' (generate more
+ * events) than the regular team members.
+ *
+ * <p> Each generated line of data has the following form:
+ * username,teamname,score,timestamp_in_ms,readable_time
+ * e.g.:
+ * user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224
+ *
+ * <p> The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if
+ * specified. It takes the following arguments:
+ * {@code Injector project-name (topic-name|none) (filename|none)}.
+ *
+ * <p> To run the Injector in the mode where it publishes to PubSub, you will need to authenticate
+ * locally using project-based service account credentials to avoid running over PubSub
+ * quota.
+ * See https://developers.google.com/identity/protocols/application-default-credentials
+ * for more information on using service account credentials. Set the GOOGLE_APPLICATION_CREDENTIALS
+ * environment variable to point to your downloaded service account credentials before starting the
+ * program, e.g.:
+ * {@code export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/credentials-key.json}.
+ * If you do not do this, then your injector will only run for a few minutes on your
+ * 'user account' credentials before you will start to see quota error messages like:
+ * "Request throttled due to user QPS limit being reached", and see this exception:
+ * "com.google.api.client.googleapis.json.GoogleJsonResponseException: 429 Too Many Requests".
+ * Once you've set up your credentials, run the Injector like this:
+ * <pre>{@code
+ * Injector <project-name> <topic-name> none
+ * }
+ * </pre>
+ * The pubsub topic will be created if it does not exist.
+ *
+ * <p> To run the injector in write-to-file-mode, set the topic name to "none" and specify the
+ * filename:
+ * <pre>{@code
+ * Injector <project-name> none <filename>
+ * }
+ * </pre>
+ */
+class Injector {
+  private static Pubsub pubsub;
+  private static Random random = new Random();
+  private static String topic;
+  private static String project;
+  private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms";
+
+  // Messages per batch range from 800 to 999; one batch is started every THREAD_SLEEP_MS.
+  private static final int MIN_QPS = 800;
+  private static final int QPS_RANGE = 200;
+  // How long to sleep, in ms, between creation of the threads that make API requests to PubSub.
+  private static final int THREAD_SLEEP_MS = 500;
+
+  // Lists used to generate random team names.
+  private static final ArrayList<String> COLORS =
+      new ArrayList<String>(Arrays.asList(
+         "Magenta", "AliceBlue", "Almond", "Amaranth", "Amber",
+         "Amethyst", "AndroidGreen", "AntiqueBrass", "Fuchsia", "Ruby", "AppleGreen",
+         "Apricot", "Aqua", "ArmyGreen", "Asparagus", "Auburn", "Azure", "Banana",
+         "Beige", "Bisque", "BarnRed", "BattleshipGrey"));
+
+  private static final ArrayList<String> ANIMALS =
+      new ArrayList<String>(Arrays.asList(
+         "Echidna", "Koala", "Wombat", "Marmot", "Quokka", "Kangaroo", "Dingo", "Numbat", "Emu",
+         "Wallaby", "CaneToad", "Bilby", "Possum", "Cassowary", "Kookaburra", "Platypus",
+         "Bandicoot", "Cockatoo", "Antechinus"));
+
+  // The list of live teams.
+  private static ArrayList<TeamInfo> liveTeams = new ArrayList<TeamInfo>();
+
+  private static DateTimeFormatter fmt =
+    DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS")
+        .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
+
+
+  // The total number of robots in the system.
+  private static final int NUM_ROBOTS = 20;
+  // Determines the chance that a team will have a robot team member.
+  private static final int ROBOT_PROBABILITY = 3;
+  private static final int NUM_LIVE_TEAMS = 15;
+  private static final int BASE_MEMBERS_PER_TEAM = 5;
+  private static final int MEMBERS_PER_TEAM = 15;
+  private static final int MAX_SCORE = 20;
+  private static final int LATE_DATA_RATE = 5 * 60 * 2;  // Every 600 loop iterations (~5 minutes)
+  private static final int BASE_DELAY_IN_MILLIS = 5 * 60 * 1000;  // 5-10 minute delay
+  private static final int FUZZY_DELAY_IN_MILLIS = 5 * 60 * 1000;
+
+  // The minimum time a 'team' can live.
+  private static final int BASE_TEAM_EXPIRATION_TIME_IN_MINS = 20;
+  private static final int TEAM_EXPIRATION_TIME_IN_MINS = 20;
+
+
+  /**
+   * A class for holding team info: the name of the team, when it started,
+   * and the current team members. Teams may but need not include one robot team member.
+   */
+  private static class TeamInfo {
+    String teamName;
+    long startTimeInMillis;
+    int expirationPeriod;
+    // The team might but need not include 1 robot. Will be non-null if so.
+    String robot;
+    int numMembers;
+
+    private TeamInfo(String teamName, long startTimeInMillis, String robot) {
+      this.teamName = teamName;
+      this.startTimeInMillis = startTimeInMillis;
+      // How long until this team is dissolved.
+      this.expirationPeriod = random.nextInt(TEAM_EXPIRATION_TIME_IN_MINS) +
+        BASE_TEAM_EXPIRATION_TIME_IN_MINS;
+      this.robot = robot;
+      // Determine the number of team members.
+      numMembers = random.nextInt(MEMBERS_PER_TEAM) + BASE_MEMBERS_PER_TEAM;
+    }
+
+    String getTeamName() {
+      return teamName;
+    }
+    String getRobot() {
+      return robot;
+    }
+
+    long getStartTimeInMillis() {
+      return startTimeInMillis;
+    }
+    long getEndTimeInMillis() {
+      return startTimeInMillis + (expirationPeriod * 60 * 1000);
+    }
+    String getRandomUser() {
+      int userNum = random.nextInt(numMembers);
+      return "user" + userNum + "_" + teamName;
+    }
+
+    int numMembers() {
+      return numMembers;
+    }
+
+    @Override
+    public String toString() {
+      return "(" + teamName + ", num members: " + numMembers() + ", starting at: "
+        + startTimeInMillis + ", expires in: " + expirationPeriod + ", robot: " + robot + ")";
+    }
+  }
+
+  /** Utility to grab a random element from an array of Strings. */
+  private static String randomElement(ArrayList<String> list) {
+    int index = random.nextInt(list.size());
+    return list.get(index);
+  }
+
+  /**
+   * Get and return a random team. If the selected team is past its expiration, remove it and
+   * return a newly created team instead. Synchronized: publisher threads call this concurrently.
+   */
+  private static synchronized TeamInfo randomTeam(ArrayList<TeamInfo> list) {
+    int index = random.nextInt(list.size());
+    TeamInfo team = list.get(index);
+    // If the selected team is expired, remove it and return a new team.
+    long currTime = System.currentTimeMillis();
+    if ((team.getEndTimeInMillis() < currTime) || team.numMembers() == 0) {
+      System.out.println("\nteam " + team + " is too old; replacing.");
+      System.out.println("start time: " + team.getStartTimeInMillis() +
+        ", end time: " + team.getEndTimeInMillis() +
+        ", current time:" + currTime);
+      removeTeam(index);
+      // Add a new team in its stead.
+      return (addLiveTeam());
+    } else {
+      return team;
+    }
+  }
+
+  /**
+   * Create and add a team. Possibly add a robot to the team.
+   */
+  private static synchronized TeamInfo addLiveTeam() {
+    String teamName = randomElement(COLORS) + randomElement(ANIMALS);
+    String robot = null;
+    // Decide if we want to add a robot to the team.
+    if (random.nextInt(ROBOT_PROBABILITY) == 0) {
+      robot = "Robot-" + random.nextInt(NUM_ROBOTS);
+    }
+    // Create the new team.
+    TeamInfo newTeam = new TeamInfo(teamName, System.currentTimeMillis(), robot);
+    liveTeams.add(newTeam);
+    System.out.println("[+" + newTeam + "]");
+    return newTeam;
+  }
+
+  /**
+   * Remove a specific team.
+   */
+  private static synchronized void removeTeam(int teamIndex) {
+    TeamInfo removedTeam = liveTeams.remove(teamIndex);
+    System.out.println("[-" + removedTeam + "]");
+  }
+
+  /** Generate a user gaming event. */
+  private static String generateEvent(Long currTime, int delayInMillis) {
+    TeamInfo team = randomTeam(liveTeams);
+    String teamName = team.getTeamName();
+    String user;
+    final int parseErrorRate = 900000;
+
+    String robot = team.getRobot();
+    // If the team has an associated robot team member...
+    if (robot != null) {
+      // Then use that robot for the message with some probability.
+      // Set this probability to higher than that used to select any of the 'regular' team
+      // members, so that if there is a robot on the team, it has a higher click rate.
+      if (random.nextInt(team.numMembers() / 2) == 0) {
+        user = robot;
+      } else {
+        user = team.getRandomUser();
+      }
+    } else { // No robot.
+      user = team.getRandomUser();
+    }
+    String event = user + "," + teamName + "," + random.nextInt(MAX_SCORE);
+    // Randomly introduce occasional parse errors. You can see a custom counter tracking the number
+    // of such errors in the Dataflow Monitoring UI, as the example pipeline runs.
+    if (random.nextInt(parseErrorRate) == 0) {
+      System.out.println("Introducing a parse error.");
+      event = "THIS LINE REPRESENTS CORRUPT DATA AND WILL CAUSE A PARSE ERROR";
+    }
+    return addTimeInfoToEvent(event, currTime, delayInMillis);
+  }
+
+  /**
+   * Add time info to a generated gaming event.
+   */
+  private static String addTimeInfoToEvent(String message, Long currTime, int delayInMillis) {
+    String eventTimeString =
+        Long.toString((currTime - delayInMillis) / 1000 * 1000);
+    // Add a (redundant) 'human-readable' date string to make the data semantics more clear.
+    String dateString = fmt.print(currTime);
+    message = message + "," + eventTimeString + "," + dateString;
+    return message;
+  }
+
+  /**
+   * Publish 'numMessages' arbitrary events from live users with the provided delay, to a
+   * PubSub topic.
+   */
+  public static void publishData(int numMessages, int delayInMillis)
+      throws IOException {
+    List<PubsubMessage> pubsubMessages = new ArrayList<>();
+
+    for (int i = 0; i < Math.max(1, numMessages); i++) {
+      Long currTime = System.currentTimeMillis();
+      String message = generateEvent(currTime, delayInMillis);
+      PubsubMessage pubsubMessage = new PubsubMessage()
+              .encodeData(message.getBytes("UTF-8"));
+      pubsubMessage.setAttributes(
+          ImmutableMap.of(TIMESTAMP_ATTRIBUTE,
+              Long.toString((currTime - delayInMillis) / 1000 * 1000)));
+      if (delayInMillis != 0) {
+        System.out.println(pubsubMessage.getAttributes());
+        System.out.println("late data for: " + message);
+      }
+      pubsubMessages.add(pubsubMessage);
+    }
+
+    PublishRequest publishRequest = new PublishRequest();
+    publishRequest.setMessages(pubsubMessages);
+    pubsub.projects().topics().publish(topic, publishRequest).execute();
+  }
+
+  /**
+   * Publish generated events to a file.
+   */
+  public static void publishDataToFile(String fileName, int numMessages, int delayInMillis)
+      throws IOException {
+    PrintWriter out = new PrintWriter(new OutputStreamWriter(
+        new BufferedOutputStream(new FileOutputStream(fileName, true)), "UTF-8"));
+
+    try {
+      for (int i = 0; i < Math.max(1, numMessages); i++) {
+        Long currTime = System.currentTimeMillis();
+        String message = generateEvent(currTime, delayInMillis);
+        out.println(message);
+      }
+    } catch (Exception e) {
+      e.printStackTrace();
+    } finally {
+      if (out != null) {
+        out.flush();
+        out.close();
+      }
+    }
+  }
+
+
+  public static void main(String[] args) throws IOException, InterruptedException {
+    if (args.length < 3) {
+      System.out.println("Usage: Injector project-name (topic-name|none) (filename|none)");
+      System.exit(1);
+    }
+    boolean writeToFile = false;
+    boolean writeToPubsub = true;
+    project = args[0];
+    String topicName = args[1];
+    String fileName = args[2];
+    // The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if
+    // specified; otherwise, it will try to write to a file.
+    if (topicName.equalsIgnoreCase("none")) {
+      writeToFile = true;
+      writeToPubsub = false;
+    }
+    if (writeToPubsub) {
+      // Create the PubSub client.
+      pubsub = InjectorUtils.getClient();
+      // Create the PubSub topic as necessary.
+      topic = InjectorUtils.getFullyQualifiedTopicName(project, topicName);
+      InjectorUtils.createTopic(pubsub, topic);
+      System.out.println("Injecting to topic: " + topic);
+    } else {
+      if (fileName.equalsIgnoreCase("none")) {
+        System.out.println("Filename not specified.");
+        System.exit(1);
+      }
+      System.out.println("Writing to file: " + fileName);
+    }
+    System.out.println("Starting Injector");
+
+    // Start off with some random live teams.
+    while (liveTeams.size() < NUM_LIVE_TEAMS) {
+      addLiveTeam();
+    }
+
+    // Publish messages at a rate determined by the QPS and Thread sleep settings.
+    for (int i = 0; true; i++) {
+      if (Thread.activeCount() > 10) {
+        System.err.println("I'm falling behind!");
+      }
+
+      // Decide if this should be a batch of late data.
+      final int numMessages;
+      final int delayInMillis;
+      if (i % LATE_DATA_RATE == 0) {
+        // Insert delayed data for one user (one message only)
+        delayInMillis = BASE_DELAY_IN_MILLIS + random.nextInt(FUZZY_DELAY_IN_MILLIS);
+        numMessages = 1;
+        System.out.println("DELAY(" + delayInMillis + ", " + numMessages + ")");
+      } else {
+        System.out.print(".");
+        delayInMillis = 0;
+        numMessages = MIN_QPS + random.nextInt(QPS_RANGE);
+      }
+
+      if (writeToFile) { // Won't use threading for the file write.
+        publishDataToFile(fileName, numMessages, delayInMillis);
+      } else { // Write to PubSub.
+        // Start a thread to inject some data.
+        new Thread(){
+          @Override
+          public void run() {
+            try {
+              publishData(numMessages, delayInMillis);
+            } catch (IOException e) {
+              System.err.println(e);
+            }
+          }
+        }.start();
+      }
+
+      // Wait before creating another injector thread.
+      Thread.sleep(THREAD_SLEEP_MS);
+    }
+  }
+}

[12/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggersSet.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggersSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggersSet.java
deleted file mode 100644
index 6da673d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/FinishedTriggersSet.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.common.collect.Sets;
-
-import java.util.Set;
-
-/**
- * An implementation of {@link FinishedTriggers} atop a user-provided mutable {@link Set}.
- */
-public class FinishedTriggersSet implements FinishedTriggers {
-
-  private final Set<ExecutableTrigger<?>> finishedTriggers;
-
-  private FinishedTriggersSet(Set<ExecutableTrigger<?>> finishedTriggers) {
-    this.finishedTriggers = finishedTriggers;
-  }
-
-  public static FinishedTriggersSet fromSet(Set<ExecutableTrigger<?>> finishedTriggers) {
-    return new FinishedTriggersSet(finishedTriggers);
-  }
-
-  /**
-   * Returns a mutable {@link Set} of the underlying triggers that are finished.
-   */
-  public Set<ExecutableTrigger<?>> getFinishedTriggers() {
-    return finishedTriggers;
-  }
-
-  @Override
-  public boolean isFinished(ExecutableTrigger<?> trigger) {
-    return finishedTriggers.contains(trigger);
-  }
-
-  @Override
-  public void setFinished(ExecutableTrigger<?> trigger, boolean value) {
-    if (value) {
-      finishedTriggers.add(trigger);
-    } else {
-      finishedTriggers.remove(trigger);
-    }
-  }
-
-  @Override
-  public void clearRecursively(ExecutableTrigger<?> trigger) {
-    finishedTriggers.remove(trigger);
-    for (ExecutableTrigger<?> subTrigger : trigger.subTriggers()) {
-      clearRecursively(subTrigger);
-    }
-  }
-
-  @Override
-  public FinishedTriggersSet copy() {
-    return fromSet(Sets.newHashSet(finishedTriggers));
-  }
-
-}
-
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcpCredentialFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcpCredentialFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcpCredentialFactory.java
deleted file mode 100644
index 8b6f495..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcpCredentialFactory.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.auth.oauth2.Credential;
-import com.google.cloud.dataflow.sdk.options.GcpOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-import java.io.IOException;
-import java.security.GeneralSecurityException;
-
-/**
- * Construct an oauth credential to be used by the SDK and the SDK workers.
- * Returns a GCP credential.
- */
-public class GcpCredentialFactory implements CredentialFactory {
-  private GcpOptions options;
-
-  private GcpCredentialFactory(GcpOptions options) {
-    this.options = options;
-  }
-
-  public static GcpCredentialFactory fromOptions(PipelineOptions options) {
-    return new GcpCredentialFactory(options.as(GcpOptions.class));
-  }
-
-  @Override
-  public Credential getCredential()  throws IOException, GeneralSecurityException {
-    return Credentials.getCredential(options);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java
deleted file mode 100644
index ce933f5..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsIOChannelFactory.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.options.GcsOptions;
-import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
-
-import java.io.IOException;
-import java.nio.channels.ReadableByteChannel;
-import java.nio.channels.WritableByteChannel;
-import java.util.Collection;
-import java.util.LinkedList;
-import java.util.List;
-
-/**
- * Implements IOChannelFactory for GCS.
- */
-public class GcsIOChannelFactory implements IOChannelFactory {
-
-  private final GcsOptions options;
-
-  public GcsIOChannelFactory(GcsOptions options) {
-    this.options = options;
-  }
-
-  @Override
-  public Collection<String> match(String spec) throws IOException {
-    GcsPath path = GcsPath.fromUri(spec);
-    GcsUtil util = options.getGcsUtil();
-    List<GcsPath> matched = util.expand(path);
-
-    List<String> specs = new LinkedList<>();
-    for (GcsPath match : matched) {
-      specs.add(match.toString());
-    }
-
-    return specs;
-  }
-
-  @Override
-  public ReadableByteChannel open(String spec) throws IOException {
-    GcsPath path = GcsPath.fromUri(spec);
-    GcsUtil util = options.getGcsUtil();
-    return util.open(path);
-  }
-
-  @Override
-  public WritableByteChannel create(String spec, String mimeType)
-      throws IOException {
-    GcsPath path = GcsPath.fromUri(spec);
-    GcsUtil util = options.getGcsUtil();
-    return util.create(path, mimeType);
-  }
-
-  @Override
-  public long getSizeBytes(String spec) throws IOException {
-    GcsPath path = GcsPath.fromUri(spec);
-    GcsUtil util = options.getGcsUtil();
-    return util.fileSize(path);
-  }
-
-  @Override
-  public boolean isReadSeekEfficient(String spec) throws IOException {
-    // TODO It is incorrect to return true here for files with content encoding set to gzip.
-    return true;
-  }
-
-  @Override
-  public String resolve(String path, String other) throws IOException {
-    return GcsPath.fromUri(path).resolve(other).toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsStager.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsStager.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsStager.java
deleted file mode 100644
index 4219bc4..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsStager.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.services.dataflow.model.DataflowPackage;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineDebugOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.common.base.Preconditions;
-
-import java.util.List;
-
-/**
- * Utility class for staging files to GCS.
- */
-public class GcsStager implements Stager {
-  private DataflowPipelineOptions options;
-
-  private GcsStager(DataflowPipelineOptions options) {
-    this.options = options;
-  }
-
-  public static GcsStager fromOptions(PipelineOptions options) {
-    return new GcsStager(options.as(DataflowPipelineOptions.class));
-  }
-
-  @Override
-  public List<DataflowPackage> stageFiles() {
-    Preconditions.checkNotNull(options.getStagingLocation());
-    List<String> filesToStage = options.getFilesToStage();
-    String windmillBinary =
-        options.as(DataflowPipelineDebugOptions.class).getOverrideWindmillBinary();
-    if (windmillBinary != null) {
-      filesToStage.add("windmill_main=" + windmillBinary);
-    }
-    return PackageUtil.stageClasspathElements(
-        options.getFilesToStage(), options.getStagingLocation());
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java
deleted file mode 100644
index 8fd258f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java
+++ /dev/null
@@ -1,406 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.googleapis.json.GoogleJsonResponseException;
-import com.google.api.client.util.BackOff;
-import com.google.api.client.util.Sleeper;
-import com.google.api.services.storage.Storage;
-import com.google.api.services.storage.model.Objects;
-import com.google.api.services.storage.model.StorageObject;
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.GcsOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
-import com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadChannel;
-import com.google.cloud.hadoop.gcsio.GoogleCloudStorageWriteChannel;
-import com.google.cloud.hadoop.gcsio.ObjectWriteConditions;
-import com.google.cloud.hadoop.util.ApiErrorExtractor;
-import com.google.cloud.hadoop.util.AsyncWriteChannelOptions;
-import com.google.cloud.hadoop.util.ClientRequestHelper;
-import com.google.cloud.hadoop.util.ResilientOperation;
-import com.google.cloud.hadoop.util.RetryDeterminer;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.nio.channels.SeekableByteChannel;
-import java.nio.channels.WritableByteChannel;
-import java.util.Collections;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.concurrent.ExecutorService;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.annotation.Nullable;
-
-/**
- * Provides operations on GCS.
- */
-public class GcsUtil {
-  /**
-   * A {@link DefaultValueFactory} that builds a {@link GcsUtil} from the transport flags
-   * specified on the {@link PipelineOptions}.
-   */
-  public static class GcsUtilFactory implements DefaultValueFactory<GcsUtil> {
-    /**
-     * Builds a {@link GcsUtil} from the given {@link PipelineOptions}, viewed as
-     * {@link GcsOptions}.
-     *
-     * <p>Every call constructs a new instance: the storage client, executor service and upload
-     * buffer size are all read from the options.
-     */
-    @Override
-    public GcsUtil create(PipelineOptions options) {
-      LOG.debug("Creating new GcsUtil");
-      GcsOptions gcsOptions = options.as(GcsOptions.class);
-
-      Storage client = Transport.newStorageClient(gcsOptions).build();
-      ExecutorService executor = gcsOptions.getExecutorService();
-      Integer bufferSizeBytes = gcsOptions.getGcsUploadBufferSizeBytes();
-      return new GcsUtil(client, executor, bufferSizeBytes);
-    }
-  }
-
-  private static final Logger LOG = LoggerFactory.getLogger(GcsUtil.class);
-
-  /** Maximum number of items to retrieve per Objects.List request. */
-  private static final long MAX_LIST_ITEMS_PER_CALL = 1024;
-
-  /**
-   * Matches a glob containing a wildcard, capturing the portion before the first wildcard.
-   *
-   * <p>The wildcard characters recognized here are {@code [}, {@code *} and {@code ?}; the text
-   * preceding the first of them is available as the named group {@code PREFIX}.
-   */
-  private static final Pattern GLOB_PREFIX = Pattern.compile("(?<PREFIX>[^\\[*?]*)[\\[*?].*");
-
-  /** Regular expression fragment matching two consecutive asterisks ({@code **}). */
-  private static final String RECURSIVE_WILDCARD = "[*]{2}";
-
-  /**
-   * A {@link Pattern} for globs with a recursive wildcard, i.e. any string that contains
-   * {@code **} anywhere.
-   */
-  private static final Pattern RECURSIVE_GCS_PATTERN =
-      Pattern.compile(".*" + RECURSIVE_WILDCARD + ".*");
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /** Client for the GCS API. Not final because it can be swapped via setStorageClient. */
-  private Storage storageClient;
-  /** Buffer size for GCS uploads (in bytes); {@code null} leaves the channel's own setting. */
-  @Nullable private final Integer uploadBufferSizeBytes;
-
-  // Helper delegate for turning IOExceptions from API calls into higher-level semantics
-  // (used in this class for "item not found" and "access denied" checks).
-  private final ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
-
-  // Exposed for testing. Passed to the write channel created by create().
-  final ExecutorService executorService;
-
-  /**
-   * Validates that the given GCS pattern uses only supported wildcards.
-   *
-   * @return {@code true} for every supported pattern; this method never returns {@code false}
-   * @throws IllegalArgumentException if the pattern contains a recursive wildcard ({@code **})
-   */
-  public boolean isGcsPatternSupported(String gcsPattern) {
-    boolean usesRecursiveWildcard = RECURSIVE_GCS_PATTERN.matcher(gcsPattern).matches();
-    if (!usesRecursiveWildcard) {
-      return true;
-    }
-
-    throw new IllegalArgumentException("Unsupported wildcard usage in \"" + gcsPattern + "\": "
-        + " recursive wildcards are not supported.");
-  }
-
-  /**
-   * Creates a {@link GcsUtil}. The constructor is private; within this class it is invoked by
-   * {@link GcsUtilFactory#create}.
-   *
-   * @param storageClient client used for all GCS API requests
-   * @param executorService executor handed to the write channels created by this instance
-   * @param uploadBufferSizeBytes upload buffer size to apply to write channels, or {@code null}
-   *     to leave the channel's own setting untouched
-   */
-  private GcsUtil(
-      Storage storageClient, ExecutorService executorService,
-      @Nullable Integer uploadBufferSizeBytes) {
-    this.storageClient = storageClient;
-    this.uploadBufferSizeBytes = uploadBufferSizeBytes;
-    this.executorService = executorService;
-  }
-
-  // Use this only for testing purposes.
-  // Replaces the GCS client used by all subsequent requests made through this instance.
-  protected void setStorageClient(Storage storageClient) {
-    this.storageClient = storageClient;
-  }
-
-  /**
-   * Expands a pattern into matched paths. The pattern path may contain globs, which are expanded
-   * in the result. For patterns that only match a single object, we ensure that the object
-   * exists.
-   *
-   * <p>A pattern without any wildcard is checked with a single metadata request. A glob is
-   * resolved by listing every object that starts with the text before the first wildcard and
-   * filtering the names against the regular expression produced by {@code globToRegexp}.
-   *
-   * @param gcsPattern the bucket and object pattern to expand
-   * @return the matching paths; empty if a non-glob object does not exist or no listed object
-   *     matches the glob
-   * @throws IllegalArgumentException if the pattern contains a recursive wildcard
-   * @throws IOException if a request fails for a reason other than the object not being found,
-   *     or if the calling thread is interrupted (the interrupt status is restored first)
-   */
-  public List<GcsPath> expand(GcsPath gcsPattern) throws IOException {
-    Preconditions.checkArgument(isGcsPatternSupported(gcsPattern.getObject()));
-    Matcher m = GLOB_PREFIX.matcher(gcsPattern.getObject());
-    if (!m.matches()) {
-      // Not a glob.
-      Storage.Objects.Get getObject = storageClient.objects().get(
-          gcsPattern.getBucket(), gcsPattern.getObject());
-      try {
-        // Use a get request to fetch the metadata of the object,
-        // the request has strong global consistency.
-        ResilientOperation.retry(
-            ResilientOperation.getGoogleRequestCallable(getObject),
-            new AttemptBoundedExponentialBackOff(3, 200),
-            RetryDeterminer.SOCKET_ERRORS,
-            IOException.class);
-        return ImmutableList.of(gcsPattern);
-      } catch (IOException | InterruptedException e) {
-        if (e instanceof IOException && errorExtractor.itemNotFound((IOException) e)) {
-          // If the path was not found, return an empty list.
-          return ImmutableList.of();
-        }
-        if (e instanceof InterruptedException) {
-          // The interruption is reported as an IOException; keep the thread's interrupt
-          // status set so that callers further up can still observe it.
-          Thread.currentThread().interrupt();
-        }
-        throw new IOException("Unable to match files for pattern " + gcsPattern, e);
-      }
-    }
-
-    // Part before the first wildcard character.
-    String prefix = m.group("PREFIX");
-    Pattern p = Pattern.compile(globToRegexp(gcsPattern.getObject()));
-
-    LOG.debug("matching files in bucket {}, prefix {} against pattern {}", gcsPattern.getBucket(),
-        prefix, p.toString());
-
-    // List all objects that start with the prefix (including objects in sub-directories).
-    Storage.Objects.List listObject = storageClient.objects().list(gcsPattern.getBucket());
-    listObject.setMaxResults(MAX_LIST_ITEMS_PER_CALL);
-    listObject.setPrefix(prefix);
-
-    String pageToken = null;
-    List<GcsPath> results = new LinkedList<>();
-    do {
-      if (pageToken != null) {
-        listObject.setPageToken(pageToken);
-      }
-
-      Objects objects;
-      try {
-        objects = ResilientOperation.retry(
-            ResilientOperation.getGoogleRequestCallable(listObject),
-            new AttemptBoundedExponentialBackOff(3, 200),
-            RetryDeterminer.SOCKET_ERRORS,
-            IOException.class);
-      } catch (Exception e) {
-        if (e instanceof InterruptedException) {
-          // Same as above: do not lose the interrupt when wrapping it.
-          Thread.currentThread().interrupt();
-        }
-        throw new IOException("Unable to match files in bucket " + gcsPattern.getBucket()
-            +  ", prefix " + prefix + " against pattern " + p.toString(), e);
-      }
-      Preconditions.checkNotNull(objects);
-
-      // A page without items ends the listing.
-      if (objects.getItems() == null) {
-        break;
-      }
-
-      // Filter objects based on the regex.
-      for (StorageObject o : objects.getItems()) {
-        String name = o.getName();
-        // Skip directories, which end with a slash.
-        if (p.matcher(name).matches() && !name.endsWith("/")) {
-          LOG.debug("Matched object: {}", name);
-          results.add(GcsPath.fromObject(o));
-        }
-      }
-
-      pageToken = objects.getNextPageToken();
-    } while (pageToken != null);
-
-    return results;
-  }
-
-  /**
-   * Returns the upload buffer size (in bytes) this instance was constructed with, which may be
-   * {@code null}.
-   */
-  @VisibleForTesting
-  @Nullable
-  Integer getUploadBufferSizeBytes() {
-    return uploadBufferSizeBytes;
-  }
-
-  /**
-   * Returns the file size from GCS or throws {@link FileNotFoundException}
-   * if the resource does not exist.
-   *
-   * <p>Delegates to the three-argument overload with an
-   * {@code AttemptBoundedExponentialBackOff(4, 200)} and {@link Sleeper#DEFAULT}.
-   *
-   * @param path the bucket and object to look up
-   * @throws IOException if the size cannot be retrieved
-   */
-  public long fileSize(GcsPath path) throws IOException {
-    return fileSize(path, new AttemptBoundedExponentialBackOff(4, 200), Sleeper.DEFAULT);
-  }
-
-  /**
-   * Returns the file size from GCS or throws {@link FileNotFoundException}
-   * if the resource does not exist.
-   *
-   * @param path the bucket and object to look up
-   * @param backoff the back-off policy handed to the retrying request
-   * @param sleeper the sleeper handed to the retrying request
-   * @return the size reported in the object's metadata
-   * @throws FileNotFoundException if GCS reports that the object was not found
-   * @throws IOException if the request fails for any other reason, or if the calling thread is
-   *     interrupted (the interrupt status is restored first)
-   */
-  @VisibleForTesting
-  long fileSize(GcsPath path, BackOff backoff, Sleeper sleeper) throws IOException {
-    Storage.Objects.Get getObject =
-        storageClient.objects().get(path.getBucket(), path.getObject());
-    try {
-      StorageObject object = ResilientOperation.retry(
-          ResilientOperation.getGoogleRequestCallable(getObject),
-          backoff,
-          RetryDeterminer.SOCKET_ERRORS,
-          IOException.class,
-          sleeper);
-      return object.getSize().longValue();
-    } catch (Exception e) {
-      if (e instanceof IOException && errorExtractor.itemNotFound((IOException) e)) {
-        throw new FileNotFoundException(path.toString());
-      }
-      if (e instanceof InterruptedException) {
-        // The interruption is reported as an IOException; keep the thread's interrupt
-        // status set so that callers further up can still observe it.
-        Thread.currentThread().interrupt();
-      }
-      throw new IOException("Unable to get file size", e);
-    }
-  }
-
-  /**
-   * Opens an object in GCS for reading.
-   *
-   * <p>The returned channel is a {@link GoogleCloudStorageReadChannel} bound to this instance's
-   * storage client and error extractor.
-   *
-   * @param path the GCS filename to read from
-   * @return a SeekableByteChannel that can read the object data
-   * @throws IOException if the read channel cannot be created
-   */
-  public SeekableByteChannel open(GcsPath path)
-      throws IOException {
-    ClientRequestHelper<StorageObject> requestHelper = new ClientRequestHelper<StorageObject>();
-    return new GoogleCloudStorageReadChannel(
-        storageClient, path.getBucket(), path.getObject(), errorExtractor, requestHelper);
-  }
-
-  /**
-   * Creates an object in GCS.
-   *
-   * <p>Returns a WritableByteChannel that can be used to write data to the
-   * object.
-   *
-   * @param path the GCS file to write to
-   * @param type the type of object, eg "text/plain".
-   * @return an initialized WritableByteChannel that writes to the object
-   * @throws IOException if the write channel cannot be initialized
-   */
-  public WritableByteChannel create(GcsPath path,
-      String type) throws IOException {
-    GoogleCloudStorageWriteChannel channel = new GoogleCloudStorageWriteChannel(
-        executorService,
-        storageClient,
-        new ClientRequestHelper<StorageObject>(),
-        path.getBucket(),
-        path.getObject(),
-        AsyncWriteChannelOptions.newBuilder().build(),
-        new ObjectWriteConditions(),
-        Collections.<String, String>emptyMap(),
-        type);
-    // Only override the channel's buffer size when one was configured for this instance.
-    if (uploadBufferSizeBytes != null) {
-      channel.setUploadBufferSize(uploadBufferSizeBytes);
-    }
-    // The buffer size is applied before initialize() is called.
-    channel.initialize();
-    return channel;
-  }
-
-  /**
-   * Returns whether the GCS bucket exists. This will return false if the bucket
-   * is inaccessible due to permissions, because the overload this method delegates to
-   * treats "access denied" the same as "not found".
-   *
-   * <p>Delegates with an {@code AttemptBoundedExponentialBackOff(4, 200)} and
-   * {@link Sleeper#DEFAULT}.
-   */
-  public boolean bucketExists(GcsPath path) throws IOException {
-    return bucketExists(path, new AttemptBoundedExponentialBackOff(4, 200), Sleeper.DEFAULT);
-  }
-
-  /**
-   * Returns whether the GCS bucket exists. This will return false if the bucket
-   * is inaccessible due to permissions.
-   *
-   * @param path a path whose bucket is checked; the object part is not used
-   * @param backoff the back-off policy handed to the retrying request
-   * @param sleeper the sleeper handed to the retrying request
-   * @return {@code true} if the bucket metadata request succeeds, {@code false} if GCS answers
-   *     "not found" or "access denied"
-   * @throws IOException if the request fails for any other reason, or if the calling thread is
-   *     interrupted (the interrupt status is restored first)
-   */
-  @VisibleForTesting
-  boolean bucketExists(GcsPath path, BackOff backoff, Sleeper sleeper) throws IOException {
-    Storage.Buckets.Get getBucket =
-        storageClient.buckets().get(path.getBucket());
-
-    try {
-      ResilientOperation.retry(
-          ResilientOperation.getGoogleRequestCallable(getBucket),
-          backoff,
-          new RetryDeterminer<IOException>() {
-            @Override
-            public boolean shouldRetry(IOException e) {
-              // "Not found" and "access denied" are definitive answers; never retry them.
-              if (errorExtractor.itemNotFound(e) || errorExtractor.accessDenied(e)) {
-                return false;
-              }
-              return RetryDeterminer.SOCKET_ERRORS.shouldRetry(e);
-            }
-          },
-          IOException.class,
-          sleeper);
-      return true;
-    } catch (GoogleJsonResponseException e) {
-      if (errorExtractor.itemNotFound(e) || errorExtractor.accessDenied(e)) {
-        return false;
-      }
-      throw e;
-    } catch (InterruptedException e) {
-      // The interruption is reported as an IOException; keep the thread's interrupt
-      // status set so that callers further up can still observe it.
-      Thread.currentThread().interrupt();
-      throw new IOException(
-          String.format("Error while attempting to verify existence of bucket gs://%s",
-              path.getBucket()), e);
-    }
-  }
-
-  /**
-   * Expands glob expressions to regular expressions.
-   *
-   * <p>The translation is character by character:
-   * <ul>
-   *   <li>{@code *} becomes {@code [^/]*} and {@code ?} becomes {@code [^/]}, so neither
-   *       wildcard matches a slash;
-   *   <li>the regular expression metacharacters {@code . + { } ( ) | ^ $} are escaped with a
-   *       backslash;
-   *   <li>a backslash keeps the character that follows it free of special interpretation; a
-   *       backslash at the very end of the glob is emitted as an escaped backslash;
-   *   <li>every other character, including {@code [} and {@code ]}, is copied unchanged.
-   * </ul>
-   *
-   * @param globExp the glob expression to expand
-   * @return a string with the regular expression this glob expands to
-   */
-  static String globToRegexp(String globExp) {
-    StringBuilder dst = new StringBuilder();
-    char[] src = globExp.toCharArray();
-    int i = 0;
-    while (i < src.length) {
-      char c = src[i++];
-      switch (c) {
-        case '*':
-          dst.append("[^/]*");
-          break;
-        case '?':
-          dst.append("[^/]");
-          break;
-        case '.':
-        case '+':
-        case '{':
-        case '}':
-        case '(':
-        case ')':
-        case '|':
-        case '^':
-        case '$':
-          // These need to be escaped in regular expressions
-          dst.append('\\').append(c);
-          break;
-        case '\\':
-          i = doubleSlashes(dst, src, i);
-          break;
-        default:
-          dst.append(c);
-          break;
-      }
-    }
-    return dst.toString();
-  }
-
-  /**
-   * Handles a backslash that was just consumed from {@code src}.
-   *
-   * @param dst the regular expression being built
-   * @param src the characters of the glob
-   * @param i index of the character following the backslash; equals {@code src.length} when the
-   *     backslash was the last character
-   * @return the index from which the caller should continue scanning
-   */
-  private static int doubleSlashes(StringBuilder dst, char[] src, int i) {
-    // Emit the next character without special interpretation
-    dst.append('\\');
-    if (i < src.length) {
-      dst.append(src[i]);
-      i++;
-    } else {
-      // A backslash at the very end is treated like an escaped backslash
-      dst.append('\\');
-    }
-    return i;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowViaWindowSetDoFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowViaWindowSetDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowViaWindowSetDoFn.java
deleted file mode 100644
index 89a4fcb..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowViaWindowSetDoFn.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.DoFnRunner.ReduceFnExecutor;
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-import com.google.cloud.dataflow.sdk.util.state.StateInternals;
-import com.google.cloud.dataflow.sdk.values.KV;
-
-/**
- * A {@link DoFn} over {@link KeyedWorkItem}s that groups elements by window. This delegates all
- * of the logic to the {@link ReduceFnRunner}.
- *
- * @param <K> key type
- * @param <InputT> input value element type
- * @param <OutputT> output value element type
- * @param <W> window type
- * @param <RinT> the concrete {@link KeyedWorkItem} type consumed by this {@link DoFn}
- */
-@SystemDoFnInternal
-public class GroupAlsoByWindowViaWindowSetDoFn<
-        K, InputT, OutputT, W extends BoundedWindow, RinT extends KeyedWorkItem<K, InputT>>
-    extends DoFn<RinT, KV<K, OutputT>> implements ReduceFnExecutor<K, InputT, OutputT, W> {
-
-  /**
-   * Creates a {@link GroupAlsoByWindowViaWindowSetDoFn} for the given windowing strategy and
-   * reduce function, typed as a {@link DoFn} over {@link KeyedWorkItem}.
-   */
-  public static <K, InputT, OutputT, W extends BoundedWindow>
-      DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
-          WindowingStrategy<?, W> strategy, SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn) {
-    return new GroupAlsoByWindowViaWindowSetDoFn<>(strategy, reduceFn);
-  }
-
-  // Aggregator handed to the ReduceFnRunner in processElement.
-  protected final Aggregator<Long, Long> droppedDueToClosedWindow =
-      createAggregator(
-          GroupAlsoByWindowsDoFn.DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER, new Sum.SumLongFn());
-  // Aggregator exposed through getDroppedDueToLatenessAggregator(); not used elsewhere in
-  // this class.
-  protected final Aggregator<Long, Long> droppedDueToLateness =
-      createAggregator(GroupAlsoByWindowsDoFn.DROPPED_DUE_TO_LATENESS_COUNTER, new Sum.SumLongFn());
-
-  private final WindowingStrategy<Object, W> windowingStrategy;
-  private SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn;
-
-  private GroupAlsoByWindowViaWindowSetDoFn(
-      WindowingStrategy<?, W> windowingStrategy,
-      SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn) {
-    // The wildcard type argument is replaced by Object so the strategy can be stored in a
-    // field with a fully specified type.
-    @SuppressWarnings("unchecked")
-    WindowingStrategy<Object, W> noWildcard = (WindowingStrategy<Object, W>) windowingStrategy;
-    this.windowingStrategy = noWildcard;
-    this.reduceFn = reduceFn;
-  }
-
-  /**
-   * Processes one {@link KeyedWorkItem}: feeds its elements to a fresh {@link ReduceFnRunner},
-   * then fires each of its timers in iteration order, and finally persists the runner's state.
-   */
-  @Override
-  public void processElement(ProcessContext c) throws Exception {
-    KeyedWorkItem<K, InputT> element = c.element();
-
-    K key = c.element().key();
-    TimerInternals timerInternals = c.windowingInternals().timerInternals();
-
-    // It is the responsibility of the user of GroupAlsoByWindowsViaWindowSet to only
-    // provide a WindowingInternals instance with the appropriate key type for StateInternals.
-    @SuppressWarnings("unchecked")
-    StateInternals<K> stateInternals = (StateInternals<K>) c.windowingInternals().stateInternals();
-
-    // A new runner is built for every work item.
-    ReduceFnRunner<K, InputT, OutputT, W> reduceFnRunner =
-        new ReduceFnRunner<>(
-            key,
-            windowingStrategy,
-            stateInternals,
-            timerInternals,
-            c.windowingInternals(),
-            droppedDueToClosedWindow,
-            reduceFn,
-            c.getPipelineOptions());
-
-    // Elements first, then timers, then persist.
-    reduceFnRunner.processElements(element.elementsIterable());
-    for (TimerData timer : element.timersIterable()) {
-      reduceFnRunner.onTimer(timer);
-    }
-    reduceFnRunner.persist();
-  }
-
-  /** Returns this instance viewed as a {@link DoFn} over {@link KeyedWorkItem}. */
-  @Override
-  public DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> asDoFn() {
-    // Safe contravariant cast
-    @SuppressWarnings("unchecked")
-    DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> asFn =
-        (DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>>) this;
-    return asFn;
-  }
-
-  /** Returns the aggregator registered under the dropped-due-to-lateness counter name. */
-  @Override
-  public Aggregator<Long, Long> getDroppedDueToLatenessAggregator() {
-    return droppedDueToLateness;
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java
deleted file mode 100644
index 175921d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsDoFn.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.values.KV;
-
-/**
- * DoFn that merges windows and groups elements in those windows, optionally
- * combining values.
- *
- * @param <K> key type
- * @param <InputT> input value element type
- * @param <OutputT> output value element type
- * @param <W> window type
- */
-@SystemDoFnInternal
-public abstract class GroupAlsoByWindowsDoFn<K, InputT, OutputT, W extends BoundedWindow>
-    extends DoFn<KV<K, Iterable<WindowedValue<InputT>>>, KV<K, OutputT>> {
-  /** Name under which the {@link #droppedDueToClosedWindow} aggregator is created. */
-  public static final String DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER = "DroppedDueToClosedWindow";
-  /** Name under which the {@link #droppedDueToLateness} aggregator is created. */
-  public static final String DROPPED_DUE_TO_LATENESS_COUNTER = "DroppedDueToLateness";
-
-  // Both aggregators sum Long values and are available to subclasses.
-  protected final Aggregator<Long, Long> droppedDueToClosedWindow =
-      createAggregator(DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER, new Sum.SumLongFn());
-  protected final Aggregator<Long, Long> droppedDueToLateness =
-      createAggregator(DROPPED_DUE_TO_LATENESS_COUNTER, new Sum.SumLongFn());
-
-  /**
-   * Create the default {@link GroupAlsoByWindowsDoFn}, which uses window sets to implement the
-   * grouping.
-   *
-   * <p>The returned instance is a {@link GroupAlsoByWindowsViaOutputBufferDoFn} using a
-   * buffering {@link SystemReduceFn}, so each key's output is an {@link Iterable} of its values.
-   *
-   * @param windowingStrategy The window function and trigger to use for grouping
-   * @param inputCoder the input coder to use
-   */
-  public static <K, V, W extends BoundedWindow> GroupAlsoByWindowsDoFn<K, V, Iterable<V>, W>
-      createDefault(WindowingStrategy<?, W> windowingStrategy, Coder<V> inputCoder) {
-    return new GroupAlsoByWindowsViaOutputBufferDoFn<>(
-        windowingStrategy, SystemReduceFn.<K, V, W>buffering(inputCoder));
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFn.java
deleted file mode 100644
index d394e81..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GroupAlsoByWindowsViaOutputBufferDoFn.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.state.StateInternals;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.common.collect.Iterables;
-
-import org.joda.time.Instant;
-
-import java.util.List;
-
-/**
- * The default batch {@link GroupAlsoByWindowsDoFn} implementation, used when no specialized
- * "fast path" implementation is applicable.
- *
- * <p>All values of a key are pushed through a {@link ReduceFnRunner} in chunks while a
- * {@link BatchTimerInternals} stands in for the timer manager.
- */
-@SystemDoFnInternal
-public class GroupAlsoByWindowsViaOutputBufferDoFn<K, InputT, OutputT, W extends BoundedWindow>
-   extends GroupAlsoByWindowsDoFn<K, InputT, OutputT, W> {
-
-  private final WindowingStrategy<?, W> strategy;
-  private SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn;
-
-  /**
-   * @param windowingStrategy the windowing strategy handed to the {@link ReduceFnRunner}
-   * @param reduceFn the reduce function handed to the {@link ReduceFnRunner}
-   */
-  public GroupAlsoByWindowsViaOutputBufferDoFn(
-      WindowingStrategy<?, W> windowingStrategy,
-      SystemReduceFn<K, InputT, ?, OutputT, W> reduceFn) {
-    this.strategy = windowingStrategy;
-    this.reduceFn = reduceFn;
-  }
-
-  /**
-   * Groups all values of one key: processes them in chunks of 1000, advancing the emulated
-   * watermark and processing time after each chunk, then advances both to
-   * {@link BoundedWindow#TIMESTAMP_MAX_VALUE} and persists the runner's state.
-   */
-  @Override
-  public void processElement(
-      DoFn<KV<K, Iterable<WindowedValue<InputT>>>, KV<K, OutputT>>.ProcessContext c)
-          throws Exception {
-    K key = c.element().getKey();
-
-    // In batch all data for this key is already available, and the context offers no timer
-    // manager. A private one is created instead, and the watermark is emulated on the
-    // assumption that the values arrive in timestamp order.
-    BatchTimerInternals batchTimers = new BatchTimerInternals(Instant.now());
-
-    // Callers of GroupAlsoByWindowsViaOutputBufferDoFn are responsible for supplying a
-    // WindowingInternals whose StateInternals carries the matching key type.
-    @SuppressWarnings("unchecked")
-    StateInternals<K> keyedState = (StateInternals<K>) c.windowingInternals().stateInternals();
-
-    ReduceFnRunner<K, InputT, OutputT, W> runner =
-        new ReduceFnRunner<K, InputT, OutputT, W>(
-            key,
-            strategy,
-            keyedState,
-            batchTimers,
-            c.windowingInternals(),
-            droppedDueToClosedWindow,
-            reduceFn,
-            c.getPipelineOptions());
-
-    for (Iterable<WindowedValue<InputT>> chunk :
-        Iterables.partition(c.element().getValue(), 1000)) {
-      // Feed the whole chunk to the runner.
-      runner.processElements(chunk);
-
-      // Move the input watermark to the timestamp of the chunk's first element, which fires
-      // any timers that became eligible.
-      batchTimers.advanceInputWatermark(runner, chunk.iterator().next().getTimestamp());
-
-      // Likewise fire processing-time timers that are now due.
-      batchTimers.advanceProcessingTime(runner, Instant.now());
-
-      // The output watermark is deliberately left undefined: batch mode has no late data,
-      // so there is nothing to track as there would be in streaming.
-    }
-
-    // Close out any pending windows by moving the input watermark to infinity.
-    batchTimers.advanceInputWatermark(runner, BoundedWindow.TIMESTAMP_MAX_VALUE);
-
-    // Then move processing time to infinity so that every remaining timer fires.
-    batchTimers.advanceProcessingTime(runner, BoundedWindow.TIMESTAMP_MAX_VALUE);
-
-    runner.persist();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java
deleted file mode 100644
index f7d0b9a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelFactory.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.nio.channels.ReadableByteChannel;
-import java.nio.channels.WritableByteChannel;
-import java.util.Collection;
-
-/**
- * Defines a factory for working with read and write channels.
- *
- * <p>Channels provide an abstract API for IO operations.
- *
- * <p>See <a href="http://docs.oracle.com/javase/7/docs/api/java/nio/channels/package-summary.html"
- * >Java NIO Channels</a>
- */
-public interface IOChannelFactory {
-
-  /**
-   * Matches a specification, which may contain globs, against available
-   * resources.
-   *
-   * <p>Glob handling is dependent on the implementation.  Implementations should
-   * all support globs in the final component of a path (eg /foo/bar/*.txt),
-   * however they are not required to support globs in the directory paths.
-   *
-   * <p>The list of resources returned are required to exist and not represent abstract
-   * resources such as symlinks and directories.
-   */
-  Collection<String> match(String spec) throws IOException;
-
-  /**
-   * Returns a read channel for the given specification.
-   *
-   * <p>The specification is not expanded; it is used verbatim.
-   *
-   * <p>If seeking is supported, then this returns a
-   * {@link java.nio.channels.SeekableByteChannel}.
-   */
-  ReadableByteChannel open(String spec) throws IOException;
-
-  /**
-   * Returns a write channel for the given specification.
-   *
-   * <p>The specification is not expanded; it is used verbatim.
-   */
-  WritableByteChannel create(String spec, String mimeType) throws IOException;
-
-  /**
-   * Returns the size in bytes for the given specification.
-   *
-   * <p>The specification is not expanded; it is used verbatim.
-   *
-   * <p>{@link FileNotFoundException} will be thrown if the resource does not exist.
-   */
-  long getSizeBytes(String spec) throws IOException;
-
-  /**
-   * Returns {@code true} if the channel created when invoking method {@link #open} for the given
-   * file specification is guaranteed to be of type {@link java.nio.channels.SeekableByteChannel
-   * SeekableByteChannel} and if seeking into positions of the channel is recommended. Returns
-   * {@code false} if the channel returned is not a {@code SeekableByteChannel}. May return
-   * {@code false} even if the channel returned is a {@code SeekableByteChannel}, if seeking is not
-   * efficient for the given file specification.
-   *
-   * <p>Only efficiently seekable files can be split into offset ranges.
-   *
-   * <p>The specification is not expanded; it is used verbatim.
-   */
-  boolean isReadSeekEfficient(String spec) throws IOException;
-
-  /**
-   * Resolve the given {@code other} against the {@code path}.
-   *
-   * <p>If the {@code other} parameter is an absolute path then this method trivially returns
-   * other. If {@code other} is an empty path then this method trivially returns the given
-   * {@code path}. Otherwise this method considers the given {@code path} to be a directory and
-   * resolves the {@code other} path against this path. In the simplest case, the {@code other}
-   * path does not have a root component, in which case this method joins the {@code other} path
-   * to the given {@code path} and returns a resulting path that ends with the {@code other} path.
-   * Where the {@code other} path has a root component then resolution is highly implementation
-   * dependent and therefore unspecified.
-   */
-  String resolve(String path, String other) throws IOException;
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelUtils.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelUtils.java
deleted file mode 100644
index cbf420e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IOChannelUtils.java
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.options.GcsOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.nio.channels.WritableByteChannel;
-import java.text.DecimalFormat;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Provides utilities for creating read and write channels.
- */
-public class IOChannelUtils {
-  // TODO: add registration mechanism for adding new schemes.
-  private static final Map<String, IOChannelFactory> FACTORY_MAP =
-      Collections.synchronizedMap(new HashMap<String, IOChannelFactory>());
-
-  // Pattern that matches shard placeholders within a shard template.
-  private static final Pattern SHARD_FORMAT_RE = Pattern.compile("(S+|N+)");
-
-  /**
-   * Associates a scheme with an {@link IOChannelFactory}.
-   *
-   * <p>The given factory is used to construct read and write channels when
-   * a URI is provided with the given scheme.
-   *
-   * <p>For example, when reading from "gs://bucket/path", the scheme "gs" is
-   * used to lookup the appropriate factory.
-   */
-  public static void setIOFactory(String scheme, IOChannelFactory factory) {
-    FACTORY_MAP.put(scheme, factory);
-  }
-
-  /**
-   * Registers standard factories globally. This requires {@link PipelineOptions}
-   * to provide, e.g., credentials for GCS.
-   */
-  public static void registerStandardIOFactories(PipelineOptions options) {
-    setIOFactory("gs", new GcsIOChannelFactory(options.as(GcsOptions.class)));
-  }
-
-  /**
-   * Creates a write channel for the given filename.
-   */
-  public static WritableByteChannel create(String filename, String mimeType)
-      throws IOException {
-    return getFactory(filename).create(filename, mimeType);
-  }
-
-  /**
-   * Creates a write channel for the given file components.
-   *
-   * <p>If numShards is 1, a plain channel for that single shard is returned;
-   * otherwise a ShardingWritableByteChannel is returned.
-   *
-   * <p>Shard numbers are 0 based, meaning they start with 0 and end at the
-   * number of shards - 1.
-   */
-  public static WritableByteChannel create(String prefix, String shardTemplate,
-      String suffix, int numShards, String mimeType) throws IOException {
-    // A single shard needs no fan-out wrapper: hand back the plain channel.
-    if (numShards == 1) {
-      return create(constructName(prefix, shardTemplate, suffix, 0, 1),
-                    mimeType);
-    }
-
-    // It is the caller's responsibility to close the returned channel.
-    @SuppressWarnings("resource")
-    ShardingWritableByteChannel shardingChannel =
-        new ShardingWritableByteChannel();
-
-    // Tracks every generated name so that a template which maps two shards to
-    // the same output is rejected instead of silently overwriting data.
-    Set<String> outputNames = new HashSet<>();
-    try {
-      for (int i = 0; i < numShards; i++) {
-        String outputName =
-            constructName(prefix, shardTemplate, suffix, i, numShards);
-        if (!outputNames.add(outputName)) {
-          throw new IllegalArgumentException(
-              "Shard name collision detected for: " + outputName);
-        }
-        WritableByteChannel channel = create(outputName, mimeType);
-        shardingChannel.addChannel(channel);
-      }
-    } catch (IOException | RuntimeException e) {
-      // The shards opened so far would otherwise leak, because the caller never
-      // receives the sharding channel needed to close them.
-      // NOTE(review): assumes ShardingWritableByteChannel#close() closes every
-      // channel registered through addChannel -- confirm.
-      try {
-        shardingChannel.close();
-      } catch (IOException closeFailure) {
-        e.addSuppressed(closeFailure);
-      }
-      throw e;
-    }
-
-    return shardingChannel;
-  }
-
-  /**
-   * Returns the size in bytes for the given specification.
-   *
-   * <p>The specification is not expanded; it is used verbatim.
-   *
-   * <p>{@link FileNotFoundException} will be thrown if the resource does not exist.
-   */
-  public static long getSizeBytes(String spec) throws IOException {
-    return getFactory(spec).getSizeBytes(spec);
-  }
-
-  /**
-   * Constructs a fully qualified name from components.
-   *
-   * <p>The name is built from a prefix, shard template (with shard numbers
-   * applied), and a suffix.  All components are required, but may be empty
-   * strings.
-   *
-   * <p>Within a shard template, repeating sequences of the letters "S" or "N"
-   * are replaced with the shard number, or number of shards respectively.  The
-   * numbers are formatted with leading zeros to match the length of the
-   * repeated sequence of letters.
-   *
-   * <p>For example, if prefix = "output", shardTemplate = "-SSS-of-NNN", and
-   * suffix = ".txt", with shardNum = 1 and numShards = 100, the following is
-   * produced:  "output-001-of-100.txt".
-   */
-  public static String constructName(String prefix,
-      String shardTemplate, String suffix, int shardNum, int numShards) {
-    // Matcher API works with StringBuffer, rather than StringBuilder.
-    StringBuffer sb = new StringBuffer();
-    sb.append(prefix);
-
-    Matcher m = SHARD_FORMAT_RE.matcher(shardTemplate);
-    while (m.find()) {
-      boolean isShardNum = (m.group(1).charAt(0) == 'S');
-
-      char[] zeros = new char[m.end() - m.start()];
-      Arrays.fill(zeros, '0');
-      DecimalFormat df = new DecimalFormat(String.valueOf(zeros));
-      String formatted = df.format(isShardNum
-                                   ? shardNum
-                                   : numShards);
-      m.appendReplacement(sb, formatted);
-    }
-    m.appendTail(sb);
-
-    sb.append(suffix);
-    return sb.toString();
-  }
-
-  private static final Pattern URI_SCHEME_PATTERN = Pattern.compile(
-      "(?<scheme>[a-zA-Z][-a-zA-Z0-9+.]*)://.*");
-
-  /**
-   * Returns the IOChannelFactory associated with an input specification.
-   */
-  public static IOChannelFactory getFactory(String spec) throws IOException {
-    // The spec is almost, but not quite, a URI. In particular,
-    // the reserved characters '[', ']', and '?' have meanings that differ
-    // from their use in the URI spec. ('*' is not reserved).
-    // Here, we just need the scheme, which is so circumscribed as to be
-    // very easy to extract with a regex.
-    Matcher matcher = URI_SCHEME_PATTERN.matcher(spec);
-
-    if (!matcher.matches()) {
-      return new FileIOChannelFactory();
-    }
-
-    String scheme = matcher.group("scheme");
-    IOChannelFactory ioFactory = FACTORY_MAP.get(scheme);
-    if (ioFactory != null) {
-      return ioFactory;
-    }
-
-    throw new IOException("Unable to find handler for " + spec);
-  }
-
-  /**
-   * Resolve the given {@code other} against the {@code path}.
-   *
-   * <p>If the {@code other} parameter is an absolute path then this method trivially returns
-   * other. If {@code other} is an empty path then this method trivially returns the given
-   * {@code path}. Otherwise this method considers the given {@code path} to be a directory and
-   * resolves the {@code other} path against this path. In the simplest case, the {@code other}
-   * path does not have a root component, in which case this method joins the {@code other} path
-   * to the given {@code path} and returns a resulting path that ends with the {@code other} path.
-   * Where the {@code other} path has a root component then resolution is highly implementation
-   * dependent and therefore unspecified.
-   */
-  public static String resolve(String path, String other) throws IOException {
-    return getFactory(path).resolve(path, other);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IllegalMutationException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IllegalMutationException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IllegalMutationException.java
deleted file mode 100644
index dbe249e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IllegalMutationException.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-/**
- * Thrown when a value appears to have been mutated, but that mutation is forbidden.
- */
-public class IllegalMutationException extends RuntimeException {
-  private Object savedValue;
-  private Object newValue;
-
-  public IllegalMutationException(String message, Object savedValue, Object newValue) {
-    super(message);
-    this.savedValue = savedValue;
-    this.newValue = newValue;
-  }
-
-  public IllegalMutationException(
-      String message, Object savedValue, Object newValue, Throwable cause) {
-    super(message, cause);
-    this.savedValue = savedValue;
-    this.newValue = newValue;
-  }
-
-  /**
-   * The original value, before the illegal mutation.
-   */
-  public Object getSavedValue() {
-    return savedValue;
-  }
-
-  /**
-   * The value after the illegal mutation.
-   */
-  public Object getNewValue() {
-    return newValue;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/InstanceBuilder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/InstanceBuilder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/InstanceBuilder.java
deleted file mode 100644
index 99442d0..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/InstanceBuilder.java
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.base.Joiner;
-import com.google.common.base.Preconditions;
-
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.lang.reflect.Modifier;
-import java.util.LinkedList;
-import java.util.List;
-
-import javax.annotation.Nullable;
-
-/**
- * Utility for creating objects dynamically.
- *
- * @param <T> the type of object returned by this instance builder
- */
-public class InstanceBuilder<T> {
-
-  /**
-   * Create an InstanceBuilder for the given type.
-   *
-   * <p>The specified type is the type returned by {@link #build}, which is
-   * typically the common base type or interface of the instance being
-   * constructed.
-   */
-  public static <T> InstanceBuilder<T> ofType(Class<T> type) {
-    return new InstanceBuilder<>(type);
-  }
-
-  /**
-   * Create an InstanceBuilder for the given type.
-   *
-   * <p>The specified type is the type returned by {@link #build}, which is
-   * typically the common base type or interface for the instance to be
-   * constructed.
-   *
-   * <p>The TypeDescriptor argument allows specification of generic types.  For example,
-   * a {@code List<String>} return type can be specified as
-   * {@code ofType(new TypeDescriptor<List<String>>(){})}.
-   */
-  public static <T> InstanceBuilder<T> ofType(TypeDescriptor<T> token) {
-    @SuppressWarnings("unchecked")
-    Class<T> type = (Class<T>) token.getRawType();
-    return new InstanceBuilder<>(type);
-  }
-
-  /**
-   * Sets the class name to be constructed.
-   *
-   * <p>If the name is a simple name (i.e., {@link Class#getSimpleName()}), then
-   * the package of the return type is added as a prefix.
-   *
-   * <p>The default class is the return type, specified in {@link #ofType}.
-   *
-   * <p>Modifies and returns the {@code InstanceBuilder} for chaining.
-   *
-   * @throws ClassNotFoundException if no class can be found by the given name
-   */
-  public InstanceBuilder<T> fromClassName(String name)
-      throws ClassNotFoundException {
-    Preconditions.checkArgument(factoryClass == null,
-        "Class name may only be specified once");
-    if (name.indexOf('.') == -1) {
-      name = type.getPackage().getName() + "." + name;
-    }
-
-    try {
-      factoryClass = Class.forName(name);
-    } catch (ClassNotFoundException e) {
-      throw new ClassNotFoundException(
-          String.format("Could not find class: %s", name), e);
-    }
-    return this;
-  }
-
-  /**
-   * Sets the factory class to use for instance construction.
-   *
-   * <p>Modifies and returns the {@code InstanceBuilder} for chaining.
-   */
-  public InstanceBuilder<T> fromClass(Class<?> factoryClass) {
-    this.factoryClass = factoryClass;
-    return this;
-  }
-
-  /**
-   * Sets the name of the factory method used to construct the instance.
-   *
-   * <p>The default, if no factory method was specified, is to look for a class
-   * constructor.
-   *
-   * <p>Modifies and returns the {@code InstanceBuilder} for chaining.
-   */
-  public InstanceBuilder<T> fromFactoryMethod(String methodName) {
-    Preconditions.checkArgument(this.methodName == null,
-        "Factory method name may only be specified once");
-    this.methodName = methodName;
-    return this;
-  }
-
-  /**
-   * Adds an argument to be passed to the factory method.
-   *
-   * <p>The argument type is used to lookup the factory method. This type may be
-   * a supertype of the argument value's class.
-   *
-   * <p>Modifies and returns the {@code InstanceBuilder} for chaining.
-   *
-   * @param <ArgT> the argument type
-   */
-  public <ArgT> InstanceBuilder<T> withArg(Class<? super ArgT> argType, ArgT value) {
-    parameterTypes.add(argType);
-    arguments.add(value);
-    return this;
-  }
-
-  /**
-   * Creates the instance by calling the factory method with the given
-   * arguments.
-   *
-   * <h3>Defaults</h3>
-   * <ul>
-   *   <li>factory class: defaults to the output type class, overridden
-   *   via {@link #fromClassName(String)} or {@link #fromClass(Class)}.
-   *   <li>factory method: defaults to using a constructor on the factory
-   *   class, overridden via {@link #fromFactoryMethod(String)}.
-   * </ul>
-   *
-   * @throws RuntimeException if the method does not exist, on type mismatch,
-   * or if the method cannot be made accessible.
-   */
-  public T build() {
-    if (factoryClass == null) {
-      factoryClass = type;
-    }
-
-    Class<?>[] types = parameterTypes
-        .toArray(new Class<?>[parameterTypes.size()]);
-
-    // TODO: cache results, to speed repeated type lookups?
-    if (methodName != null) {
-      return buildFromMethod(types);
-    } else {
-      return buildFromConstructor(types);
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Type of object to construct.
-   */
-  private final Class<T> type;
-
-  /**
-   * Types of parameters for Method lookup.
-   *
-   * @see Class#getDeclaredMethod(String, Class[])
-   */
-  private final List<Class<?>> parameterTypes = new LinkedList<>();
-
-  /**
-   * Arguments to factory method {@link Method#invoke(Object, Object...)}.
-   */
-  private final List<Object> arguments = new LinkedList<>();
-
-  /**
-   * Name of factory method, or null to invoke the constructor.
-   */
-  @Nullable private String methodName;
-
-  /**
-   * Factory class, or null to instantiate {@code type}.
-   */
-  @Nullable private Class<?> factoryClass;
-
-  private InstanceBuilder(Class<T> type) {
-    this.type = type;
-  }
-
-  /**
-   * Builds the instance by reflectively invoking the static factory method named
-   * {@code methodName} on {@code factoryClass}.
-   *
-   * @param types parameter types used to look up the factory method
-   * @return the factory method's result, cast to the target type
-   * @throws RuntimeException if the method cannot be found or its invocation fails; the
-   *     underlying reflection exception is attached as the cause
-   */
-  private T buildFromMethod(Class<?>[] types) {
-    Preconditions.checkState(factoryClass != null);
-    Preconditions.checkState(methodName != null);
-
-    try {
-      Method method = factoryClass.getDeclaredMethod(methodName, types);
-
-      Preconditions.checkState(Modifier.isStatic(method.getModifiers()),
-          "Factory method must be a static method for "
-              + factoryClass.getName() + "#" + method.getName()
-      );
-
-      Preconditions.checkState(type.isAssignableFrom(method.getReturnType()),
-          "Return type for " + factoryClass.getName() + "#" + method.getName()
-              + " must be assignable to " + type.getSimpleName());
-
-      // getDeclaredMethod also finds non-public methods, so open them up before invoking.
-      if (!method.isAccessible()) {
-        method.setAccessible(true);
-      }
-
-      Object[] args = arguments.toArray(new Object[arguments.size()]);
-      // The receiver is null because the method was verified to be static above.
-      return type.cast(method.invoke(null, args));
-
-    } catch (NoSuchMethodException e) {
-      // Chain the reflection failure so its stack trace is not lost.
-      throw new RuntimeException(
-          String.format("Unable to find factory method %s#%s(%s)",
-              factoryClass.getSimpleName(),
-              methodName,
-              Joiner.on(", ").join(types)),
-          e);
-
-    } catch (IllegalAccessException | InvocationTargetException e) {
-      throw new RuntimeException(
-          String.format("Failed to construct instance from factory method %s#%s(%s)",
-              factoryClass.getSimpleName(),
-              methodName,
-              Joiner.on(", ").join(types)),
-          e);
-    }
-  }
-
-  /**
-   * Builds the instance by reflectively invoking the constructor of {@code factoryClass}
-   * whose parameter types match {@code types}.
-   *
-   * @param types parameter types used to look up the constructor
-   * @return the newly constructed object, cast to the target type
-   * @throws RuntimeException if no matching constructor exists or construction fails; the
-   *     underlying reflection exception is attached as the cause
-   */
-  private T buildFromConstructor(Class<?>[] types) {
-    Preconditions.checkState(factoryClass != null);
-
-    try {
-      Constructor<?> constructor = factoryClass.getDeclaredConstructor(types);
-
-      Preconditions.checkState(type.isAssignableFrom(factoryClass),
-          "Instance type " + factoryClass.getName()
-              + " must be assignable to " + type.getSimpleName());
-
-      // getDeclaredConstructor also finds non-public constructors, so open them up first.
-      if (!constructor.isAccessible()) {
-        constructor.setAccessible(true);
-      }
-
-      Object[] args = arguments.toArray(new Object[arguments.size()]);
-      return type.cast(constructor.newInstance(args));
-
-    } catch (NoSuchMethodException e) {
-      // Chain the reflection failure so its stack trace is not lost.
-      throw new RuntimeException("Unable to find constructor for "
-          + factoryClass.getName(), e);
-
-    } catch (InvocationTargetException |
-        InstantiationException |
-        IllegalAccessException e) {
-      throw new RuntimeException("Failed to construct instance from "
-          + "constructor " + factoryClass.getName(), e);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IntervalBoundedExponentialBackOff.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IntervalBoundedExponentialBackOff.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IntervalBoundedExponentialBackOff.java
deleted file mode 100644
index 4406ee5..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/IntervalBoundedExponentialBackOff.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.util.BackOff;
-import com.google.common.base.Preconditions;
-
-/**
- * Implementation of {@link BackOff} that increases the back off period for each retry attempt
- * using a randomization function that grows exponentially.
- *
- * <p>Example: The initial interval is .5 seconds and the maximum interval is 60 secs.
- * For 14 tries the sequence will be (values in seconds):
- *
- * <pre>
- * retry#      retry_interval     randomized_interval
- * 1             0.5                [0.25,   0.75]
- * 2             0.75               [0.375,  1.125]
- * 3             1.125              [0.562,  1.687]
- * 4             1.687              [0.8435, 2.53]
- * 5             2.53               [1.265,  3.795]
- * 6             3.795              [1.897,  5.692]
- * 7             5.692              [2.846,  8.538]
- * 8             8.538              [4.269, 12.807]
- * 9            12.807              [6.403, 19.210]
- * 10           19.222              [9.611, 28.832]
- * 11           28.832              [14.416, 43.248]
- * 12           43.248              [21.624, 64.873]
- * 13           60.0                [30.0, 90.0]
- * 14           60.0                [30.0, 90.0]
- * </pre>
- *
- * <p>Implementation is not thread-safe.
- */
-public class IntervalBoundedExponentialBackOff implements BackOff {
-  public static final double DEFAULT_MULTIPLIER = 1.5;
-  public static final double DEFAULT_RANDOMIZATION_FACTOR = 0.5;
-  private final long maximumIntervalMillis;
-  private final long initialIntervalMillis;
-  private int currentAttempt;
-
-  public IntervalBoundedExponentialBackOff(int maximumIntervalMillis, long initialIntervalMillis) {
-    Preconditions.checkArgument(
-        maximumIntervalMillis > 0, "Maximum interval must be greater than zero.");
-    Preconditions.checkArgument(
-        initialIntervalMillis > 0, "Initial interval must be greater than zero.");
-    this.maximumIntervalMillis = maximumIntervalMillis;
-    this.initialIntervalMillis = initialIntervalMillis;
-    reset();
-  }
-
-  @Override
-  public void reset() {
-    currentAttempt = 1;
-  }
-
-  @Override
-  public long nextBackOffMillis() {
-    double currentIntervalMillis =
-        Math.min(
-            initialIntervalMillis * Math.pow(DEFAULT_MULTIPLIER, currentAttempt - 1),
-            maximumIntervalMillis);
-    double randomOffset =
-        (Math.random() * 2 - 1) * DEFAULT_RANDOMIZATION_FACTOR * currentIntervalMillis;
-    currentAttempt += 1;
-    return Math.round(currentIntervalMillis + randomOffset);
-  }
-
-  public boolean atMaxInterval() {
-    return initialIntervalMillis * Math.pow(DEFAULT_MULTIPLIER, currentAttempt - 1)
-        >= maximumIntervalMillis;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItem.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItem.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItem.java
deleted file mode 100644
index 355f0bb..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItem.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-
-/**
- * Interface that contains all the timers and elements associated with a specific work item.
- *
- * @param <K> the key type
- * @param <ElemT> the element type
- */
-public interface KeyedWorkItem<K, ElemT> {
-  /**
-   * Returns the key.
-   */
-  K key();
-
-  /**
-   * Returns an iterable containing the timers.
-   */
-  Iterable<TimerData> timersIterable();
-
-  /**
-   * Returns an iterable containing the elements.
-   */
-  Iterable<WindowedValue<ElemT>> elementsIterable();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItemCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItemCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItemCoder.java
deleted file mode 100644
index 398e82a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItemCoder.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerDataCoder;
-import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder;
-import com.google.common.collect.ImmutableList;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.List;
-
-/**
- * A {@link Coder} for {@link KeyedWorkItem KeyedWorkItems}.
- */
-public class KeyedWorkItemCoder<K, ElemT> extends StandardCoder<KeyedWorkItem<K, ElemT>> {
-  /**
-   * Create a new {@link KeyedWorkItemCoder} with the provided key coder, element coder, and window
-   * coder.
-   */
-  public static <K, ElemT> KeyedWorkItemCoder<K, ElemT> of(
-      Coder<K> keyCoder, Coder<ElemT> elemCoder, Coder<? extends BoundedWindow> windowCoder) {
-    return new KeyedWorkItemCoder<>(keyCoder, elemCoder, windowCoder);
-  }
-
-  @JsonCreator
-  public static <K, ElemT> KeyedWorkItemCoder<K, ElemT> of(
-      @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) List<Coder<?>> components) {
-    checkArgument(components.size() == 3, "Expecting 3 components, got %s", components.size());
-    @SuppressWarnings("unchecked")
-    Coder<K> keyCoder = (Coder<K>) components.get(0);
-    @SuppressWarnings("unchecked")
-    Coder<ElemT> elemCoder = (Coder<ElemT>) components.get(1);
-    @SuppressWarnings("unchecked")
-    Coder<? extends BoundedWindow> windowCoder = (Coder<? extends BoundedWindow>) components.get(2);
-    return new KeyedWorkItemCoder<>(keyCoder, elemCoder, windowCoder);
-  }
-
-  private final Coder<K> keyCoder;
-  private final Coder<ElemT> elemCoder;
-  private final Coder<? extends BoundedWindow> windowCoder;
-  private final Coder<Iterable<TimerData>> timersCoder;
-  private final Coder<Iterable<WindowedValue<ElemT>>> elemsCoder;
-
-  private KeyedWorkItemCoder(
-      Coder<K> keyCoder, Coder<ElemT> elemCoder, Coder<? extends BoundedWindow> windowCoder) {
-    this.keyCoder = keyCoder;
-    this.elemCoder = elemCoder;
-    this.windowCoder = windowCoder;
-    this.timersCoder = IterableCoder.of(TimerDataCoder.of(windowCoder));
-    this.elemsCoder = IterableCoder.of(FullWindowedValueCoder.of(elemCoder, windowCoder));
-  }
-
-  @Override
-  public void encode(KeyedWorkItem<K, ElemT> value, OutputStream outStream, Coder.Context context)
-      throws CoderException, IOException {
-    Coder.Context nestedContext = context.nested();
-    keyCoder.encode(value.key(), outStream, nestedContext);
-    timersCoder.encode(value.timersIterable(), outStream, nestedContext);
-    elemsCoder.encode(value.elementsIterable(), outStream, nestedContext);
-  }
-
-  @Override
-  public KeyedWorkItem<K, ElemT> decode(InputStream inStream, Coder.Context context)
-      throws CoderException, IOException {
-    Coder.Context nestedContext = context.nested();
-    K key = keyCoder.decode(inStream, nestedContext);
-    Iterable<TimerData> timers = timersCoder.decode(inStream, nestedContext);
-    Iterable<WindowedValue<ElemT>> elems = elemsCoder.decode(inStream, nestedContext);
-    return KeyedWorkItems.workItem(key, timers, elems);
-  }
-
-  @Override
-  public List<? extends Coder<?>> getCoderArguments() {
-    return ImmutableList.of(keyCoder, elemCoder, windowCoder);
-  }
-
-  @Override
-  public void verifyDeterministic() throws Coder.NonDeterministicException {
-    keyCoder.verifyDeterministic();
-    timersCoder.verifyDeterministic();
-    elemsCoder.verifyDeterministic();
-  }
-
-  /**
-   * {@inheritDoc}.
-   *
-   * {@link KeyedWorkItemCoder} is not consistent with equals as it can return a
-   * {@link KeyedWorkItem} of a type different from the originally encoded type.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return false;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItems.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItems.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItems.java
deleted file mode 100644
index 734bd2c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/KeyedWorkItems.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-import com.google.common.base.MoreObjects;
-import com.google.common.collect.Iterables;
-
-import java.util.Collections;
-import java.util.Objects;
-
-/**
- * Static utility methods that provide {@link KeyedWorkItem} implementations.
- */
-public class KeyedWorkItems {
-  /**
-   * Returns an implementation of {@link KeyedWorkItem} that wraps around an elements iterable.
-   *
-   * @param <K> the key type
-   * @param <ElemT> the element type
-   */
-  public static <K, ElemT> KeyedWorkItem<K, ElemT> elementsWorkItem(
-      K key, Iterable<WindowedValue<ElemT>> elementsIterable) {
-    return new ComposedKeyedWorkItem<>(key, Collections.<TimerData>emptyList(), elementsIterable);
-  }
-
-  /**
-   * Returns an implementation of {@link KeyedWorkItem} that wraps around a timers iterable.
-   *
-   * @param <K> the key type
-   * @param <ElemT> the element type
-   */
-  public static <K, ElemT> KeyedWorkItem<K, ElemT> timersWorkItem(
-      K key, Iterable<TimerData> timersIterable) {
-    return new ComposedKeyedWorkItem<>(
-        key, timersIterable, Collections.<WindowedValue<ElemT>>emptyList());
-  }
-
-  /**
-   * Returns an implementation of {@link KeyedWorkItem} that wraps around
-   * a timers iterable and an elements iterable.
-   *
-   * @param <K> the key type
-   * @param <ElemT> the element type
-   */
-  public static <K, ElemT> KeyedWorkItem<K, ElemT> workItem(
-      K key, Iterable<TimerData> timersIterable, Iterable<WindowedValue<ElemT>> elementsIterable) {
-    return new ComposedKeyedWorkItem<>(key, timersIterable, elementsIterable);
-  }
-
-  /**
-   * A {@link KeyedWorkItem} composed of an underlying key, {@link TimerData} iterable, and element
-   * iterable.
-   */
-  public static class ComposedKeyedWorkItem<K, ElemT> implements KeyedWorkItem<K, ElemT> {
-    private final K key;
-    private final Iterable<TimerData> timers;
-    private final Iterable<WindowedValue<ElemT>> elements;
-
-    private ComposedKeyedWorkItem(
-        K key, Iterable<TimerData> timers, Iterable<WindowedValue<ElemT>> elements) {
-      this.key = key;
-      this.timers = timers;
-      this.elements = elements;
-    }
-
-    @Override
-    public K key() {
-      return key;
-    }
-
-    @Override
-    public Iterable<TimerData> timersIterable() {
-      return timers;
-    }
-
-    @Override
-    public Iterable<WindowedValue<ElemT>> elementsIterable() {
-      return elements;
-    }
-
-    @Override
-    public boolean equals(Object other) {
-      if (other == null || !(other instanceof ComposedKeyedWorkItem)) {
-        return false;
-      }
-      KeyedWorkItem<?, ?> that = (KeyedWorkItem<?, ?>) other;
-      return Objects.equals(this.key, that.key())
-          && Iterables.elementsEqual(this.timersIterable(), that.timersIterable())
-          && Iterables.elementsEqual(this.elementsIterable(), that.elementsIterable());
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(key, timers, elements);
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(ComposedKeyedWorkItem.class)
-          .add("key", key)
-          .add("elements", elements)
-          .add("timers", timers)
-          .toString();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/LateDataDroppingDoFnRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/LateDataDroppingDoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/LateDataDroppingDoFnRunner.java
deleted file mode 100644
index 31927ab..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/LateDataDroppingDoFnRunner.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Function;
-import com.google.common.base.Predicate;
-import com.google.common.collect.Iterables;
-
-import org.joda.time.Instant;
-
-/**
- * A customized {@link DoFnRunner} that handles late data dropping for
- * a {@link KeyedWorkItem} input {@link DoFn}.
- *
- * <p>It expands windows before checking data lateness.
- *
- * <p>{@link KeyedWorkItem KeyedWorkItems} are always in empty windows.
- *
- * @param <K> key type
- * @param <InputT> input value element type
- * @param <OutputT> output value element type
- * @param <W> window type
- */
-public class LateDataDroppingDoFnRunner<K, InputT, OutputT, W extends BoundedWindow>
-    implements DoFnRunner<KeyedWorkItem<K, InputT>, KV<K, OutputT>> {
-  private final DoFnRunner<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFnRunner;
-  private final LateDataFilter lateDataFilter;
-
-  public LateDataDroppingDoFnRunner(
-      DoFnRunner<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFnRunner,
-      WindowingStrategy<?, ?> windowingStrategy,
-      TimerInternals timerInternals,
-      Aggregator<Long, Long> droppedDueToLateness) {
-    this.doFnRunner = doFnRunner;
-    lateDataFilter = new LateDataFilter(windowingStrategy, timerInternals, droppedDueToLateness);
-  }
-
-  @Override
-  public void startBundle() {
-    doFnRunner.startBundle();
-  }
-
-  @Override
-  public void processElement(WindowedValue<KeyedWorkItem<K, InputT>> elem) {
-    Iterable<WindowedValue<InputT>> nonLateElements = lateDataFilter.filter(
-        elem.getValue().key(), elem.getValue().elementsIterable());
-    KeyedWorkItem<K, InputT> keyedWorkItem = KeyedWorkItems.workItem(
-        elem.getValue().key(), elem.getValue().timersIterable(), nonLateElements);
-    doFnRunner.processElement(elem.withValue(keyedWorkItem));
-  }
-
-  @Override
-  public void finishBundle() {
-    doFnRunner.finishBundle();
-  }
-
-  /**
-   * It filters late data in a {@link KeyedWorkItem}.
-   */
-  @VisibleForTesting
-  static class LateDataFilter {
-    private final WindowingStrategy<?, ?> windowingStrategy;
-    private final TimerInternals timerInternals;
-    private final Aggregator<Long, Long> droppedDueToLateness;
-
-    public LateDataFilter(
-        WindowingStrategy<?, ?> windowingStrategy,
-        TimerInternals timerInternals,
-        Aggregator<Long, Long> droppedDueToLateness) {
-      this.windowingStrategy = windowingStrategy;
-      this.timerInternals = timerInternals;
-      this.droppedDueToLateness = droppedDueToLateness;
-    }
-
-    /**
-     * Returns an {@code Iterable<WindowedValue<InputT>>} that only contains
-     * non-late input elements.
-     */
-    public <K, InputT> Iterable<WindowedValue<InputT>> filter(
-        final K key, Iterable<WindowedValue<InputT>> elements) {
-      Iterable<Iterable<WindowedValue<InputT>>> windowsExpandedElements = Iterables.transform(
-          elements,
-          new Function<WindowedValue<InputT>, Iterable<WindowedValue<InputT>>>() {
-            @Override
-            public Iterable<WindowedValue<InputT>> apply(final WindowedValue<InputT> input) {
-              return Iterables.transform(
-                  input.getWindows(),
-                  new Function<BoundedWindow, WindowedValue<InputT>>() {
-                    @Override
-                    public WindowedValue<InputT> apply(BoundedWindow window) {
-                      return WindowedValue.of(
-                          input.getValue(), input.getTimestamp(), window, input.getPane());
-                    }
-                  });
-            }});
-
-      Iterable<WindowedValue<InputT>> nonLateElements = Iterables.filter(
-          Iterables.concat(windowsExpandedElements),
-          new Predicate<WindowedValue<InputT>>() {
-            @Override
-            public boolean apply(WindowedValue<InputT> input) {
-              BoundedWindow window = Iterables.getOnlyElement(input.getWindows());
-              if (canDropDueToExpiredWindow(window)) {
-                // The element is too late for this window.
-                droppedDueToLateness.addValue(1L);
-                WindowTracing.debug(
-                    "ReduceFnRunner.processElement: Dropping element at {} for key:{}; window:{} "
-                    + "since too far behind inputWatermark:{}; outputWatermark:{}",
-                    input.getTimestamp(), key, window, timerInternals.currentInputWatermarkTime(),
-                    timerInternals.currentOutputWatermarkTime());
-                return false;
-              } else {
-                return true;
-              }
-            }
-          });
-      return nonLateElements;
-    }
-
-    /** Is {@code window} expired w.r.t. the garbage collection watermark? */
-    private boolean canDropDueToExpiredWindow(BoundedWindow window) {
-      Instant inputWM = timerInternals.currentInputWatermarkTime();
-      return inputWM != null
-          && window.maxTimestamp().plus(windowingStrategy.getAllowedLateness()).isBefore(inputWM);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MapAggregatorValues.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MapAggregatorValues.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MapAggregatorValues.java
deleted file mode 100644
index a4d8ffd..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/MapAggregatorValues.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.runners.AggregatorValues;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.common.base.MoreObjects;
-
-import java.util.Map;
-
-/**
- * An {@link AggregatorValues} implementation that is backed by an in-memory map.
- *
- * @param <T> the output type of the {@link Aggregator}
- */
-public class MapAggregatorValues<T> extends AggregatorValues<T> {
-  private final Map<String, T> stepValues;
-
-  public MapAggregatorValues(Map<String, T> stepValues) {
-    this.stepValues = stepValues;
-  }
-
-  @Override
-  public Map<String, T> getValuesAtSteps() {
-    return stepValues;
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(MapAggregatorValues.class)
-        .add("stepValues", stepValues)
-        .toString();
-  }
-}

[17/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermark.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermark.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermark.java
deleted file mode 100644
index fac2c28..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterWatermark.java
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger;
-import com.google.cloud.dataflow.sdk.util.ExecutableTrigger;
-import com.google.cloud.dataflow.sdk.util.TimeDomain;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-
-import org.joda.time.Instant;
-
-import java.util.List;
-import java.util.Objects;
-
-/**
- * <p>{@code AfterWatermark} triggers fire based on progress of the system watermark. This time is a
- * lower-bound, sometimes heuristically established, on event times that have been fully processed
- * by the pipeline.
- *
- * <p>For sources that provide non-heuristic watermarks (e.g.
- * {@link com.google.cloud.dataflow.sdk.io.PubsubIO} when using arrival times as event times), the
- * watermark is a strict guarantee that no data with an event time earlier than
- * that watermark will ever be observed in the pipeline. In this case, it's safe to assume that any
- * pane triggered by an {@code AfterWatermark} trigger with a reference point at or beyond the end
- * of the window will be the last pane ever for that window.
- *
- * <p>For sources that provide heuristic watermarks (e.g.
- * {@link com.google.cloud.dataflow.sdk.io.PubsubIO} when using user-supplied event times), the
- * watermark itself becomes an <i>estimate</i> that no data with an event time earlier than that
- * watermark (i.e. "late data") will ever be observed in the pipeline. These heuristics can
- * often be quite accurate, but the chance of seeing late data for any given window is non-zero.
- * Thus, if absolute correctness over time is important to your use case, you may want to consider
- * using a trigger that accounts for late data. The default trigger,
- * {@code Repeatedly.forever(AfterWatermark.pastEndOfWindow())}, which fires
- * once when the watermark passes the end of the window and then immediately thereafter when any
- * late data arrives, is one such example.
- *
- * <p>The watermark is the clock that defines {@link TimeDomain#EVENT_TIME}.
- *
- * <p>Additional firings before or after the watermark can be requested by calling
- * {@code AfterWatermark.pastEndOfWindow().withEarlyFirings(OnceTrigger)} or
- * {@code AfterWatermark.pastEndOfWindow().withLateFirings(OnceTrigger)}.
- *
- * @param <W> {@link BoundedWindow} subclass used to represent the windows used.
- */
-@Experimental(Experimental.Kind.TRIGGER)
-public class AfterWatermark<W extends BoundedWindow> {
-
-  // Static factory class.
-  private AfterWatermark() {}
-
-  /**
-   * Creates a trigger that fires when the watermark passes the end of the window.
-   */
-  public static <W extends BoundedWindow> FromEndOfWindow<W> pastEndOfWindow() {
-    return new FromEndOfWindow<W>();
-  }
-
-  /**
-   * Interface for building an AfterWatermarkTrigger with early firings already filled in.
-   */
-  public interface AfterWatermarkEarly<W extends BoundedWindow> extends TriggerBuilder<W> {
-    /**
-     * Creates a new {@code Trigger} like this one, except that it fires repeatedly whenever
-     * the given {@code Trigger} fires after the watermark has passed the end of the window.
-     */
-    TriggerBuilder<W> withLateFirings(OnceTrigger<W> lateTrigger);
-  }
-
-  /**
-   * Interface for building an AfterWatermarkTrigger with late firings already filled in.
-   */
-  public interface AfterWatermarkLate<W extends BoundedWindow> extends TriggerBuilder<W> {
-    /**
-     * Creates a new {@code Trigger} like this one, except that it fires repeatedly whenever
-     * the given {@code Trigger} fires before the watermark has passed the end of the window.
-     */
-    TriggerBuilder<W> withEarlyFirings(OnceTrigger<W> earlyTrigger);
-  }
-
-  /**
-   * A trigger which never fires. Used for the "early" trigger when only a late trigger was
-   * specified.
-   */
-  private static class NeverTrigger<W extends BoundedWindow> extends OnceTrigger<W> {
-
-    protected NeverTrigger() {
-      super(null);
-    }
-
-    @Override
-    public void onElement(OnElementContext c) throws Exception { }
-
-    @Override
-    public void onMerge(OnMergeContext c) throws Exception { }
-
-    @Override
-    protected Trigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-      return this;
-    }
-
-    @Override
-    public Instant getWatermarkThatGuaranteesFiring(W window) {
-      return BoundedWindow.TIMESTAMP_MAX_VALUE;
-    }
-
-    @Override
-    public boolean shouldFire(Trigger<W>.TriggerContext context) throws Exception {
-      return false;
-    }
-
-    @Override
-    protected void onOnlyFiring(Trigger<W>.TriggerContext context) throws Exception {
-      throw new UnsupportedOperationException(
-          String.format("%s should never fire", getClass().getSimpleName()));
-    }
-  }
-
-  private static class AfterWatermarkEarlyAndLate<W extends BoundedWindow>
-      extends Trigger<W>
-      implements TriggerBuilder<W>, AfterWatermarkEarly<W>, AfterWatermarkLate<W> {
-
-    private static final int EARLY_INDEX = 0;
-    private static final int LATE_INDEX = 1;
-
-    private final OnceTrigger<W> earlyTrigger;
-    private final OnceTrigger<W> lateTrigger;
-
-    @SuppressWarnings("unchecked")
-    private AfterWatermarkEarlyAndLate(OnceTrigger<W> earlyTrigger, OnceTrigger<W> lateTrigger) {
-      super(lateTrigger == null
-          ? ImmutableList.<Trigger<W>>of(earlyTrigger)
-          : ImmutableList.<Trigger<W>>of(earlyTrigger, lateTrigger));
-      this.earlyTrigger = checkNotNull(earlyTrigger, "earlyTrigger should not be null");
-      this.lateTrigger = lateTrigger;
-    }
-
-    @Override
-    public TriggerBuilder<W> withEarlyFirings(OnceTrigger<W> earlyTrigger) {
-      return new AfterWatermarkEarlyAndLate<W>(earlyTrigger, lateTrigger);
-    }
-
-    @Override
-    public TriggerBuilder<W> withLateFirings(OnceTrigger<W> lateTrigger) {
-      return new AfterWatermarkEarlyAndLate<W>(earlyTrigger, lateTrigger);
-    }
-
-    @Override
-    public void onElement(OnElementContext c) throws Exception {
-      if (!c.trigger().isMerging()) {
-        // If merges can never happen, we just run the unfinished subtrigger
-        c.trigger().firstUnfinishedSubTrigger().invokeOnElement(c);
-      } else {
-        // If merges can happen, we run for all subtriggers because they might be
-        // de-activated or re-activated
-        for (ExecutableTrigger<W> subTrigger : c.trigger().subTriggers()) {
-          subTrigger.invokeOnElement(c);
-        }
-      }
-    }
-
-    @Override
-    public void onMerge(OnMergeContext c) throws Exception {
-      // NOTE that the ReduceFnRunner will delete all end-of-window timers for the
-      // merged-away windows.
-
-      ExecutableTrigger<W> earlySubtrigger = c.trigger().subTrigger(EARLY_INDEX);
-      // We check the early trigger to determine if we are still processing it or
-      // if the end of window has transitioned us to the late trigger
-      OnMergeContext earlyContext = c.forTrigger(earlySubtrigger);
-
-      // If the early trigger is still active in any merging window then it is still active in
-      // the new merged window, because even if the merged window is "done" some pending elements
-      // haven't had a chance to fire.
-      if (!earlyContext.trigger().finishedInAllMergingWindows() || !endOfWindowReached(c)) {
-        earlyContext.trigger().setFinished(false);
-        if (lateTrigger != null) {
-          ExecutableTrigger<W> lateSubtrigger = c.trigger().subTrigger(LATE_INDEX);
-          OnMergeContext lateContext = c.forTrigger(lateSubtrigger);
-          lateContext.trigger().setFinished(false);
-          lateSubtrigger.invokeClear(lateContext);
-        }
-      } else {
-        // Otherwise the early trigger and end-of-window bit is done for good.
-        earlyContext.trigger().setFinished(true);
-        if (lateTrigger != null) {
-          c.trigger().subTrigger(LATE_INDEX).invokeOnMerge(c);
-        }
-      }
-    }
-
-    @Override
-    public Trigger<W> getContinuationTrigger() {
-      return new AfterWatermarkEarlyAndLate<W>(
-          earlyTrigger.getContinuationTrigger(),
-          lateTrigger == null ? null : lateTrigger.getContinuationTrigger());
-    }
-
-    @Override
-    protected Trigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-      throw new UnsupportedOperationException(
-          "Should not call getContinuationTrigger(List<Trigger<W>>)");
-    }
-
-    @Override
-    public Instant getWatermarkThatGuaranteesFiring(W window) {
-      // Even without an early or late trigger, we'll still produce a firing at the watermark.
-      return window.maxTimestamp();
-    }
-
-    private boolean endOfWindowReached(Trigger<W>.TriggerContext context) {
-      return context.currentEventTime() != null
-          && context.currentEventTime().isAfter(context.window().maxTimestamp());
-    }
-
-    @Override
-    public boolean shouldFire(Trigger<W>.TriggerContext context) throws Exception {
-      if (!context.trigger().isFinished(EARLY_INDEX)) {
-        // We have not yet transitioned to late firings.
-        // We should fire if either the trigger is ready or we reach the end of the window.
-        return context.trigger().subTrigger(EARLY_INDEX).invokeShouldFire(context)
-            || endOfWindowReached(context);
-      } else if (lateTrigger == null) {
-        return false;
-      } else {
-        // We are running the late trigger
-        return context.trigger().subTrigger(LATE_INDEX).invokeShouldFire(context);
-      }
-    }
-
-    @Override
-    public void onFire(Trigger<W>.TriggerContext context) throws Exception {
-      if (!context.forTrigger(context.trigger().subTrigger(EARLY_INDEX)).trigger().isFinished()) {
-        onNonLateFiring(context);
-      } else if (lateTrigger != null) {
-        onLateFiring(context);
-      } else {
-        // all done
-        context.trigger().setFinished(true);
-      }
-    }
-
-    private void onNonLateFiring(Trigger<W>.TriggerContext context) throws Exception {
-      // We have not yet transitioned to late firings.
-      ExecutableTrigger<W> earlySubtrigger = context.trigger().subTrigger(EARLY_INDEX);
-      Trigger<W>.TriggerContext earlyContext = context.forTrigger(earlySubtrigger);
-
-      if (!endOfWindowReached(context)) {
-        // This is an early firing, since we have not arrived at the end of the window
-        // Implicitly repeats
-        earlySubtrigger.invokeOnFire(context);
-        earlySubtrigger.invokeClear(context);
-        earlyContext.trigger().setFinished(false);
-      } else {
-        // We have arrived at the end of the window; terminate the early trigger
-        // and clear out the late trigger's state
-        if (earlySubtrigger.invokeShouldFire(context)) {
-          earlySubtrigger.invokeOnFire(context);
-        }
-        earlyContext.trigger().setFinished(true);
-        earlySubtrigger.invokeClear(context);
-
-        if (lateTrigger == null) {
-          // Done if there is no late trigger.
-          context.trigger().setFinished(true);
-        } else {
-          // If there is a late trigger, we transition to it, and need to clear its state
-          // because it was run in parallel.
-          context.trigger().subTrigger(LATE_INDEX).invokeClear(context);
-        }
-      }
-
-    }
-
-    private void onLateFiring(Trigger<W>.TriggerContext context) throws Exception {
-      // We are firing the late trigger, with implicit repeat
-      ExecutableTrigger<W> lateSubtrigger = context.trigger().subTrigger(LATE_INDEX);
-      lateSubtrigger.invokeOnFire(context);
-      // It is a OnceTrigger, so it must have finished; unfinish it and clear it
-      lateSubtrigger.invokeClear(context);
-      context.forTrigger(lateSubtrigger).trigger().setFinished(false);
-    }
-  }
-
-  /**
-   * A watermark trigger targeted relative to the end of the window.
-   */
-  public static class FromEndOfWindow<W extends BoundedWindow> extends OnceTrigger<W> {
-
-    private FromEndOfWindow() {
-      super(null);
-    }
-
-    /**
-     * Creates a new {@code Trigger} like this one, except that it fires repeatedly whenever
-     * the given {@code Trigger} fires before the watermark has passed the end of the window.
-     */
-    public AfterWatermarkEarly<W> withEarlyFirings(OnceTrigger<W> earlyFirings) {
-      Preconditions.checkNotNull(earlyFirings,
-          "Must specify the trigger to use for early firings");
-      return new AfterWatermarkEarlyAndLate<W>(earlyFirings, null);
-    }
-
-    /**
-     * Creates a new {@code Trigger} like this one, except that it fires repeatedly whenever
-     * the given {@code Trigger} fires after the watermark has passed the end of the window.
-     */
-    public AfterWatermarkLate<W> withLateFirings(OnceTrigger<W> lateFirings) {
-      Preconditions.checkNotNull(lateFirings,
-          "Must specify the trigger to use for late firings");
-      return new AfterWatermarkEarlyAndLate<W>(new NeverTrigger<W>(), lateFirings);
-    }
-
-    @Override
-    public void onElement(OnElementContext c) throws Exception {
-      // We're interested in knowing when the input watermark passes the end of the window.
-      // (It is possible this has already happened, in which case the timer will be fired
-      // almost immediately).
-      c.setTimer(c.window().maxTimestamp(), TimeDomain.EVENT_TIME);
-    }
-
-    @Override
-    public void onMerge(OnMergeContext c) throws Exception {
-      // NOTE that the ReduceFnRunner will delete all end-of-window timers for the
-      // merged-away windows.
-
-      if (!c.trigger().finishedInAllMergingWindows()) {
-        // If the trigger is still active in any merging window then it is still active in the new
-        // merged window, because even if the merged window is "done" some pending elements haven't
-        // had a chance to fire
-        c.trigger().setFinished(false);
-      } else if (!endOfWindowReached(c)) {
-        // If the end of the new window has not been reached, then the trigger is active again.
-        c.trigger().setFinished(false);
-      } else {
-        // Otherwise it is done for good
-        c.trigger().setFinished(true);
-      }
-    }
-
-    @Override
-    public Instant getWatermarkThatGuaranteesFiring(W window) {
-      return window.maxTimestamp();
-    }
-
-    @Override
-    public FromEndOfWindow<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-      return this;
-    }
-
-    @Override
-    public String toString() {
-      return "AfterWatermark.pastEndOfWindow()";
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      return obj instanceof FromEndOfWindow;
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(getClass());
-    }
-
-    @Override
-    public boolean shouldFire(Trigger<W>.TriggerContext context) throws Exception {
-      return endOfWindowReached(context);
-    }
-
-    private boolean endOfWindowReached(Trigger<W>.TriggerContext context) {
-      return context.currentEventTime() != null
-          && context.currentEventTime().isAfter(context.window().maxTimestamp());
-    }
-
-    @Override
-    protected void onOnlyFiring(Trigger<W>.TriggerContext context) throws Exception { }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java
deleted file mode 100644
index 0afd8e3..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/BoundedWindow.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import org.joda.time.Instant;
-
-import java.util.concurrent.TimeUnit;
-
-/**
- * A {@code BoundedWindow} represents a finite grouping of elements, with an
- * upper bound (larger timestamps represent more recent data) on the timestamps
- * of elements that can be placed in the window. This finiteness means that for
- * every window, at some point in time, all data for that window will have
- * arrived and can be processed together.
- *
- * <p>Windows must also implement {@link Object#equals} and
- * {@link Object#hashCode} such that windows that are logically equal will
- * be treated as equal by {@code equals()} and {@code hashCode()}.
- */
-public abstract class BoundedWindow {
-  // The min and max timestamps that won't overflow when they are converted to
-  // usec.
-  public static final Instant TIMESTAMP_MIN_VALUE =
-      new Instant(TimeUnit.MICROSECONDS.toMillis(Long.MIN_VALUE));
-  public static final Instant TIMESTAMP_MAX_VALUE =
-      new Instant(TimeUnit.MICROSECONDS.toMillis(Long.MAX_VALUE));
-
-  /**
-   * Returns the inclusive upper bound of timestamps for values in this window.
-   */
-  public abstract Instant maxTimestamp();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java
deleted file mode 100644
index de5140f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/CalendarWindows.java
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-
-import org.joda.time.DateTime;
-import org.joda.time.DateTimeZone;
-import org.joda.time.Days;
-import org.joda.time.Instant;
-import org.joda.time.Months;
-import org.joda.time.Years;
-
-/**
- * A collection of {@link WindowFn}s that windows values into calendar-based
- * windows such as spans of days, months, or years.
- *
- * <p>For example, to group data into quarters that change on the 15th, use
- * {@code CalendarWindows.months(3).withStartingMonth(2014, 1).beginningOnDay(15)}.
- */
-public class CalendarWindows {
-
-  /**
-   * Returns a {@link WindowFn} that windows elements into periods measured by days.
-   *
-   * <p>For example, {@code CalendarWindows.days(1)} will window elements into
-   * separate windows for each day.
-   */
-  public static DaysWindows days(int number) {
-    return new DaysWindows(number, new DateTime(0, DateTimeZone.UTC), DateTimeZone.UTC);
-  }
-
-  /**
-   * Returns a {@link WindowFn} that windows elements into periods measured by weeks.
-   *
-   * <p>For example, {@code CalendarWindows.weeks(1, DateTimeConstants.TUESDAY)} will
-   * window elements into week-long windows starting on Tuesdays.
-   */
-  public static DaysWindows weeks(int number, int startDayOfWeek) {
-    return new DaysWindows(
-        7 * number,
-        new DateTime(0, DateTimeZone.UTC).withDayOfWeek(startDayOfWeek),
-        DateTimeZone.UTC);
-  }
-
-  /**
-   * Returns a {@link WindowFn} that windows elements into periods measured by months.
-   *
-   * <p>For example,
-   * {@code CalendarWindows.months(8).withStartingMonth(2014, 1).beginningOnDay(10)}
-   * will window elements into 8-month windows that start on the 10th day of the month,
-   * and the first window begins in January 2014.
-   */
-  public static MonthsWindows months(int number) {
-    return new MonthsWindows(number, 1, new DateTime(0, DateTimeZone.UTC), DateTimeZone.UTC);
-  }
-
-  /**
-   * Returns a {@link WindowFn} that windows elements into periods measured by years.
-   *
-   * <p>For example,
-   * {@code CalendarWindows.years(1).withTimeZone(DateTimeZone.forId("America/Los_Angeles"))}
-   * will window elements into year-long windows that start at midnight on Jan 1, in the
-   * America/Los_Angeles time zone.
-   */
-  public static YearsWindows years(int number) {
-    return new YearsWindows(number, 1, 1, new DateTime(0, DateTimeZone.UTC), DateTimeZone.UTC);
-  }
-
-  /**
-   * A {@link WindowFn} that windows elements into periods measured by days.
-   *
-   * <p>By default, periods of multiple days are measured starting at the
-   * epoch.  This can be overridden with {@link #withStartingDay}.
-   *
-   * <p>The time zone used to determine calendar boundaries is UTC, unless this
-   * is overridden with the {@link #withTimeZone} method.
-   */
-  public static class DaysWindows extends PartitioningWindowFn<Object, IntervalWindow> {
-    /**
-     * Returns a {@code DaysWindows} with the same period and time zone whose periods
-     * are counted from midnight at the start of the given calendar date.
-     */
-    public DaysWindows withStartingDay(int year, int month, int day) {
-      return new DaysWindows(
-          number, new DateTime(year, month, day, 0, 0, timeZone), timeZone);
-    }
-
-    /**
-     * Returns a {@code DaysWindows} with the same period that uses the given time zone.
-     * The start date keeps its calendar fields and is re-interpreted in the new zone.
-     */
-    public DaysWindows withTimeZone(DateTimeZone timeZone) {
-      return new DaysWindows(
-          number, startDate.withZoneRetainFields(timeZone), timeZone);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    private int number;
-    private DateTime startDate;
-    private DateTimeZone timeZone;
-
-    private DaysWindows(int number, DateTime startDate, DateTimeZone timeZone) {
-      this.number = number;
-      this.startDate = startDate;
-      this.timeZone = timeZone;
-    }
-
-    /**
-     * Assigns the timestamp to the {@code number}-day window, counted from the start
-     * date, that it falls in.
-     */
-    @Override
-    public IntervalWindow assignWindow(Instant timestamp) {
-      DateTime datetime = new DateTime(timestamp, timeZone);
-
-      // Integer division rounds the elapsed whole days down to a multiple of the period.
-      // NOTE(review): for timestamps before startDate the division truncates toward
-      // zero, so the chosen window may begin after the timestamp -- confirm whether
-      // such timestamps need to be supported.
-      int dayOffset = Days.daysBetween(startDate, datetime).getDays() / number * number;
-
-      DateTime begin = startDate.plusDays(dayOffset);
-      DateTime end = begin.plusDays(number);
-
-      return new IntervalWindow(begin.toInstant(), end.toInstant());
-    }
-
-    @Override
-    public Coder<IntervalWindow> windowCoder() {
-      return IntervalWindow.getCoder();
-    }
-
-    /**
-     * Returns true if {@code other} is a {@code DaysWindows} with the same period,
-     * start date and time zone.
-     */
-    @Override
-    public boolean isCompatible(WindowFn<?, ?> other) {
-      if (!(other instanceof DaysWindows)) {
-        return false;
-      }
-      DaysWindows that = (DaysWindows) other;
-      // Compare by value: every factory call builds a fresh DateTime, so reference
-      // equality would report two identically configured instances as incompatible.
-      return number == that.number
-          && java.util.Objects.equals(startDate, that.startDate)
-          && java.util.Objects.equals(timeZone, that.timeZone);
-    }
-
-    public int getNumber() {
-      return number;
-    }
-
-    public DateTime getStartDate() {
-      return startDate;
-    }
-
-    public DateTimeZone getTimeZone() {
-      return timeZone;
-    }
-
-  }
-
-  /**
-   * A {@link WindowFn} that windows elements into periods measured by months.
-   *
-   * <p>By default, periods of multiple months are measured starting at the
-   * epoch.  This can be overridden with {@link #withStartingMonth}.
-   *
-   * <p>Months start on the first day of each calendar month, unless overridden by
-   * {@link #beginningOnDay}.
-   *
-   * <p>The time zone used to determine calendar boundaries is UTC, unless this
-   * is overridden with the {@link #withTimeZone} method.
-   */
-  public static class MonthsWindows extends PartitioningWindowFn<Object, IntervalWindow> {
-    /**
-     * Returns a {@code MonthsWindows} with the same period, start date and time zone
-     * whose windows begin on the given day of the month.
-     */
-    public MonthsWindows beginningOnDay(int dayOfMonth) {
-      return new MonthsWindows(
-          number, dayOfMonth, startDate, timeZone);
-    }
-
-    /**
-     * Returns a {@code MonthsWindows} whose periods are counted from the first day of
-     * the given month, keeping the period, day of month and time zone.
-     */
-    public MonthsWindows withStartingMonth(int year, int month) {
-      return new MonthsWindows(
-          number, dayOfMonth, new DateTime(year, month, 1, 0, 0, timeZone), timeZone);
-    }
-
-    /**
-     * Returns a {@code MonthsWindows} that uses the given time zone. The start date
-     * keeps its calendar fields and is re-interpreted in the new zone.
-     */
-    public MonthsWindows withTimeZone(DateTimeZone timeZone) {
-      return new MonthsWindows(
-          number, dayOfMonth, startDate.withZoneRetainFields(timeZone), timeZone);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    private int number;
-    private int dayOfMonth;
-    private DateTime startDate;
-    private DateTimeZone timeZone;
-
-    private MonthsWindows(int number, int dayOfMonth, DateTime startDate, DateTimeZone timeZone) {
-      this.number = number;
-      this.dayOfMonth = dayOfMonth;
-      this.startDate = startDate;
-      this.timeZone = timeZone;
-    }
-
-    /**
-     * Assigns the timestamp to the {@code number}-month window that it falls in,
-     * counted from the start date moved to the configured day of the month.
-     */
-    @Override
-    public IntervalWindow assignWindow(Instant timestamp) {
-      DateTime datetime = new DateTime(timestamp, timeZone);
-
-      // Integer division rounds the elapsed whole months down to a multiple of the
-      // period.
-      // NOTE(review): for timestamps before the start date the division truncates
-      // toward zero, so the chosen window may begin after the timestamp -- confirm
-      // whether such timestamps need to be supported.
-      int monthOffset =
-          Months.monthsBetween(startDate.withDayOfMonth(dayOfMonth), datetime).getMonths()
-          / number * number;
-
-      DateTime begin = startDate.withDayOfMonth(dayOfMonth).plusMonths(monthOffset);
-      DateTime end = begin.plusMonths(number);
-
-      return new IntervalWindow(begin.toInstant(), end.toInstant());
-    }
-
-    @Override
-    public Coder<IntervalWindow> windowCoder() {
-      return IntervalWindow.getCoder();
-    }
-
-    /**
-     * Returns true if {@code other} is a {@code MonthsWindows} with the same period,
-     * day of month, start date and time zone.
-     */
-    @Override
-    public boolean isCompatible(WindowFn<?, ?> other) {
-      if (!(other instanceof MonthsWindows)) {
-        return false;
-      }
-      MonthsWindows that = (MonthsWindows) other;
-      // Compare by value: every factory call builds a fresh DateTime, so reference
-      // equality would report two identically configured instances as incompatible.
-      return number == that.number
-          && dayOfMonth == that.dayOfMonth
-          && java.util.Objects.equals(startDate, that.startDate)
-          && java.util.Objects.equals(timeZone, that.timeZone);
-    }
-
-    public int getNumber() {
-      return number;
-    }
-
-    public int getDayOfMonth() {
-      return dayOfMonth;
-    }
-
-    public DateTime getStartDate() {
-      return startDate;
-    }
-
-    public DateTimeZone getTimeZone() {
-      return timeZone;
-    }
-
-  }
-
-  /**
-   * A {@link WindowFn} that windows elements into periods measured by years.
-   *
-   * <p>By default, periods of multiple years are measured starting at the
-   * epoch.  This can be overridden with {@link #withStartingYear}.
-   *
-   * <p>Years start on the first day of each calendar year, unless overridden by
-   * {@link #beginningOnDay}.
-   *
-   * <p>The time zone used to determine calendar boundaries is UTC, unless this
-   * is overridden with the {@link #withTimeZone} method.
-   */
-  public static class YearsWindows extends PartitioningWindowFn<Object, IntervalWindow> {
-    /**
-     * Returns a {@code YearsWindows} with the same period, start date and time zone
-     * whose windows begin on the given month and day of the year.
-     */
-    public YearsWindows beginningOnDay(int monthOfYear, int dayOfMonth) {
-      return new YearsWindows(
-          number, monthOfYear, dayOfMonth, startDate, timeZone);
-    }
-
-    /**
-     * Returns a {@code YearsWindows} whose periods are counted from January 1 of the
-     * given year, keeping the period, month, day and time zone.
-     */
-    public YearsWindows withStartingYear(int year) {
-      return new YearsWindows(
-          number, monthOfYear, dayOfMonth, new DateTime(year, 1, 1, 0, 0, timeZone), timeZone);
-    }
-
-    /**
-     * Returns a {@code YearsWindows} that uses the given time zone. The start date
-     * keeps its calendar fields and is re-interpreted in the new zone.
-     */
-    public YearsWindows withTimeZone(DateTimeZone timeZone) {
-      return new YearsWindows(
-          number, monthOfYear, dayOfMonth, startDate.withZoneRetainFields(timeZone), timeZone);
-    }
-
-    ////////////////////////////////////////////////////////////////////////////
-
-    private int number;
-    private int monthOfYear;
-    private int dayOfMonth;
-    private DateTime startDate;
-    private DateTimeZone timeZone;
-
-    private YearsWindows(
-        int number, int monthOfYear, int dayOfMonth, DateTime startDate, DateTimeZone timeZone) {
-      this.number = number;
-      this.monthOfYear = monthOfYear;
-      this.dayOfMonth = dayOfMonth;
-      this.startDate = startDate;
-      this.timeZone = timeZone;
-    }
-
-    /**
-     * Assigns the timestamp to the {@code number}-year window that it falls in,
-     * counted from the start date moved to the configured month and day.
-     */
-    @Override
-    public IntervalWindow assignWindow(Instant timestamp) {
-      DateTime datetime = new DateTime(timestamp, timeZone);
-
-      DateTime offsetStart = startDate.withMonthOfYear(monthOfYear).withDayOfMonth(dayOfMonth);
-
-      // Integer division rounds the elapsed whole years down to a multiple of the
-      // period.
-      // NOTE(review): for timestamps before offsetStart the division truncates toward
-      // zero, so the chosen window may begin after the timestamp -- confirm whether
-      // such timestamps need to be supported.
-      int yearOffset =
-          Years.yearsBetween(offsetStart, datetime).getYears() / number * number;
-
-      DateTime begin = offsetStart.plusYears(yearOffset);
-      DateTime end = begin.plusYears(number);
-
-      return new IntervalWindow(begin.toInstant(), end.toInstant());
-    }
-
-    @Override
-    public Coder<IntervalWindow> windowCoder() {
-      return IntervalWindow.getCoder();
-    }
-
-    /**
-     * Returns true if {@code other} is a {@code YearsWindows} with the same period,
-     * month, day, start date and time zone.
-     */
-    @Override
-    public boolean isCompatible(WindowFn<?, ?> other) {
-      if (!(other instanceof YearsWindows)) {
-        return false;
-      }
-      YearsWindows that = (YearsWindows) other;
-      // Compare by value: every factory call builds a fresh DateTime, so reference
-      // equality would report two identically configured instances as incompatible.
-      return number == that.number
-          && monthOfYear == that.monthOfYear
-          && dayOfMonth == that.dayOfMonth
-          && java.util.Objects.equals(startDate, that.startDate)
-          && java.util.Objects.equals(timeZone, that.timeZone);
-    }
-
-    public DateTimeZone getTimeZone() {
-      return timeZone;
-    }
-
-    public DateTime getStartDate() {
-      return startDate;
-    }
-
-    public int getDayOfMonth() {
-      return dayOfMonth;
-    }
-
-    public int getMonthOfYear() {
-      return monthOfYear;
-    }
-
-    public int getNumber() {
-      return number;
-    }
-
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTrigger.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTrigger.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTrigger.java
deleted file mode 100644
index 9ac4abd..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/DefaultTrigger.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.util.TimeDomain;
-
-import org.joda.time.Instant;
-
-import java.util.List;
-
-/**
- * A trigger that is equivalent to {@code Repeatedly.forever(AfterWatermark.pastEndOfWindow())}.
- * See {@link Repeatedly#forever} and {@link AfterWatermark#pastEndOfWindow} for more details.
- *
- * @param <W> The type of windows being triggered/encoded.
- */
-@Experimental(Experimental.Kind.TRIGGER)
-public class DefaultTrigger<W extends BoundedWindow> extends Trigger<W>{
-
-  private DefaultTrigger() {
-    super(null);
-  }
-
-  /**
-   * Returns the default trigger.
-   */
-  public static <W extends BoundedWindow> DefaultTrigger<W> of() {
-    return new DefaultTrigger<W>();
-  }
-
-  @Override
-  public void onElement(OnElementContext c) throws Exception {
-    // If the end of the window has already been reached, then we are already ready to fire
-    // and do not need to set a wake-up timer.
-    if (!endOfWindowReached(c)) {
-      c.setTimer(c.window().maxTimestamp(), TimeDomain.EVENT_TIME);
-    }
-  }
-
-  @Override
-  public void onMerge(OnMergeContext c) throws Exception {
-    // If the end of the window has already been reached, then we are already ready to fire
-    // and do not need to set a wake-up timer.
-    if (!endOfWindowReached(c)) {
-      c.setTimer(c.window().maxTimestamp(), TimeDomain.EVENT_TIME);
-    }
-  }
-
-  @Override
-  public void clear(TriggerContext c) throws Exception { }
-
-  @Override
-  public Instant getWatermarkThatGuaranteesFiring(W window) {
-    return window.maxTimestamp();
-  }
-
-  @Override
-  public boolean isCompatible(Trigger<?> other) {
-    // Semantically, all default triggers are identical
-    return other instanceof DefaultTrigger;
-  }
-
-  @Override
-  public Trigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-    return this;
-  }
-
-  @Override
-  public boolean shouldFire(Trigger<W>.TriggerContext context) throws Exception {
-    return endOfWindowReached(context);
-  }
-
-  private boolean endOfWindowReached(Trigger<W>.TriggerContext context) {
-    return context.currentEventTime() != null
-        && context.currentEventTime().isAfter(context.window().maxTimestamp());
-  }
-
-  @Override
-  public void onFire(Trigger<W>.TriggerContext context) throws Exception { }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java
deleted file mode 100644
index 12a0f1b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/FixedWindows.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.util.Objects;
-
-/**
- * A {@link WindowFn} that windows values into fixed-size timestamp-based windows.
- *
- * <p>For example, in order to partition the data into 10 minute windows:
- * <pre> {@code
- * PCollection<Integer> items = ...;
- * PCollection<Integer> windowedItems = items.apply(
- *   Window.<Integer>into(FixedWindows.of(Duration.standardMinutes(10))));
- * } </pre>
- */
-public class FixedWindows extends PartitioningWindowFn<Object, IntervalWindow> {
-
-  /**
-   * Size of this window.
-   */
-  private final Duration size;
-
-  /**
-   * Offset of this window.  Windows start at time
-   * N * size + offset, where 0 is the epoch.
-   */
-  private final Duration offset;
-
-  /**
-   * Partitions the timestamp space into half-open intervals of the form
-   * [N * size, (N + 1) * size), where 0 is the epoch.
-   */
-  public static FixedWindows of(Duration size) {
-    return new FixedWindows(size, Duration.ZERO);
-  }
-
-  /**
-   * Partitions the timestamp space into half-open intervals of the form
-   * [N * size + offset, (N + 1) * size + offset),
-   * where 0 is the epoch.
-   *
-   * @throws IllegalArgumentException if offset is not in [0, size)
-   */
-  public FixedWindows withOffset(Duration offset) {
-    return new FixedWindows(size, offset);
-  }
-
-  /**
-   * Creates the window function after checking that {@code 0 <= offset < size}.
-   *
-   * @throws IllegalArgumentException if offset is not in [0, size)
-   */
-  private FixedWindows(Duration size, Duration offset) {
-    if (offset.isShorterThan(Duration.ZERO) || !offset.isShorterThan(size)) {
-      throw new IllegalArgumentException(
-          "FixedWindows WindowingStrategies must have 0 <= offset < size");
-    }
-    this.size = size;
-    this.offset = offset;
-  }
-
-  /**
-   * Returns the window [N * size + offset, (N + 1) * size + offset) that contains
-   * the given timestamp.
-   */
-  @Override
-  public IntervalWindow assignWindow(Instant timestamp) {
-    long sizeMillis = size.getMillis();
-    // Distance from the start of the enclosing window to the timestamp. Java's %
-    // takes the sign of the dividend, so the remainder is normalized into [0, size);
-    // otherwise a timestamp more than one window before the epoch would be assigned
-    // a window that starts after it.
-    long elapsed = (timestamp.getMillis() - offset.getMillis()) % sizeMillis;
-    if (elapsed < 0) {
-      elapsed += sizeMillis;
-    }
-    return new IntervalWindow(new Instant(timestamp.getMillis() - elapsed), size);
-  }
-
-  @Override
-  public Coder<IntervalWindow> windowCoder() {
-    return IntervalWindow.getCoder();
-  }
-
-  /**
-   * Two {@code FixedWindows} are compatible exactly when they are equal, i.e. have
-   * the same size and offset.
-   */
-  @Override
-  public boolean isCompatible(WindowFn<?, ?> other) {
-    return this.equals(other);
-  }
-
-  public Duration getSize() {
-    return size;
-  }
-
-  public Duration getOffset() {
-    return offset;
-  }
-
-  @Override
-  public boolean equals(Object object) {
-    if (!(object instanceof FixedWindows)) {
-      return false;
-    }
-    FixedWindows other = (FixedWindows) object;
-    return getOffset().equals(other.getOffset())
-        && getSize().equals(other.getSize());
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(size, offset);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java
deleted file mode 100644
index d7fc396..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindow.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * The default window into which all data is placed (via {@link GlobalWindows}).
- */
-public class GlobalWindow extends BoundedWindow {
-  /**
-   * Singleton instance of {@link GlobalWindow}.
-   */
-  public static final GlobalWindow INSTANCE = new GlobalWindow();
-
-  // Triggers use maxTimestamp to set their timers' timestamps. A timer fires when
-  // the watermark passes its timestamp, so maxTimestamp needs to be smaller than
-  // TIMESTAMP_MAX_VALUE.
-  // One standard day is subtracted from TIMESTAMP_MAX_VALUE to make sure
-  // the maxTimestamp is smaller than TIMESTAMP_MAX_VALUE even after rounding up
-  // to seconds or minutes.
-  private static final Instant END_OF_GLOBAL_WINDOW =
-      TIMESTAMP_MAX_VALUE.minus(Duration.standardDays(1));
-
-  @Override
-  public Instant maxTimestamp() {
-    return END_OF_GLOBAL_WINDOW;
-  }
-
-  private GlobalWindow() {}
-
-  /**
-   * {@link Coder} for encoding and decoding {@code GlobalWindow}s.
-   *
-   * <p>Encoding writes nothing to the stream and decoding reads nothing from it;
-   * decoding always yields {@link GlobalWindow#INSTANCE}.
-   */
-  public static class Coder extends AtomicCoder<GlobalWindow> {
-    public static final Coder INSTANCE = new Coder();
-
-    @Override
-    public void encode(GlobalWindow window, OutputStream outStream, Context context) {}
-
-    @Override
-    public GlobalWindow decode(InputStream inStream, Context context) {
-      return GlobalWindow.INSTANCE;
-    }
-
-    private Coder() {}
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindows.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindows.java
deleted file mode 100644
index d3d949c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/GlobalWindows.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-
-import org.joda.time.Instant;
-
-import java.util.Collection;
-import java.util.Collections;
-
-/**
- * Default {@link WindowFn} that assigns all data to the same window.
- */
-public class GlobalWindows extends NonMergingWindowFn<Object, GlobalWindow> {
-
-  // Shared single-element collection returned for every assignment.
-  private static final Collection<GlobalWindow> GLOBAL_WINDOWS =
-      Collections.singletonList(GlobalWindow.INSTANCE);
-
-  // Every element is assigned to the one GlobalWindow instance.
-  @Override
-  public Collection<GlobalWindow> assignWindows(AssignContext c) {
-    return GLOBAL_WINDOWS;
-  }
-
-  // Compatible with any other GlobalWindows.
-  @Override
-  public boolean isCompatible(WindowFn<?, ?> o) {
-    return o instanceof GlobalWindows;
-  }
-
-  @Override
-  public Coder<GlobalWindow> windowCoder() {
-    return GlobalWindow.Coder.INSTANCE;
-  }
-
-  // The side input window is the global window regardless of the main input window.
-  @Override
-  public GlobalWindow getSideInputWindow(BoundedWindow window) {
-    return GlobalWindow.INSTANCE;
-  }
-
-  @Override
-  public boolean assignsToSingleWindow() {
-    return true;
-  }
-
-  // The output time is the input timestamp, unchanged.
-  @Override
-  public Instant getOutputTime(Instant inputTimestamp, GlobalWindow window) {
-    return inputTimestamp;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java
deleted file mode 100644
index 58287c7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/IntervalWindow.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.DurationCoder;
-import com.google.cloud.dataflow.sdk.coders.InstantCoder;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.joda.time.ReadableDuration;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * An implementation of {@link BoundedWindow} that represents an interval from
- * {@link #start} (inclusive) to {@link #end} (exclusive).
- */
-public class IntervalWindow extends BoundedWindow
-    implements Comparable<IntervalWindow> {
-  /**
-   * Start of the interval, inclusive.
-   */
-  private final Instant start;
-
-  /**
-   * End of the interval, exclusive.
-   */
-  private final Instant end;
-
-  /**
-   * Creates a new IntervalWindow that represents the half-open time
-   * interval [start, end).
-   */
-  public IntervalWindow(Instant start, Instant end) {
-    this.start = start;
-    this.end = end;
-  }
-
-  public IntervalWindow(Instant start, ReadableDuration size) {
-    this.start = start;
-    this.end = start.plus(size);
-  }
-
-  /**
-   * Returns the start of this window, inclusive.
-   */
-  public Instant start() {
-    return start;
-  }
-
-  /**
-   * Returns the end of this window, exclusive.
-   */
-  public Instant end() {
-    return end;
-  }
-
-  /**
-   * Returns the largest timestamp that can be included in this window.
-   */
-  @Override
-  public Instant maxTimestamp() {
-    // end not inclusive
-    return end.minus(1);
-  }
-
-  /**
-   * Returns whether this window contains the given window.
-   */
-  public boolean contains(IntervalWindow other) {
-    return !this.start.isAfter(other.start) && !this.end.isBefore(other.end);
-  }
-
-  /**
-   * Returns whether this window is disjoint from the given window.
-   */
-  public boolean isDisjoint(IntervalWindow other) {
-    return !this.end.isAfter(other.start) || !other.end.isAfter(this.start);
-  }
-
-  /**
-   * Returns whether this window intersects the given window.
-   */
-  public boolean intersects(IntervalWindow other) {
-    return !isDisjoint(other);
-  }
-
-  /**
-   * Returns the minimal window that includes both this window and
-   * the given window.
-   */
-  public IntervalWindow span(IntervalWindow other) {
-    return new IntervalWindow(
-        new Instant(Math.min(start.getMillis(), other.start.getMillis())),
-        new Instant(Math.max(end.getMillis(), other.end.getMillis())));
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    return (o instanceof IntervalWindow)
-        && ((IntervalWindow) o).end.isEqual(end)
-        && ((IntervalWindow) o).start.isEqual(start);
-  }
-
-  @Override
-  public int hashCode() {
-    // The end values are themselves likely to be arithmetic sequence, which
-    // is a poor distribution to use for a hashtable, so we
-    // add a highly non-linear transformation.
-    return (int)
-        (start.getMillis() + modInverse((int) (end.getMillis() << 1) + 1));
-  }
-
-  /**
-   * Compute the inverse of (odd) x mod 2^32.
-   */
-  private int modInverse(int x) {
-    // Cube gives inverse mod 2^4, as x^4 == 1 (mod 2^4) for all odd x.
-    int inverse = x * x * x;
-    // Newton iteration doubles correct bits at each step.
-    inverse *= 2 - x * inverse;
-    inverse *= 2 - x * inverse;
-    inverse *= 2 - x * inverse;
-    return inverse;
-  }
-
-  @Override
-  public String toString() {
-    return "[" + start + ".." + end + ")";
-  }
-
-  @Override
-  public int compareTo(IntervalWindow o) {
-    if (start.isEqual(o.start)) {
-      return end.compareTo(o.end);
-    }
-    return start.compareTo(o.start);
-  }
-
-  /**
-   * Returns a {@link Coder} suitable for {@link IntervalWindow}.
-   */
-  public static Coder<IntervalWindow> getCoder() {
-    return IntervalWindowCoder.of();
-  }
-
-  /**
-   * Encodes an {@link IntervalWindow} as a pair of its upper bound and duration.
-   */
-  private static class IntervalWindowCoder extends AtomicCoder<IntervalWindow> {
-
-    private static final IntervalWindowCoder INSTANCE =
-        new IntervalWindowCoder();
-
-    private static final Coder<Instant> instantCoder = InstantCoder.of();
-    private static final Coder<ReadableDuration> durationCoder = DurationCoder.of();
-
-    @JsonCreator
-    public static IntervalWindowCoder of() {
-      return INSTANCE;
-    }
-
-    @Override
-    public void encode(IntervalWindow window,
-                       OutputStream outStream,
-                       Context context)
-        throws IOException, CoderException {
-      instantCoder.encode(window.end, outStream, context.nested());
-      durationCoder.encode(new Duration(window.start, window.end), outStream, context.nested());
-    }
-
-    @Override
-    public IntervalWindow decode(InputStream inStream, Context context)
-        throws IOException, CoderException {
-      Instant end = instantCoder.decode(inStream, context.nested());
-      ReadableDuration duration = durationCoder.decode(inStream, context.nested());
-      return new IntervalWindow(end.minus(duration), end);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindows.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindows.java
deleted file mode 100644
index 596f4e7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/InvalidWindows.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-
-import org.joda.time.Instant;
-
-import java.util.Collection;
-
-/**
- * A {@link WindowFn} that represents an invalid pipeline state.
- *
- * @param <W> window type
- */
-public class InvalidWindows<W extends BoundedWindow> extends WindowFn<Object, W> {
-  private String cause;
-  private WindowFn<?, W> originalWindowFn;
-
-  public InvalidWindows(String cause, WindowFn<?, W> originalWindowFn) {
-    this.originalWindowFn = originalWindowFn;
-    this.cause = cause;
-  }
-
-  /**
-   * Returns the reason that this {@code WindowFn} is invalid.
-   */
-  public String getCause() {
-    return cause;
-  }
-
-  /**
-   * Returns the original windowFn that this InvalidWindows replaced.
-   */
-  public WindowFn<?, W> getOriginalWindowFn() {
-    return originalWindowFn;
-  }
-
-  @Override
-  public Collection<W> assignWindows(AssignContext c) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public void mergeWindows(MergeContext c) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public Coder<W> windowCoder() {
-    return originalWindowFn.windowCoder();
-  }
-
-  /**
-   * {@code InvalidWindows} objects with the same {@code originalWindowFn} are compatible.
-   */
-  @Override
-  public boolean isCompatible(WindowFn<?, ?> other) {
-    return getClass() == other.getClass()
-        && getOriginalWindowFn().isCompatible(
-            ((InvalidWindows<?>) other).getOriginalWindowFn());
-  }
-
-  @Override
-  public W getSideInputWindow(BoundedWindow window) {
-    throw new UnsupportedOperationException("InvalidWindows is not allowed in side inputs");
-  }
-
-  @Override
-  public Instant getOutputTime(Instant inputTimestamp, W window) {
-    return inputTimestamp;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java
deleted file mode 100644
index 4e06234..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/MergeOverlappingIntervalWindows.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * A utility function for merging overlapping {@link IntervalWindow}s.
- */
-public class MergeOverlappingIntervalWindows {
-
-  /**
-   * Merge overlapping {@link IntervalWindow}s.
-   */
-  public static void mergeWindows(WindowFn<?, IntervalWindow>.MergeContext c) throws Exception {
-    // Merge any overlapping windows into a single window.
-    // Sort the list of existing windows so we only have to
-    // traverse the list once rather than considering all
-    // O(n^2) window pairs.
-    List<IntervalWindow> sortedWindows = new ArrayList<>();
-    for (IntervalWindow window : c.windows()) {
-      sortedWindows.add(window);
-    }
-    Collections.sort(sortedWindows);
-    List<MergeCandidate> merges = new ArrayList<>();
-    MergeCandidate current = new MergeCandidate();
-    for (IntervalWindow window : sortedWindows) {
-      if (current.intersects(window)) {
-        current.add(window);
-      } else {
-        merges.add(current);
-        current = new MergeCandidate(window);
-      }
-    }
-    merges.add(current);
-    for (MergeCandidate merge : merges) {
-      merge.apply(c);
-    }
-  }
-
-  private static class MergeCandidate {
-    private IntervalWindow union;
-    private final List<IntervalWindow> parts;
-    public MergeCandidate() {
-      parts = new ArrayList<>();
-    }
-    public MergeCandidate(IntervalWindow window) {
-      union = window;
-      parts = new ArrayList<>(Arrays.asList(window));
-    }
-    public boolean intersects(IntervalWindow window) {
-      return union == null || union.intersects(window);
-    }
-    public void add(IntervalWindow window) {
-      union = union == null ? window : union.span(window);
-      parts.add(window);
-    }
-    public void apply(WindowFn<?, IntervalWindow>.MergeContext c) throws Exception {
-      if (parts.size() > 1) {
-        c.merge(parts, union);
-      }
-    }
-
-    @Override
-    public String toString() {
-      return "MergeCandidate[union=" + union + ", parts=" + parts + "]";
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowFn.java
deleted file mode 100644
index 8aa66fc..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/NonMergingWindowFn.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-/**
- * Abstract base class for {@link WindowFn}s that do not merge windows.
- *
- * @param <T> type of elements being windowed
- * @param <W> {@link BoundedWindow} subclass used to represent the windows used by this
- *            {@code WindowFn}
- */
-public abstract class NonMergingWindowFn<T, W extends BoundedWindow>
-    extends WindowFn<T, W> {
-  @Override
-  public final void mergeWindows(MergeContext c) { }
-
-  @Override
-  public final boolean isNonMerging() {
-    return true;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTrigger.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTrigger.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTrigger.java
deleted file mode 100644
index 652092a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OrFinallyTrigger.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.util.ExecutableTrigger;
-import com.google.common.annotations.VisibleForTesting;
-
-import org.joda.time.Instant;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Executes the {@code actual} trigger until it finishes or until the {@code until} trigger fires.
- */
-class OrFinallyTrigger<W extends BoundedWindow> extends Trigger<W> {
-
-  private static final int ACTUAL = 0;
-  private static final int UNTIL = 1;
-
-  @VisibleForTesting OrFinallyTrigger(Trigger<W> actual, Trigger.OnceTrigger<W> until) {
-    super(Arrays.asList(actual, until));
-  }
-
-  @Override
-  public void onElement(OnElementContext c) throws Exception {
-    c.trigger().subTrigger(ACTUAL).invokeOnElement(c);
-    c.trigger().subTrigger(UNTIL).invokeOnElement(c);
-  }
-
-  @Override
-  public void onMerge(OnMergeContext c) throws Exception {
-    for (ExecutableTrigger<W> subTrigger : c.trigger().subTriggers()) {
-      subTrigger.invokeOnMerge(c);
-    }
-    updateFinishedState(c);
-  }
-
-  @Override
-  public Instant getWatermarkThatGuaranteesFiring(W window) {
-    // This trigger fires once either the actual trigger or the until trigger fires.
-    Instant actualDeadline = subTriggers.get(ACTUAL).getWatermarkThatGuaranteesFiring(window);
-    Instant untilDeadline = subTriggers.get(UNTIL).getWatermarkThatGuaranteesFiring(window);
-    return actualDeadline.isBefore(untilDeadline) ? actualDeadline : untilDeadline;
-  }
-
-  @Override
-  public Trigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-    // Use OrFinallyTrigger instead of AfterFirst because the continuation of ACTUAL
-    // may not be a OnceTrigger.
-    return Repeatedly.forever(
-        new OrFinallyTrigger<W>(
-            continuationTriggers.get(ACTUAL),
-            (Trigger.OnceTrigger<W>) continuationTriggers.get(UNTIL)));
-  }
-
-  @Override
-  public boolean shouldFire(Trigger<W>.TriggerContext context) throws Exception {
-    return context.trigger().subTrigger(ACTUAL).invokeShouldFire(context)
-        || context.trigger().subTrigger(UNTIL).invokeShouldFire(context);
-  }
-
-  @Override
-  public void onFire(Trigger<W>.TriggerContext context) throws Exception {
-    ExecutableTrigger<W> actualSubtrigger = context.trigger().subTrigger(ACTUAL);
-    ExecutableTrigger<W> untilSubtrigger = context.trigger().subTrigger(UNTIL);
-
-    if (untilSubtrigger.invokeShouldFire(context)) {
-      untilSubtrigger.invokeOnFire(context);
-      actualSubtrigger.invokeClear(context);
-    } else {
-      // If until didn't fire, then the actual must have (or it is forbidden to call
-      // onFire) so we are done only if actual is done.
-      actualSubtrigger.invokeOnFire(context);
-      // Do not clear the until trigger, because it tracks data across firings.
-    }
-    updateFinishedState(context);
-  }
-
-  private void updateFinishedState(TriggerContext c) throws Exception {
-    boolean anyStillFinished = false;
-    for (ExecutableTrigger<W> subTrigger : c.trigger().subTriggers()) {
-      anyStillFinished |= c.forTrigger(subTrigger).trigger().isFinished();
-    }
-    c.trigger().setFinished(anyStillFinished);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java
deleted file mode 100644
index c5d943d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFn.java
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.common.collect.Ordering;
-
-import org.joda.time.Instant;
-
-import java.io.Serializable;
-import java.util.Objects;
-
-/**
- * <b><i>(Experimental)</i></b> A function from timestamps of input values to the timestamp for a
- * computed value.
- *
- * <p>The function is represented via three components:
- * <ol>
- *   <li>{@link #assignOutputTime} calculates an output timestamp for any input
- *       value in a particular window.</li>
- *   <li>The output timestamps for all non-late input values within a window are combined
- *       according to {@link #combine combine()}, a commutative and associative operation on
- *       the output timestamps.</li>
- *   <li>The output timestamp when windows merge is provided by {@link #merge merge()}.</li>
- * </ol>
- *
- * <p>This abstract class cannot be subclassed directly, by design: it may grow
- * in consumer-compatible ways that require mutually-exclusive default implementations. To
- * create a concrete subclass, extend {@link OutputTimeFn.Defaults} or
- * {@link OutputTimeFn.DependsOnlyOnWindow}. Note that as long as this class remains
- * experimental, we may also choose to change it in arbitrary backwards-incompatible ways.
- *
- * @param <W> the type of window. Contravariant: methods accepting any subtype of
- * {@code OutputTimeFn<W>} should use the parameter type {@code OutputTimeFn<? super W>}.
- */
-@Experimental(Experimental.Kind.OUTPUT_TIME)
-public abstract class OutputTimeFn<W extends BoundedWindow> implements Serializable {
-
-  /**
-   * Private constructor to prevent subclassing other than provided base classes.
-   */
-  private OutputTimeFn() { }
-
-  /**
-   * Returns the output timestamp to use for data depending on the given
-   * {@code inputTimestamp} in the specified {@code window}.
-   *
-   *
-   * <p>The result of this method must be between {@code inputTimestamp} and
-   * {@code window.maxTimestamp()} (inclusive on both sides).
-   *
-   * <p>This function must be monotonic across input timestamps. Specifically, if {@code A < B},
-   * then {@code assignOutputTime(A, window) <= assignOutputTime(B, window)}.
-   *
-   * <p>For a {@link WindowFn} that doesn't produce overlapping windows, this can (and typically
-   * should) just return {@code inputTimestamp}. In the presence of overlapping windows, it is
-   * suggested that the result in later overlapping windows is past the end of earlier windows
-   * so that the later windows don't prevent the watermark from
-   * progressing past the end of the earlier window.
-   *
-   * <p>See the overview of {@link OutputTimeFn} for the consistency properties required
-   * between {@link #assignOutputTime}, {@link #combine}, and {@link #merge}.
-   */
-  public abstract Instant assignOutputTime(Instant inputTimestamp, W window);
-
-  /**
-   * Combines the given output times, which must be from the same window, into an output time
-   * for a computed value.
-   *
-   * <ul>
-   *   <li>{@code combine} must be commutative: {@code combine(a, b).equals(combine(b, a))}.</li>
-   *   <li>{@code combine} must be associative:
-   *       {@code combine(a, combine(b, c)).equals(combine(combine(a, b), c))}.</li>
-   * </ul>
-   */
-  public abstract Instant combine(Instant outputTime, Instant otherOutputTime);
-
-  /**
-   * Merges the given output times, presumed to be combined output times for windows that
-   * are merging, into an output time for the {@code resultWindow}.
-   *
-   * <p>When windows {@code w1} and {@code w2} merge to become a new window {@code w1plus2},
-   * then {@link #merge} must be implemented such that the output time is the same as
-   * if all timestamps were assigned in {@code w1plus2}. Formally:
-   *
-   * <p>{@code fn.merge(w1plus2, fn.assignOutputTime(t1, w1), fn.assignOutputTime(t2, w2))}
-   *
-   * <p>must be equal to
-   *
-   * <p>{@code fn.combine(fn.assignOutputTime(t1, w1plus2), fn.assignOutputTime(t2, w1plus2))}
-   *
-   * <p>If the assigned time depends only on the window, the correct implementation of
-   * {@link #merge merge()} necessarily returns the result of
-   * {@link #assignOutputTime assignOutputTime(t1, w1plus2)}
-   * (which equals {@link #assignOutputTime assignOutputTime(t2, w1plus2)}).
-   * Defaults for this case are provided by {@link DependsOnlyOnWindow}.
-   *
-   * <p>For many other {@link OutputTimeFn} implementations, such as taking the earliest or latest
-   * timestamp, this will be the same as {@link #combine combine()}. Defaults for this
-   * case are provided by {@link Defaults}.
-   */
-  public abstract Instant merge(W intoWindow, Iterable<? extends Instant> mergingTimestamps);
-
-  /**
-   * Returns {@code true} if the result of combination of many output timestamps actually depends
-   * only on the earliest.
-   *
-   * <p>This may allow optimizations when it is very efficient to retrieve the earliest timestamp
-   * to be combined.
-   */
-  public abstract boolean dependsOnlyOnEarliestInputTimestamp();
-
-  /**
-   * Returns {@code true} if the result does not depend on what outputs were combined but only
-   * the window they are in. The canonical example is if all timestamps are sure to
-   * be the end of the window.
-   *
-   * <p>This may allow optimizations, since it is typically very efficient to retrieve the window
-   * and combining output timestamps is not necessary.
-   *
-   * <p>If the assigned output time for an implementation depends only on the window, consider
-   * extending {@link DependsOnlyOnWindow}, which returns {@code true} here and also provides
-   * a framework for easily implementing a correct {@link #merge}, {@link #combine} and
-   * {@link #assignOutputTime}.
-   */
-  public abstract boolean dependsOnlyOnWindow();
-
-  /**
-   * <b><i>(Experimental)</i></b> Default method implementations for {@link OutputTimeFn} where the
-   * output time depends on the input element timestamps and possibly the window.
-   *
-   * <p>To complete an implementation, override {@link #assignOutputTime}, at a minimum.
-   *
-   * <p>By default, {@link #combine} and {@link #merge} return the earliest timestamp of their
-   * inputs.
-   */
-  public abstract static class Defaults<W extends BoundedWindow> extends OutputTimeFn<W> {
-
-    protected Defaults() {
-      super();
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return the earlier of the two timestamps.
-     */
-    @Override
-    public Instant combine(Instant outputTimestamp, Instant otherOutputTimestamp) {
-      return Ordering.natural().min(outputTimestamp, otherOutputTimestamp);
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return the result of {@link #combine combine(outputTimestamp, otherOutputTimestamp)},
-     * by default.
-     */
-    @Override
-    public Instant merge(W resultWindow, Iterable<? extends Instant> mergingTimestamps) {
-      return OutputTimeFns.combineOutputTimes(this, mergingTimestamps);
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return {@code false}. An {@link OutputTimeFn} that depends only on the window should extend
-     * {@link OutputTimeFn.DependsOnlyOnWindow}.
-     */
-    @Override
-    public final boolean dependsOnlyOnWindow() {
-      return false;
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return {@code false} by default.
-     */
-    @Override
-    public boolean dependsOnlyOnEarliestInputTimestamp() {
-      return false;
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return {@code true} if the two {@link OutputTimeFn} instances have the same class, by
-     *         default.
-     */
-    @Override
-    public boolean equals(Object other) {
-      if (other == null) {
-        return false;
-      }
-
-      return this.getClass().equals(other.getClass());
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(getClass());
-    }
-  }
-
-  /**
-   * <b><i>(Experimental)</i></b> Default method implementations for {@link OutputTimeFn} when the
-   * output time depends only on the window.
-   *
-   * <p>To complete an implementation, override {@link #assignOutputTime(BoundedWindow)}.
-   */
-  public abstract static class DependsOnlyOnWindow<W extends BoundedWindow>
-      extends OutputTimeFn<W> {
-
-    protected DependsOnlyOnWindow() {
-      super();
-    }
-
-    /**
-     * Returns the output timestamp to use for data in the specified {@code window}.
-     *
-     * <p>Note that the result of this method must be between the maximum possible input timestamp
-     * in {@code window} and {@code window.maxTimestamp()} (inclusive on both sides).
-     *
-     * <p>For example, using {@code Sessions.withGapDuration(gapDuration)}, we know that all input
-     * timestamps must lie at least {@code gapDuration} from the end of the session, so
-     * {@code window.maxTimestamp() - gapDuration} is an acceptable assigned timestamp.
-     *
-     * @see #assignOutputTime(Instant, BoundedWindow)
-     */
-    protected abstract Instant assignOutputTime(W window);
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return the result of {@link #assignOutputTime(BoundedWindow) assignOutputTime(window)}.
-     */
-    @Override
-    public final Instant assignOutputTime(Instant timestamp, W window) {
-      return assignOutputTime(window);
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return the same timestamp as both argument timestamps, which are necessarily equal.
-     */
-    @Override
-    public final Instant combine(Instant outputTimestamp, Instant otherOutputTimestamp) {
-      return outputTimestamp;
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return the result of
-     * {@link #assignOutputTime(BoundedWindow) assignOutputTime(resultWindow)}.
-     */
-    @Override
-    public final Instant merge(W resultWindow, Iterable<? extends Instant> mergingTimestamps) {
-      return assignOutputTime(resultWindow);
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return {@code true}.
-     */
-    @Override
-    public final boolean dependsOnlyOnWindow() {
-      return true;
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return {@code true}. Since the output time depends only on the window, it can
-     * certainly be ascertained given a single input timestamp.
-     */
-    @Override
-    public final boolean dependsOnlyOnEarliestInputTimestamp() {
-      return true;
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return {@code true} if the two {@link OutputTimeFn} instances have the same class, by
-     *         default.
-     */
-    @Override
-    public boolean equals(Object other) {
-      if (other == null) {
-        return false;
-      }
-
-      return this.getClass().equals(other.getClass());
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(getClass());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFns.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFns.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFns.java
deleted file mode 100644
index dcc0f5b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/OutputTimeFns.java
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Ordering;
-
-import org.joda.time.Instant;
-
-import javax.annotation.Nullable;
-
-/**
- * <b><i>(Experimental)</i></b> Static utility methods and provided implementations for
- * {@link OutputTimeFn}.
- */
-@Experimental(Experimental.Kind.OUTPUT_TIME)
-public class OutputTimeFns {
-  /**
-   * The policy of outputting at the earliest of the input timestamps for non-late input data
-   * that led to a computed value.
-   *
-   * <p>For example, suppose <i>v</i><sub>1</sub> through <i>v</i><sub>n</sub> are all on-time
-   * elements being aggregated via some function {@code f} into
-   * {@code f}(<i>v</i><sub>1</sub>, ..., <i>v</i><sub>n</sub>). When emitted, the output
-   * timestamp of the result will be the earliest of the event time timestamps.
-   *
-   * <p>If data arrives late, it has no effect on the output timestamp.
-   */
-  public static OutputTimeFn<BoundedWindow> outputAtEarliestInputTimestamp() {
-    return new OutputAtEarliestInputTimestamp();
-  }
-
-  /**
-   * The policy of holding the watermark to the latest of the input timestamps
-   * for non-late input data that led to a computed value.
-   *
-   * <p>For example, suppose <i>v</i><sub>1</sub> through <i>v</i><sub>n</sub> are all on-time
-   * elements being aggregated via some function {@code f} into
-   * {@code f}(<i>v</i><sub>1</sub>, ..., <i>v</i><sub>n</sub>). When emitted, the output
-   * timestamp of the result will be the latest of the event time timestamps.
-   *
-   * <p>If data arrives late, it has no effect on the output timestamp.
-   */
-  public static OutputTimeFn<BoundedWindow> outputAtLatestInputTimestamp() {
-    return new OutputAtLatestInputTimestamp();
-  }
-
-  /**
-   * The policy of outputting with timestamps at the end of the window.
-   *
-   * <p>Note that this output timestamp depends only on the window. See
-   * {@link OutputTimeFn#dependsOnlyOnWindow()}.
-   *
-   * <p>When windows merge, instead of using {@link OutputTimeFn#combine} to obtain an output
-   * timestamp for the results in the new window, it is mandatory to obtain a new output
-   * timestamp from {@link OutputTimeFn#assignOutputTime} with the new window and an arbitrary
-   * timestamp (because it is guaranteed that the timestamp is irrelevant).
-   *
-   * <p>For non-merging window functions, this {@link OutputTimeFn} works transparently.
-   */
-  public static OutputTimeFn<BoundedWindow> outputAtEndOfWindow() {
-    return new OutputAtEndOfWindow();
-  }
-
-  /**
-   * Applies the given {@link OutputTimeFn} to the given output times, obtaining
-   * the output time for a value computed. See {@link OutputTimeFn#combine} for
-   * a full specification.
-   *
-   * @throws IllegalArgumentException if {@code outputTimes} is empty.
-   */
-  public static Instant combineOutputTimes(
-      OutputTimeFn<?> outputTimeFn, Iterable<? extends Instant> outputTimes) {
-    checkArgument(
-        !Iterables.isEmpty(outputTimes),
-        "Collection of output times must not be empty in %s.combineOutputTimes",
-        OutputTimeFns.class.getName());
-
-    @Nullable
-    Instant combinedOutputTime = null;
-    for (Instant outputTime : outputTimes) {
-      combinedOutputTime =
-          combinedOutputTime == null
-              ? outputTime : outputTimeFn.combine(combinedOutputTime, outputTime);
-    }
-    return combinedOutputTime;
-  }
-
-  /**
-   * See {@link #outputAtEarliestInputTimestamp}.
-   */
-  private static class OutputAtEarliestInputTimestamp extends OutputTimeFn.Defaults<BoundedWindow> {
-    @Override
-    public Instant assignOutputTime(Instant inputTimestamp, BoundedWindow window) {
-      return inputTimestamp;
-    }
-
-    @Override
-    public Instant combine(Instant outputTime, Instant otherOutputTime) {
-      return Ordering.natural().min(outputTime, otherOutputTime);
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return {@code true}. The result of any combine will be the earliest input timestamp.
-     */
-    @Override
-    public boolean dependsOnlyOnEarliestInputTimestamp() {
-      return true;
-    }
-  }
-
-  /**
-   * See {@link #outputAtLatestInputTimestamp}.
-   */
-  private static class OutputAtLatestInputTimestamp extends OutputTimeFn.Defaults<BoundedWindow> {
-    @Override
-    public Instant assignOutputTime(Instant inputTimestamp, BoundedWindow window) {
-      return inputTimestamp;
-    }
-
-    @Override
-    public Instant combine(Instant outputTime, Instant otherOutputTime) {
-      return Ordering.natural().max(outputTime, otherOutputTime);
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return {@code false}.
-     */
-    @Override
-    public boolean dependsOnlyOnEarliestInputTimestamp() {
-      return false;
-    }
-  }
-
-  private static class OutputAtEndOfWindow extends OutputTimeFn.DependsOnlyOnWindow<BoundedWindow> {
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return {@code window.maxTimestamp()}.
-     */
-    @Override
-    protected Instant assignOutputTime(BoundedWindow window) {
-      return window.maxTimestamp();
-    }
-  }
-}

[47/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/JAXBCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/JAXBCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/JAXBCoder.java
deleted file mode 100644
index 2b0190b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/JAXBCoder.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.Structs;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.FilterInputStream;
-import java.io.FilterOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.JAXBException;
-import javax.xml.bind.Marshaller;
-import javax.xml.bind.Unmarshaller;
-
-/**
- * A coder for JAXB annotated objects. This coder uses JAXB marshalling/unmarshalling mechanisms
- * to encode/decode the objects. Users must provide the {@code Class} of the JAXB annotated object.
- *
- * @param <T> type of JAXB annotated objects that will be serialized.
- */
-public class JAXBCoder<T> extends AtomicCoder<T> {
-
-  private final Class<T> jaxbClass;
-  private transient Marshaller jaxbMarshaller = null;
-  private transient Unmarshaller jaxbUnmarshaller = null;
-
-  public Class<T> getJAXBClass() {
-    return jaxbClass;
-  }
-
-  private JAXBCoder(Class<T> jaxbClass) {
-    this.jaxbClass = jaxbClass;
-  }
-
-  /**
-   * Create a coder for a given type of JAXB annotated objects.
-   *
-   * @param jaxbClass the {@code Class} of the JAXB annotated objects.
-   */
-  public static <T> JAXBCoder<T> of(Class<T> jaxbClass) {
-    return new JAXBCoder<>(jaxbClass);
-  }
-
-  @Override
-  public void encode(T value, OutputStream outStream, Context context)
-      throws CoderException, IOException {
-    try {
-      if (jaxbMarshaller == null) {
-        JAXBContext jaxbContext = JAXBContext.newInstance(jaxbClass);
-        jaxbMarshaller = jaxbContext.createMarshaller();
-      }
-
-      jaxbMarshaller.marshal(value, new FilterOutputStream(outStream) {
-        // JAXB closes the underlying stream so we must filter out those calls.
-        @Override
-        public void close() throws IOException {
-        }
-      });
-    } catch (JAXBException e) {
-      throw new CoderException(e);
-    }
-  }
-
-  @Override
-  public T decode(InputStream inStream, Context context) throws CoderException, IOException {
-    try {
-      if (jaxbUnmarshaller == null) {
-        JAXBContext jaxbContext = JAXBContext.newInstance(jaxbClass);
-        jaxbUnmarshaller = jaxbContext.createUnmarshaller();
-      }
-
-      @SuppressWarnings("unchecked")
-      T obj = (T) jaxbUnmarshaller.unmarshal(new FilterInputStream(inStream) {
-        // JAXB closes the underlying stream so we must filter out those calls.
-        @Override
-        public void close() throws IOException {
-        }
-      });
-      return obj;
-    } catch (JAXBException e) {
-      throw new CoderException(e);
-    }
-  }
-
-  @Override
-  public String getEncodingId() {
-    return getJAXBClass().getName();
-  }
-
-  ////////////////////////////////////////////////////////////////////////////////////
-  // JSON Serialization details below
-
-  private static final String JAXB_CLASS = "jaxb_class";
-
-  /**
-   * Constructor for JSON deserialization only.
-   */
-  @JsonCreator
-  public static <T> JAXBCoder<T> of(
-      @JsonProperty(JAXB_CLASS) String jaxbClassName) {
-    try {
-      @SuppressWarnings("unchecked")
-      Class<T> jaxbClass = (Class<T>) Class.forName(jaxbClassName);
-      return of(jaxbClass);
-    } catch (ClassNotFoundException e) {
-      throw new IllegalArgumentException(e);
-    }
-  }
-
-  @Override
-  public CloudObject asCloudObject() {
-    CloudObject result = super.asCloudObject();
-    Structs.addString(result, JAXB_CLASS, jaxbClass.getName());
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java
deleted file mode 100644
index 33085cf..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoder.java
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean;
-
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.common.base.Preconditions;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * A {@code KvCoder} encodes {@link KV}s.
- *
- * @param <K> the type of the keys of the KVs being transcoded
- * @param <V> the type of the values of the KVs being transcoded
- */
-public class KvCoder<K, V> extends KvCoderBase<KV<K, V>> {
-  public static <K, V> KvCoder<K, V> of(Coder<K> keyCoder,
-                                        Coder<V> valueCoder) {
-    return new KvCoder<>(keyCoder, valueCoder);
-  }
-
-  @JsonCreator
-  public static KvCoder<?, ?> of(
-      @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-      List<Coder<?>> components) {
-    Preconditions.checkArgument(components.size() == 2,
-        "Expecting 2 components, got " + components.size());
-    return of(components.get(0), components.get(1));
-  }
-
-  public static <K, V> List<Object> getInstanceComponents(
-      KV<K, V> exampleValue) {
-    return Arrays.asList(
-        exampleValue.getKey(),
-        exampleValue.getValue());
-  }
-
-  public Coder<K> getKeyCoder() {
-    return keyCoder;
-  }
-
-  public Coder<V> getValueCoder() {
-    return valueCoder;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private final Coder<K> keyCoder;
-  private final Coder<V> valueCoder;
-
-  private KvCoder(Coder<K> keyCoder, Coder<V> valueCoder) {
-    this.keyCoder = keyCoder;
-    this.valueCoder = valueCoder;
-  }
-
-  @Override
-  public void encode(KV<K, V> kv, OutputStream outStream, Context context)
-      throws IOException, CoderException  {
-    if (kv == null) {
-      throw new CoderException("cannot encode a null KV");
-    }
-    Context nestedContext = context.nested();
-    keyCoder.encode(kv.getKey(), outStream, nestedContext);
-    valueCoder.encode(kv.getValue(), outStream, nestedContext);
-  }
-
-  @Override
-  public KV<K, V> decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    Context nestedContext = context.nested();
-    K key = keyCoder.decode(inStream, nestedContext);
-    V value = valueCoder.decode(inStream, nestedContext);
-    return KV.of(key, value);
-  }
-
-  @Override
-  public List<? extends Coder<?>> getCoderArguments() {
-    return Arrays.asList(keyCoder, valueCoder);
-  }
-
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    verifyDeterministic("Key coder must be deterministic", getKeyCoder());
-    verifyDeterministic("Value coder must be deterministic", getValueCoder());
-  }
-
-  @Override
-  public boolean consistentWithEquals() {
-    return keyCoder.consistentWithEquals() && valueCoder.consistentWithEquals();
-  }
-
-  @Override
-  public Object structuralValue(KV<K, V> kv) throws Exception {
-    if (consistentWithEquals()) {
-      return kv;
-    } else {
-      return KV.of(getKeyCoder().structuralValue(kv.getKey()),
-                   getValueCoder().structuralValue(kv.getValue()));
-    }
-  }
-
-  @Override
-  public CloudObject asCloudObject() {
-    CloudObject result = super.asCloudObject();
-    addBoolean(result, PropertyNames.IS_PAIR_LIKE, true);
-    return result;
-  }
-
-  /**
-   * Returns whether both keyCoder and valueCoder are considered not expensive.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(KV<K, V> kv, Context context) {
-    return keyCoder.isRegisterByteSizeObserverCheap(kv.getKey(),
-                                                    context.nested())
-        && valueCoder.isRegisterByteSizeObserverCheap(kv.getValue(),
-                                                      context.nested());
-  }
-
-  /**
-   * Notifies ElementByteSizeObserver about the byte size of the
-   * encoded value using this coder.
-   */
-  @Override
-  public void registerByteSizeObserver(
-      KV<K, V> kv, ElementByteSizeObserver observer, Context context)
-      throws Exception {
-    if (kv == null) {
-      throw new CoderException("cannot encode a null KV");
-    }
-    keyCoder.registerByteSizeObserver(
-        kv.getKey(), observer, context.nested());
-    valueCoder.registerByteSizeObserver(
-        kv.getValue(), observer, context.nested());
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java
deleted file mode 100644
index 4a12ee0..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/KvCoderBase.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.List;
-
-/**
- * An abstract base class for KvCoder. Works around a Jackson2 bug tickled when building
- * {@link KvCoder} directly (as of this writing, Jackson2 walks off the end of
- * an array when it tries to deserialize a class with multiple generic type
- * parameters).  This class should be removed when possible.
- *
- * @param <T> the type of values being transcoded
- */
-@Deprecated
-public abstract class KvCoderBase<T> extends StandardCoder<T> {
-  /**
-   * A constructor used only for decoding from JSON.
-   *
-   * @param typeId present in the JSON encoding, but unused
-   * @param isPairLike present in the JSON encoding, but unused
-   */
-  @Deprecated
-  @JsonCreator
-  public static KvCoderBase<?> of(
-      // N.B. typeId is a required parameter here, since a field named "@type"
-      // is presented to the deserializer as an input.
-      //
-      // If this method did not consume the field, Jackson2 would observe an
-      // unconsumed field and a returned value of a derived type.  So Jackson2
-      // would attempt to update the returned value with the unconsumed field
-      // data.  The standard JsonDeserializer does not implement a mechanism for
-      // updating constructed values, so it would throw an exception, causing
-      // deserialization to fail.
-      @JsonProperty(value = "@type", required = false) String typeId,
-      @JsonProperty(value = PropertyNames.IS_PAIR_LIKE, required = false) boolean isPairLike,
-      @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) List<Coder<?>> components) {
-    return KvCoder.of(components);
-  }
-
-  protected KvCoderBase() {}
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java
deleted file mode 100644
index bc74404..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/ListCoder.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.common.base.Preconditions;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.List;
-
-/**
- * A {@link Coder} for {@link List}, using the format of {@link IterableLikeCoder}.
- *
- * @param <T> the type of the elements of the Lists being transcoded
- */
-public class ListCoder<T> extends IterableLikeCoder<T, List<T>> {
-
-  public static <T> ListCoder<T> of(Coder<T> elemCoder) {
-    return new ListCoder<>(elemCoder);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal operations below here.
-
-  @Override
-  protected final List<T> decodeToIterable(List<T> decodedElements) {
-    return decodedElements;
-  }
-
-  @JsonCreator
-  public static ListCoder<?> of(
-      @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-      List<Coder<?>> components) {
-    Preconditions.checkArgument(components.size() == 1,
-        "Expecting 1 component, got " + components.size());
-    return of((Coder<?>) components.get(0));
-  }
-
-  /**
-   * Returns the first element in this list if it is non-empty,
-   * otherwise returns {@code null}.
-   */
-  public static <T> List<Object> getInstanceComponents(List<T> exampleValue) {
-    return getInstanceComponentsHelper(exampleValue);
-  }
-
-  protected ListCoder(Coder<T> elemCoder) {
-    super(elemCoder, "List");
-  }
-
-  /**
-   * List sizes are always known, so ListIterable may be deterministic while
-   * the general IterableLikeCoder is not.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    verifyDeterministic(
-        "ListCoder.elemCoder must be deterministic", getElemCoder());
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java
deleted file mode 100644
index b6f3103..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoder.java
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Maps;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-
-/**
- * A {@link Coder} for {@link Map Maps} that encodes them according to provided
- * coders for keys and values.
- *
- * @param <K> the type of the keys of the Maps being transcoded
- * @param <V> the type of the values of the Maps being transcoded
- */
-public class MapCoder<K, V> extends MapCoderBase<Map<K, V>> {
-  /**
-   * Produces a MapCoder with the given keyCoder and valueCoder.
-   */
-  public static <K, V> MapCoder<K, V> of(
-      Coder<K> keyCoder,
-      Coder<V> valueCoder) {
-    return new MapCoder<>(keyCoder, valueCoder);
-  }
-
-  @JsonCreator
-  public static MapCoder<?, ?> of(
-      @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-      List<Coder<?>> components) {
-    Preconditions.checkArgument(components.size() == 2,
-        "Expecting 2 components, got " + components.size());
-    return of((Coder<?>) components.get(0), (Coder<?>) components.get(1));
-  }
-
-  /**
-   * Returns the key and value for an arbitrary element of this map,
-   * if it is non-empty, otherwise returns {@code null}.
-   */
-   public static <K, V> List<Object> getInstanceComponents(
-       Map<K, V> exampleValue) {
-     for (Map.Entry<K, V> entry : exampleValue.entrySet()) {
-       return Arrays.asList(entry.getKey(), entry.getValue());
-     }
-     return null;
-   }
-
-  public Coder<K> getKeyCoder() {
-    return keyCoder;
-  }
-
-  public Coder<V> getValueCoder() {
-    return valueCoder;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  Coder<K> keyCoder;
-  Coder<V> valueCoder;
-
-  MapCoder(Coder<K> keyCoder, Coder<V> valueCoder) {
-    this.keyCoder = keyCoder;
-    this.valueCoder = valueCoder;
-  }
-
-  @Override
-  public void encode(
-      Map<K, V> map,
-      OutputStream outStream,
-      Context context)
-      throws IOException, CoderException  {
-    if (map == null) {
-      throw new CoderException("cannot encode a null Map");
-    }
-    DataOutputStream dataOutStream = new DataOutputStream(outStream);
-    dataOutStream.writeInt(map.size());
-    for (Entry<K, V> entry : map.entrySet()) {
-      keyCoder.encode(entry.getKey(), outStream, context.nested());
-      valueCoder.encode(entry.getValue(), outStream, context.nested());
-    }
-    dataOutStream.flush();
-  }
-
-  @Override
-  public Map<K, V> decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    DataInputStream dataInStream = new DataInputStream(inStream);
-    int size = dataInStream.readInt();
-    Map<K, V> retval = Maps.newHashMapWithExpectedSize(size);
-    for (int i = 0; i < size; ++i) {
-      K key = keyCoder.decode(inStream, context.nested());
-      V value = valueCoder.decode(inStream, context.nested());
-      retval.put(key, value);
-    }
-    return retval;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return a {@link List} containing the key coder at index 0 and the value coder at index 1.
-   */
-  @Override
-  public List<? extends Coder<?>> getCoderArguments() {
-    return Arrays.asList(keyCoder, valueCoder);
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws NonDeterministicException always. Not all maps have a deterministic encoding.
-   * For example, {@code HashMap} comparison does not depend on element order, so
-   * two {@code HashMap} instances may be equal but produce different encodings.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    throw new NonDeterministicException(this,
-        "Ordering of entries in a Map may be non-deterministic.");
-  }
-
-  @Override
-  public void registerByteSizeObserver(
-      Map<K, V> map, ElementByteSizeObserver observer, Context context)
-      throws Exception {
-    observer.update(4L);
-    for (Entry<K, V> entry : map.entrySet()) {
-      keyCoder.registerByteSizeObserver(
-          entry.getKey(), observer, context.nested());
-      valueCoder.registerByteSizeObserver(
-          entry.getValue(), observer, context.nested());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java
deleted file mode 100644
index d32406c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/MapCoderBase.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.List;
-
-/**
- * An abstract base class for MapCoder. Works around a Jackson2 bug tickled when building
- * {@link MapCoder} directly (as of this writing, Jackson2 walks off the end of
- * an array when it tries to deserialize a class with multiple generic type
- * parameters).  This should be removed in favor of a better workaround.
- * @param <T> the type of values being transcoded
- */
-@Deprecated
-public abstract class MapCoderBase<T> extends StandardCoder<T> {
-  @Deprecated
-  @JsonCreator
-  public static MapCoderBase<?> of(
-      // N.B. typeId is a required parameter here, since a field named "@type"
-      // is presented to the deserializer as an input.
-      //
-      // If this method did not consume the field, Jackson2 would observe an
-      // unconsumed field and a returned value of a derived type.  So Jackson2
-      // would attempt to update the returned value with the unconsumed field
-      // data.  The standard JsonDeserializer does not implement a mechanism for
-      // updating constructed values, so it would throw an exception, causing
-      // deserialization to fail.
-      @JsonProperty(value = "@type", required = false) String typeId,
-      @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-      List<Coder<?>> components) {
-    return MapCoder.of(components);
-  }
-
-  protected MapCoderBase() {}
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/NullableCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/NullableCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/NullableCoder.java
deleted file mode 100644
index 5598a71..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/NullableCoder.java
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-import com.google.common.base.Optional;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.List;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link NullableCoder} encodes nullable values of type {@code T} using a nested
- * {@code Coder<T>} that does not tolerate {@code null} values. {@link NullableCoder} uses
- * exactly 1 byte per entry to indicate whether the value is {@code null}, then adds the encoding
- * of the inner coder for non-null values.
- *
- * @param <T> the type of the values being transcoded
- */
-public class NullableCoder<T> extends StandardCoder<T> {
-  public static <T> NullableCoder<T> of(Coder<T> valueCoder) {
-    return new NullableCoder<>(valueCoder);
-  }
-
-  @JsonCreator
-  public static NullableCoder<?> of(
-      @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-      List<Coder<?>> components) {
-    Preconditions.checkArgument(components.size() == 1,
-        "Expecting 1 components, got " + components.size());
-    return of(components.get(0));
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private final Coder<T> valueCoder;
-  private static final int ENCODE_NULL = 0;
-  private static final int ENCODE_PRESENT = 1;
-
-  private NullableCoder(Coder<T> valueCoder) {
-    this.valueCoder = valueCoder;
-  }
-
-  @Override
-  public void encode(@Nullable T value, OutputStream outStream, Context context)
-      throws IOException, CoderException  {
-    if (value == null) {
-      outStream.write(ENCODE_NULL);
-    } else {
-      outStream.write(ENCODE_PRESENT);
-      valueCoder.encode(value, outStream, context.nested());
-    }
-  }
-
-  @Override
-  @Nullable
-  public T decode(InputStream inStream, Context context) throws IOException, CoderException {
-    int b = inStream.read();
-    if (b == ENCODE_NULL) {
-      return null;
-    } else if (b != ENCODE_PRESENT) {
-        throw new CoderException(String.format(
-            "NullableCoder expects either a byte valued %s (null) or %s (present), got %s",
-            ENCODE_NULL, ENCODE_PRESENT, b));
-    }
-    return valueCoder.decode(inStream, context.nested());
-  }
-
-  @Override
-  public List<Coder<T>> getCoderArguments() {
-    return ImmutableList.of(valueCoder);
-  }
-
-  /**
-   * {@code NullableCoder} is deterministic if the nested {@code Coder} is.
-   *
-   * {@inheritDoc}
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    verifyDeterministic("Value coder must be deterministic", valueCoder);
-  }
-
-  /**
-   * {@code NullableCoder} is consistent with equals if the nested {@code Coder} is.
-   *
-   * {@inheritDoc}
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return valueCoder.consistentWithEquals();
-  }
-
-  @Override
-  public Object structuralValue(@Nullable T value) throws Exception {
-    if (value == null) {
-      return Optional.absent();
-    }
-    return Optional.of(valueCoder.structuralValue(value));
-  }
-
-  /**
-   * Overridden to short-circuit the default {@code StandardCoder} behavior of encoding and
-   * counting the bytes. The size is known (1 byte) when {@code value} is {@code null}, otherwise
-   * the size is 1 byte plus the size of nested {@code Coder}'s encoding of {@code value}.
-   *
-   * {@inheritDoc}
-   */
-  @Override
-  public void registerByteSizeObserver(
-      @Nullable T value, ElementByteSizeObserver observer, Context context) throws Exception {
-    observer.update(1);
-    if (value != null) {
-      valueCoder.registerByteSizeObserver(value, observer, context.nested());
-    }
-  }
-
-  /**
-   * Overridden to short-circuit the default {@code StandardCoder} behavior of encoding and
-   * counting the bytes. The size is known (1 byte) when {@code value} is {@code null}, otherwise
-   * the size is 1 byte plus the size of nested {@code Coder}'s encoding of {@code value}.
-   *
-   * {@inheritDoc}
-   */
-  @Override
-  protected long getEncodedElementByteSize(@Nullable T value, Context context) throws Exception {
-    if (value == null) {
-      return 1;
-    }
-
-    if (valueCoder instanceof StandardCoder) {
-      // If valueCoder is a StandardCoder then we can ask it directly for the encoded size of
-      // the value, adding 1 byte to count the null indicator.
-      return 1  + ((StandardCoder<T>) valueCoder)
-          .getEncodedElementByteSize(value, context.nested());
-    }
-
-    // If valueCoder is not a StandardCoder then fall back to the default StandardCoder behavior
-    // of encoding and counting the bytes. The encoding will include the null indicator byte.
-    return super.getEncodedElementByteSize(value, context);
-  }
-
-  /**
-   * {@code NullableCoder} is cheap if {@code valueCoder} is cheap.
-   *
-   * {@inheritDoc}
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(@Nullable T value, Context context) {
-    return valueCoder.isRegisterByteSizeObserverCheap(value, context.nested());
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java
deleted file mode 100644
index ef91ba9..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/Proto2Coder.java
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.coders;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.coders.protobuf.ProtoCoder;
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.Structs;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-import com.google.protobuf.ExtensionRegistry;
-import com.google.protobuf.Message;
-import com.google.protobuf.Parser;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.lang.reflect.Modifier;
-import java.util.Collections;
-import java.util.List;
-import java.util.Objects;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link Coder} using Google Protocol Buffers 2 binary format.
- *
- * <p>To learn more about Protocol Buffers, visit:
- * <a href="https://developers.google.com/protocol-buffers">https://developers.google.com/protocol-buffers</a>
- *
- * <p>To use, specify the {@link Coder} type on a PCollection containing Protocol Buffers messages.
- *
- * <pre>
- * {@code
- * PCollection<MyProto.Message> records =
- *     input.apply(...)
- *          .setCoder(Proto2Coder.of(MyProto.Message.class));
- * }
- * </pre>
- *
- * <p>Custom message extensions are also supported, but the coder must be made
- * aware of them explicitly:
- *
- * <pre>
- * {@code
- * PCollection<MyProto.Message> records =
- *     input.apply(...)
- *          .setCoder(Proto2Coder.of(MyProto.Message.class)
- *              .addExtensionsFrom(MyProto.class));
- * }
- * </pre>
- *
- * @param <T> the type of elements handled by this coder, must extend {@code Message}
- * @deprecated Use {@link ProtoCoder}.
- */
-@Deprecated
-public class Proto2Coder<T extends Message> extends AtomicCoder<T> {
-
-  /** The class of Protobuf message to be encoded. */
-  private final Class<T> protoMessageClass;
-
-  /**
-   * All extension host classes included in this Proto2Coder. The extensions from
-   * these classes will be included in the {@link ExtensionRegistry} used during
-   * encoding and decoding.
-   */
-  private final List<Class<?>> extensionHostClasses;
-
-  private Proto2Coder(Class<T> protoMessageClass, List<Class<?>> extensionHostClasses) {
-    this.protoMessageClass = protoMessageClass;
-    this.extensionHostClasses = extensionHostClasses;
-  }
-
-  private static final CoderProvider PROVIDER =
-      new CoderProvider() {
-        @Override
-        public <T> Coder<T> getCoder(TypeDescriptor<T> type) throws CannotProvideCoderException {
-          if (type.isSubtypeOf(new TypeDescriptor<Message>() {})) {
-            @SuppressWarnings("unchecked")
-            TypeDescriptor<? extends Message> messageType =
-                (TypeDescriptor<? extends Message>) type;
-            @SuppressWarnings("unchecked")
-            Coder<T> coder = (Coder<T>) Proto2Coder.of(messageType);
-            return coder;
-          } else {
-            throw new CannotProvideCoderException(
-                String.format(
-                    "Cannot provide Proto2Coder because %s "
-                        + "is not a subclass of protocol buffer Messsage",
-                    type));
-          }
-        }
-      };
-
-  public static CoderProvider coderProvider() {
-    return PROVIDER;
-  }
-
-  /**
-   * Returns a {@code Proto2Coder} for the given Protobuf message class.
-   */
-  public static <T extends Message> Proto2Coder<T> of(Class<T> protoMessageClass) {
-    return new Proto2Coder<T>(protoMessageClass, Collections.<Class<?>>emptyList());
-  }
-
-  /**
-   * Returns a {@code Proto2Coder} for the given Protobuf message class.
-   */
-  public static <T extends Message> Proto2Coder<T> of(TypeDescriptor<T> protoMessageType) {
-    @SuppressWarnings("unchecked")
-    Class<T> protoMessageClass = (Class<T>) protoMessageType.getRawType();
-    return of(protoMessageClass);
-  }
-
-  /**
-   * Produces a {@code Proto2Coder} like this one, but with the extensions from
-   * the given classes registered.
-   *
-   * @param moreExtensionHosts an iterable of classes that define a static
-   *      method {@code registerAllExtensions(ExtensionRegistry)}
-   */
-  public Proto2Coder<T> withExtensionsFrom(Iterable<Class<?>> moreExtensionHosts) {
-    for (Class<?> extensionHost : moreExtensionHosts) {
-      // Attempt to access the required method, to make sure it's present.
-      try {
-        Method registerAllExtensions =
-            extensionHost.getDeclaredMethod("registerAllExtensions", ExtensionRegistry.class);
-        checkArgument(
-            Modifier.isStatic(registerAllExtensions.getModifiers()),
-            "Method registerAllExtensions() must be static for use with Proto2Coder");
-      } catch (NoSuchMethodException | SecurityException e) {
-        throw new IllegalArgumentException(e);
-      }
-    }
-
-    return new Proto2Coder<T>(
-        protoMessageClass,
-        new ImmutableList.Builder<Class<?>>()
-            .addAll(extensionHostClasses)
-            .addAll(moreExtensionHosts)
-            .build());
-  }
-
-  /**
-   * See {@link #withExtensionsFrom(Iterable)}.
-   */
-  public Proto2Coder<T> withExtensionsFrom(Class<?>... extensionHosts) {
-    return withExtensionsFrom(ImmutableList.copyOf(extensionHosts));
-  }
-
-  /**
-   * Adds custom Protobuf extensions to the coder. Returns {@code this}
-   * for method chaining.
-   *
-   * @param extensionHosts must be a class that defines a static
-   *      method name {@code registerAllExtensions}
-   * @deprecated use {@link #withExtensionsFrom}
-   */
-  @Deprecated
-  public Proto2Coder<T> addExtensionsFrom(Class<?>... extensionHosts) {
-    return addExtensionsFrom(ImmutableList.copyOf(extensionHosts));
-  }
-
-  /**
-   * Adds custom Protobuf extensions to the coder. Returns {@code this}
-   * for method chaining.
-   *
-   * @param extensionHosts must be a class that defines a static
-   *      method name {@code registerAllExtensions}
-   * @deprecated use {@link #withExtensionsFrom}
-   */
-  @Deprecated
-  public Proto2Coder<T> addExtensionsFrom(Iterable<Class<?>> extensionHosts) {
-    for (Class<?> extensionHost : extensionHosts) {
-      try {
-        // Attempt to access the declared method, to make sure it's present.
-        extensionHost.getDeclaredMethod("registerAllExtensions", ExtensionRegistry.class);
-      } catch (NoSuchMethodException e) {
-        throw new IllegalArgumentException(e);
-      }
-      extensionHostClasses.add(extensionHost);
-    }
-    // The memoized extension registry needs to be recomputed because we have mutated this object.
-    synchronized (this) {
-      memoizedExtensionRegistry = null;
-      getExtensionRegistry();
-    }
-    return this;
-  }
-
-  @Override
-  public void encode(T value, OutputStream outStream, Context context) throws IOException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null " + protoMessageClass.getSimpleName());
-    }
-    if (context.isWholeStream) {
-      value.writeTo(outStream);
-    } else {
-      value.writeDelimitedTo(outStream);
-    }
-  }
-
-  @Override
-  public T decode(InputStream inStream, Context context) throws IOException {
-    if (context.isWholeStream) {
-      return getParser().parseFrom(inStream, getExtensionRegistry());
-    } else {
-      return getParser().parseDelimitedFrom(inStream, getExtensionRegistry());
-    }
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (this == other) {
-      return true;
-    }
-    if (!(other instanceof Proto2Coder)) {
-      return false;
-    }
-    Proto2Coder<?> otherCoder = (Proto2Coder<?>) other;
-    return protoMessageClass.equals(otherCoder.protoMessageClass)
-        && Sets.newHashSet(extensionHostClasses)
-            .equals(Sets.newHashSet(otherCoder.extensionHostClasses));
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(protoMessageClass, extensionHostClasses);
-  }
-
-  /**
-   * The encoding identifier is designed to support evolution as per the design of Protocol
-   * Buffers. In order to use this class effectively, carefully follow the advice in the Protocol
-   * Buffers documentation at
-   * <a href="https://developers.google.com/protocol-buffers/docs/proto#updating">Updating
-   * A Message Type</a>.
-   *
-   * <p>In particular, the encoding identifier is guaranteed to be the same for {@code Proto2Coder}
-   * instances of the same principal message class, and otherwise distinct. Loaded extensions do not
-   * affect the id, nor does it encode the full schema.
-   *
-   * <p>When modifying a message class, here are the broadest guidelines; see the above link
-   * for greater detail.
-   *
-   * <ul>
-   * <li>Do not change the numeric tags for any fields.
-   * <li>Never remove a <code>required</code> field.
-   * <li>Only add <code>optional</code> or <code>repeated</code> fields, with sensible defaults.
-   * <li>When changing the type of a field, consult the Protocol Buffers documentation to ensure
-   * the new and old types are interchangeable.
-   * </ul>
-   *
-   * <p>Code consuming this message class should be prepared to support <i>all</i> versions of
-   * the class until it is certain that no remaining serialized instances exist.
-   *
-   * <p>If backwards incompatible changes must be made, the best recourse is to change the name
-   * of your Protocol Buffers message class.
-   */
-  @Override
-  public String getEncodingId() {
-    return protoMessageClass.getName();
-  }
-
-  private transient Parser<T> memoizedParser;
-
-  private Parser<T> getParser() {
-    if (memoizedParser == null) {
-      try {
-        @SuppressWarnings("unchecked")
-        T protoMessageInstance = (T) protoMessageClass.getMethod("getDefaultInstance").invoke(null);
-        @SuppressWarnings("unchecked")
-        Parser<T> tParser = (Parser<T>) protoMessageInstance.getParserForType();
-        memoizedParser = tParser;
-      } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
-        throw new IllegalArgumentException(e);
-      }
-    }
-    return memoizedParser;
-  }
-
-  private transient ExtensionRegistry memoizedExtensionRegistry;
-
-  private synchronized ExtensionRegistry getExtensionRegistry() {
-    if (memoizedExtensionRegistry == null) {
-      ExtensionRegistry registry = ExtensionRegistry.newInstance();
-      for (Class<?> extensionHost : extensionHostClasses) {
-        try {
-          extensionHost
-              .getDeclaredMethod("registerAllExtensions", ExtensionRegistry.class)
-              .invoke(null, registry);
-        } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
-          throw new IllegalStateException(e);
-        }
-      }
-      memoizedExtensionRegistry = registry.getUnmodifiable();
-    }
-    return memoizedExtensionRegistry;
-  }
-
-  ////////////////////////////////////////////////////////////////////////////////////
-  // JSON Serialization details below
-
-  private static final String PROTO_MESSAGE_CLASS = "proto_message_class";
-  private static final String PROTO_EXTENSION_HOSTS = "proto_extension_hosts";
-
-  /**
-   * Constructor for JSON deserialization only.
-   */
-  @JsonCreator
-  public static <T extends Message> Proto2Coder<T> of(
-      @JsonProperty(PROTO_MESSAGE_CLASS) String protoMessageClassName,
-      @Nullable @JsonProperty(PROTO_EXTENSION_HOSTS) List<String> extensionHostClassNames) {
-
-    try {
-      @SuppressWarnings("unchecked")
-      Class<T> protoMessageClass = (Class<T>) Class.forName(protoMessageClassName);
-      List<Class<?>> extensionHostClasses = Lists.newArrayList();
-      if (extensionHostClassNames != null) {
-        for (String extensionHostClassName : extensionHostClassNames) {
-          extensionHostClasses.add(Class.forName(extensionHostClassName));
-        }
-      }
-      return of(protoMessageClass).withExtensionsFrom(extensionHostClasses);
-    } catch (ClassNotFoundException e) {
-      throw new IllegalArgumentException(e);
-    }
-  }
-
-  @Override
-  public CloudObject asCloudObject() {
-    CloudObject result = super.asCloudObject();
-    Structs.addString(result, PROTO_MESSAGE_CLASS, protoMessageClass.getName());
-    List<CloudObject> extensionHostClassNames = Lists.newArrayList();
-    for (Class<?> clazz : extensionHostClasses) {
-      extensionHostClassNames.add(CloudObject.forString(clazz.getName()));
-    }
-    Structs.addList(result, PROTO_EXTENSION_HOSTS, extensionHostClassNames);
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java
deleted file mode 100644
index 593c9f0..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SerializableCoder.java
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.ObjectStreamClass;
-import java.io.OutputStream;
-import java.io.Serializable;
-
-/**
- * A {@link Coder} for Java classes that implement {@link Serializable}.
- *
- * <p>To use, specify the coder type on a PCollection:
- * <pre>
- * {@code
- *   PCollection<MyRecord> records =
- *       foo.apply(...).setCoder(SerializableCoder.of(MyRecord.class));
- * }
- * </pre>
- *
- * <p>{@link SerializableCoder} does not guarantee a deterministic encoding, as Java
- * serialization may produce different binary encodings for two equivalent
- * objects.
- *
- * @param <T> the type of elements handled by this coder
- */
-public class SerializableCoder<T extends Serializable> extends AtomicCoder<T> {
-
-  /**
-   * Returns a {@link SerializableCoder} instance for the provided element type.
-   * @param <T> the element type
-   */
-  public static <T extends Serializable> SerializableCoder<T> of(TypeDescriptor<T> type) {
-    @SuppressWarnings("unchecked")
-    Class<T> clazz = (Class<T>) type.getRawType();
-    return of(clazz);
-  }
-
-  /**
-   * Returns a {@link SerializableCoder} instance for the provided element class.
-   * @param <T> the element type
-   */
-  public static <T extends Serializable> SerializableCoder<T> of(Class<T> clazz) {
-    return new SerializableCoder<>(clazz);
-  }
-
-  @JsonCreator
-  @SuppressWarnings("unchecked")
-  public static SerializableCoder<?> of(@JsonProperty("type") String classType)
-      throws ClassNotFoundException {
-    Class<?> clazz = Class.forName(classType);
-    if (!Serializable.class.isAssignableFrom(clazz)) {
-      throw new ClassNotFoundException(
-          "Class " + classType + " does not implement Serializable");
-    }
-    return of((Class<? extends Serializable>) clazz);
-  }
-
-  /**
-   * A {@link CoderProvider} that constructs a {@link SerializableCoder}
-   * for any class that implements serializable.
-   */
-  public static final CoderProvider PROVIDER = new CoderProvider() {
-    @Override
-    public <T> Coder<T> getCoder(TypeDescriptor<T> typeDescriptor)
-        throws CannotProvideCoderException {
-      Class<?> clazz = typeDescriptor.getRawType();
-      if (Serializable.class.isAssignableFrom(clazz)) {
-        @SuppressWarnings("unchecked")
-        Class<? extends Serializable> serializableClazz =
-            (Class<? extends Serializable>) clazz;
-        @SuppressWarnings("unchecked")
-        Coder<T> coder = (Coder<T>) SerializableCoder.of(serializableClazz);
-        return coder;
-      } else {
-        throw new CannotProvideCoderException(
-            "Cannot provide SerializableCoder because " + typeDescriptor
-            + " does not implement Serializable");
-      }
-    }
-  };
-
-
-  private final Class<T> type;
-
-  protected SerializableCoder(Class<T> type) {
-    this.type = type;
-  }
-
-  public Class<T> getRecordType() {
-    return type;
-  }
-
-  @Override
-  public void encode(T value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    try {
-      ObjectOutputStream oos = new ObjectOutputStream(outStream);
-      oos.writeObject(value);
-      oos.flush();
-    } catch (IOException exn) {
-      throw new CoderException("unable to serialize record " + value, exn);
-    }
-  }
-
-  @Override
-  public T decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    try {
-      ObjectInputStream ois = new ObjectInputStream(inStream);
-      return type.cast(ois.readObject());
-    } catch (ClassNotFoundException e) {
-      throw new CoderException("unable to deserialize record", e);
-    }
-  }
-
-  @Override
-  public String getEncodingId() {
-    return String.format("%s:%s",
-        type.getName(),
-        ObjectStreamClass.lookup(type).getSerialVersionUID());
-  }
-
-  @Override
-  public CloudObject asCloudObject() {
-    CloudObject result = super.asCloudObject();
-    result.put("type", type.getName());
-    return result;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws NonDeterministicException always. Java serialization is not
-   *         deterministic with respect to {@link Object#equals} for all types.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    throw new NonDeterministicException(this,
-        "Java Serialization may be non-deterministic.");
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (getClass() != other.getClass()) {
-      return false;
-    }
-    return type == ((SerializableCoder<?>) other).type;
-  }
-
-  @Override
-  public int hashCode() {
-    return type.hashCode();
-  }
-
-  // This coder inherits isRegisterByteSizeObserverCheap,
-  // getEncodedElementByteSize and registerByteSizeObserver
-  // from StandardCoder. Looks like we cannot do much better
-  // in this case.
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java
deleted file mode 100644
index 36b3606..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/SetCoder.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.common.base.Preconditions;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-/**
- * A {@link SetCoder} encodes any {@link Set} using the format of {@link IterableLikeCoder}. The
- * elements may not be in a deterministic order, depending on the {@code Set} implementation.
- *
- * @param <T> the type of the elements of the set
- */
-public class SetCoder<T> extends IterableLikeCoder<T, Set<T>> {
-
-  /**
-   * Produces a {@link SetCoder} with the given {@code elementCoder}.
-   */
-  public static <T> SetCoder<T> of(Coder<T> elementCoder) {
-    return new SetCoder<>(elementCoder);
-  }
-
-  /**
-   * Dynamically typed constructor for JSON deserialization.
-   */
-  @JsonCreator
-  public static SetCoder<?> of(
-      @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-      List<Object> components) {
-    Preconditions.checkArgument(components.size() == 1,
-        "Expecting 1 component, got " + components.size());
-    return of((Coder<?>) components.get(0));
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws NonDeterministicException always. Sets are not ordered, but
-   *         they are encoded in the order of an arbitrary iteration.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    throw new NonDeterministicException(this,
-        "Ordering of elements in a set may be non-deterministic.");
-  }
-
-  /**
-   * Returns the first element in this set if it is non-empty,
-   * otherwise returns {@code null}.
-   */
-  public static <T> List<Object> getInstanceComponents(
-      Set<T> exampleValue) {
-    return getInstanceComponentsHelper(exampleValue);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Internal operations below here.
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return A new {@link Set} built from the elements in the {@link List} decoded by
-   * {@link IterableLikeCoder}.
-   */
-  @Override
-  protected final Set<T> decodeToIterable(List<T> decodedElements) {
-    return new HashSet<>(decodedElements);
-  }
-
-  protected SetCoder(Coder<T> elemCoder) {
-    super(elemCoder, "Set");
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java
deleted file mode 100644
index faa9861..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StandardCoder.java
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addList;
-import static com.google.cloud.dataflow.sdk.util.Structs.addString;
-import static com.google.cloud.dataflow.sdk.util.Structs.addStringList;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-import com.google.common.collect.Lists;
-import com.google.common.io.ByteStreams;
-import com.google.common.io.CountingOutputStream;
-
-import java.io.ByteArrayOutputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * An abstract base class to implement a {@link Coder} that defines equality, hashing, and printing
- * via the class name and recursively using {@link #getComponents}.
- *
- * <p>To extend {@link StandardCoder}, override the following methods as appropriate:
- *
- * <ul>
- *   <li>{@link #getComponents}: the default implementation returns {@link #getCoderArguments}.</li>
- *   <li>{@link #getEncodedElementByteSize} and
- *       {@link #isRegisterByteSizeObserverCheap}: the
- *       default implementation encodes values to bytes and counts the bytes, which is considered
- *       expensive.</li>
- *   <li>{@link #getEncodingId} and {@link #getAllowedEncodings}: by default, the encoding id
- *       is the empty string, so only the canonical name of the subclass will be used for
- *       compatibility checks, and no other encoding ids are allowed.</li>
- * </ul>
- */
-public abstract class StandardCoder<T> implements Coder<T> {
-  protected StandardCoder() {}
-
-  @Override
-  public String getEncodingId() {
-    return "";
-  }
-
-  @Override
-  public Collection<String> getAllowedEncodings() {
-    return Collections.emptyList();
-  }
-
-  /**
-   * Returns the list of {@link Coder Coders} that are components of this {@link Coder}.
-   */
-  public List<? extends Coder<?>> getComponents() {
-    List<? extends Coder<?>> coderArguments = getCoderArguments();
-    if (coderArguments == null) {
-      return Collections.emptyList();
-    } else {
-      return coderArguments;
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true} if the two {@link StandardCoder} instances have the
-   * same class and equal components.
-   */
-  @Override
-  public boolean equals(Object o) {
-    if (o == null || this.getClass() != o.getClass()) {
-      return false;
-    }
-    StandardCoder<?> that = (StandardCoder<?>) o;
-    return this.getComponents().equals(that.getComponents());
-  }
-
-  @Override
-  public int hashCode() {
-    return getClass().hashCode() * 31 + getComponents().hashCode();
-  }
-
-  @Override
-  public String toString() {
-    String s = getClass().getName();
-    s = s.substring(s.lastIndexOf('.') + 1);
-    List<? extends Coder<?>> componentCoders = getComponents();
-    if (!componentCoders.isEmpty()) {
-      s += "(";
-      boolean first = true;
-      for (Coder<?> componentCoder : componentCoders) {
-        if (first) {
-          first = false;
-        } else {
-          s += ", ";
-        }
-        s += componentCoder.toString();
-      }
-      s += ")";
-    }
-    return s;
-  }
-
-  @Override
-  public CloudObject asCloudObject() {
-    CloudObject result = CloudObject.forClass(getClass());
-
-    List<? extends Coder<?>> components = getComponents();
-    if (!components.isEmpty()) {
-      List<CloudObject> cloudComponents = new ArrayList<>(components.size());
-      for (Coder<?> coder : components) {
-        cloudComponents.add(coder.asCloudObject());
-      }
-      addList(result, PropertyNames.COMPONENT_ENCODINGS, cloudComponents);
-    }
-
-    String encodingId = getEncodingId();
-    checkNotNull(encodingId, "Coder.getEncodingId() must not return null.");
-    if (!encodingId.isEmpty()) {
-      addString(result, PropertyNames.ENCODING_ID, encodingId);
-    }
-
-    Collection<String> allowedEncodings = getAllowedEncodings();
-    if (!allowedEncodings.isEmpty()) {
-      addStringList(result, PropertyNames.ALLOWED_ENCODINGS, Lists.newArrayList(allowedEncodings));
-    }
-
-    return result;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code false} unless it is overridden. {@link StandardCoder#registerByteSizeObserver}
-   *         invokes {@link #getEncodedElementByteSize} which requires re-encoding an element
-   *         unless it is overridden. This is considered expensive.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(T value, Context context) {
-    return false;
-  }
-
-  /**
-   * Returns the size in bytes of the encoded value using this coder.
-   */
-  protected long getEncodedElementByteSize(T value, Context context)
-      throws Exception {
-    try {
-      CountingOutputStream os = new CountingOutputStream(ByteStreams.nullOutputStream());
-      encode(value, os, context);
-      return os.getCount();
-    } catch (Exception exn) {
-      throw new IllegalArgumentException(
-          "Unable to encode element '" + value + "' with coder '" + this + "'.", exn);
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * <p>For {@link StandardCoder} subclasses, this notifies {@code observer} about the byte size
-   * of the encoded value using this coder as returned by {@link #getEncodedElementByteSize}.
-   */
-  @Override
-  public void registerByteSizeObserver(
-      T value, ElementByteSizeObserver observer, Context context)
-      throws Exception {
-    observer.update(getEncodedElementByteSize(value, context));
-  }
-
-  protected void verifyDeterministic(String message, Iterable<Coder<?>> coders)
-      throws NonDeterministicException {
-    for (Coder<?> coder : coders) {
-      try {
-        coder.verifyDeterministic();
-      } catch (NonDeterministicException e) {
-        throw new NonDeterministicException(this, message, e);
-      }
-    }
-  }
-
-  protected void verifyDeterministic(String message, Coder<?>... coders)
-      throws NonDeterministicException {
-    verifyDeterministic(message, Arrays.asList(coders));
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code false} for {@link StandardCoder} unless overridden.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return false;
-  }
-
-  @Override
-  public Object structuralValue(T value) throws Exception {
-    if (value != null && consistentWithEquals()) {
-      return value;
-    } else {
-      try {
-        ByteArrayOutputStream os = new ByteArrayOutputStream();
-        encode(value, os, Context.OUTER);
-        return new StructuralByteArray(os.toByteArray());
-      } catch (Exception exn) {
-        throw new IllegalArgumentException(
-            "Unable to encode element '" + value + "' with coder '" + this + "'.", exn);
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java
deleted file mode 100644
index 1fc1247..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringDelegateCoder.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.coders.protobuf.ProtoCoder;
-
-import java.lang.reflect.InvocationTargetException;
-
-/**
- * A {@link Coder} that wraps a {@code Coder<String>}
- * and encodes/decodes values via string representations.
- *
- * <p>To decode, the input byte stream is decoded to
- * a {@link String}, and this is passed to the single-argument
- * constructor for {@code T}.
- *
- * <p>To encode, the input value is converted via {@code toString()},
- * and this string is encoded.
- *
- * <p>In order for this to operate correctly for a class {@code Clazz},
- * it must be the case for any instance {@code x} that
- * {@code x.equals(new Clazz(x.toString()))}.
- *
- * <p>This method of encoding is not designed for ease of evolution of {@code Clazz};
- * it should only be used in cases where the class is stable or the encoding is not
- * important. If evolution of the class is important, see {@link ProtoCoder}, {@link AvroCoder},
- * or {@link JAXBCoder}.
- *
- * @param <T> The type of objects coded.
- */
-public class StringDelegateCoder<T> extends DelegateCoder<T, String> {
-  /** The coded class; supplies the String constructor, the encoding id and the display name. */
-  private final Class<T> clazz;
-
-  /**
-   * Returns a {@code StringDelegateCoder} for values of type {@code clazz}.
-   */
-  public static <T> StringDelegateCoder<T> of(Class<T> clazz) {
-    return new StringDelegateCoder<>(clazz);
-  }
-
-  /**
-   * Builds a coder that encodes via {@code toString()} and decodes by reflectively invoking
-   * the public single-{@code String}-argument constructor of {@code clazz}.
-   */
-  protected StringDelegateCoder(final Class<T> clazz) {
-    super(StringUtf8Coder.of(),
-      new CodingFunction<T, String>() {
-        @Override
-        public String apply(T value) {
-          return value.toString();
-        }
-      },
-      new CodingFunction<String, T>() {
-        @Override
-        public T apply(String text) throws
-            NoSuchMethodException,
-            InstantiationException,
-            IllegalAccessException,
-            InvocationTargetException {
-          // The constructor is looked up on every call.
-          return clazz.getConstructor(String.class).newInstance(text);
-        }
-      });
-
-    this.clazz = clazz;
-  }
-
-  /**
-   * The encoding id is the fully qualified name of the encoded/decoded class.
-   */
-  @Override
-  public String getEncodingId() {
-    return clazz.getName();
-  }
-
-  @Override
-  public String toString() {
-    return "StringDelegateCoder(" + clazz + ")";
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java
deleted file mode 100644
index 179840c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StringUtf8Coder.java
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.ExposedByteArrayOutputStream;
-import com.google.cloud.dataflow.sdk.util.StreamUtils;
-import com.google.cloud.dataflow.sdk.util.VarInt;
-import com.google.common.base.Utf8;
-import com.google.common.io.ByteStreams;
-import com.google.common.io.CountingOutputStream;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.UTFDataFormatException;
-import java.nio.charset.StandardCharsets;
-
-/**
- * A {@link Coder} that encodes {@link String Strings} in UTF-8 encoding.
- * If in a nested context, prefixes the string with an integer length field,
- * encoded via a {@link VarIntCoder}.
- */
-public class StringUtf8Coder extends AtomicCoder<String> {
-
-  private static final StringUtf8Coder INSTANCE = new StringUtf8Coder();
-
-  /** Returns the shared {@code StringUtf8Coder} instance. */
-  @JsonCreator
-  public static StringUtf8Coder of() {
-    return INSTANCE;
-  }
-
-  private StringUtf8Coder() {}
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /** Writes the UTF-8 bytes of {@code value} to {@code dos}, preceded by their count. */
-  private static void writeString(String value, DataOutputStream dos)
-      throws IOException {
-    byte[] utf8 = value.getBytes(StandardCharsets.UTF_8);
-    VarInt.encode(utf8.length, dos);
-    dos.write(utf8);
-  }
-
-  /** Reads a length prefix from {@code dis}, then that many bytes, decoded as UTF-8. */
-  private static String readString(DataInputStream dis) throws IOException {
-    int length = VarInt.decodeInt(dis);
-    if (length < 0) {
-      throw new CoderException("Invalid encoded string length: " + length);
-    }
-    byte[] utf8 = new byte[length];
-    dis.readFully(utf8);
-    return new String(utf8, StandardCharsets.UTF_8);
-  }
-
-  /**
-   * Encodes {@code value} as UTF-8. In a nested context the bytes are length-prefixed;
-   * in a whole-stream context they are written without a prefix.
-   *
-   * @throws CoderException if {@code value} is {@code null}
-   */
-  @Override
-  public void encode(String value, OutputStream outStream, Context context)
-      throws IOException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null String");
-    }
-    if (!context.isWholeStream) {
-      writeString(value, new DataOutputStream(outStream));
-      return;
-    }
-
-    byte[] utf8 = value.getBytes(StandardCharsets.UTF_8);
-    if (outStream instanceof ExposedByteArrayOutputStream) {
-      ((ExposedByteArrayOutputStream) outStream).writeAndOwn(utf8);
-    } else {
-      outStream.write(utf8);
-    }
-  }
-
-  /**
-   * Decodes a UTF-8 string. In a whole-stream context all remaining bytes of
-   * {@code inStream} are consumed; in a nested context a length-prefixed string is read.
-   *
-   * @throws CoderException if the nested encoding is truncated or malformed
-   */
-  @Override
-  public String decode(InputStream inStream, Context context)
-      throws IOException {
-    if (context.isWholeStream) {
-      return new String(StreamUtils.getBytes(inStream), StandardCharsets.UTF_8);
-    }
-    try {
-      return readString(new DataInputStream(inStream));
-    } catch (EOFException | UTFDataFormatException exn) {
-      // These exceptions indicate a decoding problem, so rebrand them accordingly.
-      throw new CoderException(exn);
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. This coder is injective.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return the byte size of the UTF-8 encoding of a string or, in a nested context,
-   * the byte size of the encoding plus the encoded length prefix.
-   */
-  @Override
-  protected long getEncodedElementByteSize(String value, Context context)
-      throws Exception {
-    if (value == null) {
-      throw new CoderException("cannot encode a null String");
-    }
-    if (context.isWholeStream) {
-      return Utf8.encodedLength(value);
-    }
-
-    // Nested context: write to a counting sink to measure prefix plus payload.
-    CountingOutputStream counter =
-        new CountingOutputStream(ByteStreams.nullOutputStream());
-    writeString(value, new DataOutputStream(counter));
-    return counter.getCount();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StructuralByteArray.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StructuralByteArray.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StructuralByteArray.java
deleted file mode 100644
index ea18eb9..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/StructuralByteArray.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.coders;
-
-import static com.google.api.client.util.Base64.encodeBase64String;
-
-import java.util.Arrays;
-
-/**
- * A wrapper around a byte[] that uses structural, value-based
- * equality rather than byte[]'s normal object identity.
- */
-public class StructuralByteArray {
-  byte[] value;
-
-  /** Wraps {@code value}; the array reference is stored as-is, without copying. */
-  public StructuralByteArray(byte[] value) {
-    this.value = value;
-  }
-
-  /** Returns the wrapped array itself, not a copy. */
-  public byte[] getValue() {
-    return value;
-  }
-
-  /**
-   * Two instances are equal when their wrapped arrays have equal contents.
-   */
-  @Override
-  public boolean equals(Object o) {
-    if (!(o instanceof StructuralByteArray)) {
-      return false;
-    }
-    StructuralByteArray other = (StructuralByteArray) o;
-    return Arrays.equals(value, other.value);
-  }
-
-  /** Content-based hash, matching {@link #equals}. */
-  @Override
-  public int hashCode() {
-    return Arrays.hashCode(value);
-  }
-
-  /** Renders the wrapped bytes in Base64 with a {@code base64:} prefix. */
-  @Override
-  public String toString() {
-    String encoded = encodeBase64String(value);
-    return "base64:" + encoded;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java
deleted file mode 100644
index bed88b0..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TableRowJsonCoder.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.api.services.bigquery.model.TableRow;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.SerializationFeature;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * A {@link Coder} that encodes BigQuery {@link TableRow} objects in their native JSON format.
- */
-public class TableRowJsonCoder extends AtomicCoder<TableRow> {
-
-  // FAIL_ON_EMPTY_BEANS is disabled in order to handle null values in
-  // TableRow.
-  private static final ObjectMapper MAPPER =
-      new ObjectMapper().disable(SerializationFeature.FAIL_ON_EMPTY_BEANS);
-
-  private static final TableRowJsonCoder INSTANCE = new TableRowJsonCoder();
-
-  /** Returns the shared {@code TableRowJsonCoder} instance. */
-  @JsonCreator
-  public static TableRowJsonCoder of() {
-    return INSTANCE;
-  }
-
-  private TableRowJsonCoder() { }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /** Serializes {@code value} to JSON text and encodes that text as a UTF-8 string. */
-  @Override
-  public void encode(TableRow value, OutputStream outStream, Context context)
-      throws IOException {
-    String json = MAPPER.writeValueAsString(value);
-    StringUtf8Coder.of().encode(json, outStream, context);
-  }
-
-  /** Decodes a UTF-8 string and parses it as the JSON form of a {@link TableRow}. */
-  @Override
-  public TableRow decode(InputStream inStream, Context context)
-      throws IOException {
-    String json = StringUtf8Coder.of().decode(inStream, context);
-    return MAPPER.readValue(json, TableRow.class);
-  }
-
-  /** Returns the encoded size of the JSON text that {@link #encode} would write. */
-  @Override
-  protected long getEncodedElementByteSize(TableRow value, Context context)
-      throws Exception {
-    String json = MAPPER.writeValueAsString(value);
-    return StringUtf8Coder.of().getEncodedElementByteSize(json, context);
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws NonDeterministicException always. A {@link TableRow} can hold arbitrary
-   *         {@link Object} instances, which makes the encoding non-deterministic.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    throw new NonDeterministicException(this,
-        "TableCell can hold arbitrary instances, which may be non-deterministic.");
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java
deleted file mode 100644
index 9250c68..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/TextualIntegerCoder.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * A {@link Coder} that encodes {@code Integer Integers} as the ASCII bytes of
- * their textual, decimal, representation.
- */
-public class TextualIntegerCoder extends AtomicCoder<Integer> {
-
-  /** Returns a new {@code TextualIntegerCoder}. */
-  @JsonCreator
-  public static TextualIntegerCoder of() {
-    return new TextualIntegerCoder();
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  protected TextualIntegerCoder() {}
-
-  /**
-   * Encodes the decimal text of {@code value} using {@link StringUtf8Coder}.
-   *
-   * @throws CoderException if {@code value} is {@code null}
-   */
-  @Override
-  public void encode(Integer value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Integer");
-    }
-    String digits = value.toString();
-    StringUtf8Coder.of().encode(digits, outStream, context);
-  }
-
-  /**
-   * Decodes a string with {@link StringUtf8Coder} and parses it as an integer.
-   *
-   * @throws CoderException if the decoded text is not a valid integer
-   */
-  @Override
-  public Integer decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    String digits = StringUtf8Coder.of().decode(inStream, context);
-    try {
-      return Integer.valueOf(digits);
-    } catch (NumberFormatException exn) {
-      throw new CoderException("error when decoding a textual integer", exn);
-    }
-  }
-
-  /** Returns the encoded size of the decimal text of {@code value}. */
-  @Override
-  protected long getEncodedElementByteSize(Integer value, Context context) throws Exception {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Integer");
-    }
-    String digits = value.toString();
-    return StringUtf8Coder.of().getEncodedElementByteSize(digits, context);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java
deleted file mode 100644
index 18ec250..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarIntCoder.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.VarInt;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.UTFDataFormatException;
-
-/**
- * A {@link Coder} that encodes {@link Integer Integers} using between 1 and 5 bytes. Negative
- * numbers always take 5 bytes, so {@link BigEndianIntegerCoder} may be preferable for
- * integers that are known to often be large or negative.
- */
-public class VarIntCoder extends AtomicCoder<Integer> {
-
-  /** Returns the shared {@code VarIntCoder} instance. */
-  @JsonCreator
-  public static VarIntCoder of() {
-    return INSTANCE;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static final VarIntCoder INSTANCE =
-      new VarIntCoder();
-
-  private VarIntCoder() {}
-
-  /**
-   * Writes {@code value} to {@code outStream} as a variable-length integer.
-   *
-   * @throws CoderException if {@code value} is {@code null}
-   */
-  @Override
-  public void encode(Integer value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Integer");
-    }
-    VarInt.encode(value.intValue(), outStream);
-  }
-
-  /**
-   * Reads a variable-length integer from {@code inStream}.
-   *
-   * @throws CoderException if the stream ends early or the encoding is malformed
-   */
-  @Override
-  public Integer decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    try {
-      return VarInt.decodeInt(inStream);
-    } catch (EOFException | UTFDataFormatException exn) {
-      // These exceptions correspond to decoding problems, so change
-      // what kind of exception they're branded as.
-      throw new CoderException(exn);
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. {@link VarIntCoder} is injective.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. {@link #getEncodedElementByteSize} is cheap.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(Integer value, Context context) {
-    return true;
-  }
-
-  /**
-   * Returns the number of bytes {@link #encode} writes for {@code value}.
-   *
-   * @throws CoderException if {@code value} is {@code null}
-   */
-  @Override
-  protected long getEncodedElementByteSize(Integer value, Context context)
-      throws Exception {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Integer");
-    }
-    // This coder is documented to use at most 5 bytes, including for negative values.
-    // longValue() sign-extends a negative int to 64 bits, which getLength(long) would
-    // measure as a full-width long, so keep only the low 32 bits before measuring.
-    return VarInt.getLength(value.longValue() & 0xFFFFFFFFL);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java
deleted file mode 100644
index 520245e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/VarLongCoder.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.util.VarInt;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.UTFDataFormatException;
-
-/**
- * A {@link Coder} that encodes {@link Long Longs} using between 1 and 10 bytes. Negative
- * numbers always take 10 bytes, so {@link BigEndianLongCoder} may be preferable for
- * longs that are known to often be large or negative.
- */
-public class VarLongCoder extends AtomicCoder<Long> {
-
-  @JsonCreator
-  public static VarLongCoder of() {
-    return INSTANCE;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private static final VarLongCoder INSTANCE = new VarLongCoder();
-
-  private VarLongCoder() {}
-
-  @Override
-  public void encode(Long value, OutputStream outStream, Context context)
-      throws IOException, CoderException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Long");
-    }
-    VarInt.encode(value.longValue(), outStream);
-  }
-
-  @Override
-  public Long decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    try {
-      return VarInt.decodeLong(inStream);
-    } catch (EOFException | UTFDataFormatException exn) {
-      // These exceptions correspond to decoding problems, so change
-      // what kind of exception they're branded as.
-      throw new CoderException(exn);
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. {@link VarLongCoder} is injective.
-   */
-  @Override
-  public boolean consistentWithEquals() {
-    return true;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true}. {@link #getEncodedElementByteSize} is cheap.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(Long value, Context context) {
-    return true;
-  }
-
-  @Override
-  protected long getEncodedElementByteSize(Long value, Context context)
-      throws Exception {
-    if (value == null) {
-      throw new CoderException("cannot encode a null Long");
-    }
-    return VarInt.getLength(value.longValue());
-  }
-}

[41/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Read.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Read.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Read.java
deleted file mode 100644
index cde8769..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Read.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.cloud.dataflow.sdk.util.StringUtils.approximateSimpleName;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.cloud.dataflow.sdk.values.PInput;
-
-import org.joda.time.Duration;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link PTransform} for reading from a {@link Source}.
- *
- * <p>Usage example:
- * <pre>
- * Pipeline p = Pipeline.create();
- * p.apply(Read.from(new MySource().withFoo("foo").withBar("bar"))
- *             .named("foobar"));
- * </pre>
- */
-public class Read {
-  /**
-   * Returns a new {@code Read} {@code PTransform} builder with the given name.
-   */
-  public static Builder named(String name) {
-    return new Builder(name);
-  }
-
-  /**
-   * Returns a new {@code Read.Bounded} {@code PTransform} reading from the given
-   * {@code BoundedSource}.
-   */
-  public static <T> Bounded<T> from(BoundedSource<T> source) {
-    return new Bounded<>(null, source);
-  }
-
-  /**
-   * Returns a new {@code Read.Unbounded} {@code PTransform} reading from the given
-   * {@code UnboundedSource}.
-   */
-  public static <T> Unbounded<T> from(UnboundedSource<T, ?> source) {
-    return new Unbounded<>(null, source);
-  }
-
-  /**
-   * Helper class for building {@code Read} transforms.
-   */
-  public static class Builder {
-    private final String name;
-
-    private Builder(String name) {
-      this.name = name;
-    }
-
-    /**
-     * Returns a new {@code Read.Bounded} {@code PTransform} reading from the given
-     * {@code BoundedSource}.
-     */
-    public <T> Bounded<T> from(BoundedSource<T> source) {
-      return new Bounded<>(name, source);
-    }
-
-    /**
-     * Returns a new {@code Read.Unbounded} {@code PTransform} reading from the given
-     * {@code UnboundedSource}.
-     */
-    public <T> Unbounded<T> from(UnboundedSource<T, ?> source) {
-      return new Unbounded<>(name, source);
-    }
-  }
-
-  /**
-   * {@link PTransform} that reads from a {@link BoundedSource}.
-   */
-  public static class Bounded<T> extends PTransform<PInput, PCollection<T>> {
-    private final BoundedSource<T> source;
-
-    private Bounded(@Nullable String name, BoundedSource<T> source) {
-      super(name);
-      this.source = SerializableUtils.ensureSerializable(source);
-    }
-
-    /**
-     * Returns a new {@code Bounded} {@code PTransform} that's like this one but
-     * has the given name.
-     *
-     * <p>Does not modify this object.
-     */
-    public Bounded<T> named(String name) {
-      return new Bounded<T>(name, source);
-    }
-
-    @Override
-    protected Coder<T> getDefaultOutputCoder() {
-      return source.getDefaultOutputCoder();
-    }
-
-    @Override
-    public final PCollection<T> apply(PInput input) {
-      source.validate();
-
-      return PCollection.<T>createPrimitiveOutputInternal(input.getPipeline(),
-          WindowingStrategy.globalDefault(), IsBounded.BOUNDED)
-          .setCoder(getDefaultOutputCoder());
-    }
-
-    /**
-     * Returns the {@code BoundedSource} used to create this {@code Read} {@code PTransform}.
-     */
-    public BoundedSource<T> getSource() {
-      return source;
-    }
-
-    @Override
-    public String getKindString() {
-      return "Read(" + approximateSimpleName(source.getClass()) + ")";
-    }
-
-    static {
-      registerDefaultTransformEvaluator();
-    }
-
-    @SuppressWarnings({"rawtypes", "unchecked"})
-    private static void registerDefaultTransformEvaluator() {
-      DirectPipelineRunner.registerDefaultTransformEvaluator(
-          Bounded.class,
-          new DirectPipelineRunner.TransformEvaluator<Bounded>() {
-            @Override
-            public void evaluate(
-                Bounded transform, DirectPipelineRunner.EvaluationContext context) {
-              evaluateReadHelper(transform, context);
-            }
-
-            private <T> void evaluateReadHelper(
-                Read.Bounded<T> transform, DirectPipelineRunner.EvaluationContext context) {
-              try {
-                List<DirectPipelineRunner.ValueWithMetadata<T>> output = new ArrayList<>();
-                BoundedSource<T> source = transform.getSource();
-                try (BoundedSource.BoundedReader<T> reader =
-                    source.createReader(context.getPipelineOptions())) {
-                  for (boolean available = reader.start();
-                      available;
-                      available = reader.advance()) {
-                    output.add(
-                        DirectPipelineRunner.ValueWithMetadata.of(
-                            WindowedValue.timestampedValueInGlobalWindow(
-                                reader.getCurrent(), reader.getCurrentTimestamp())));
-                  }
-                }
-                context.setPCollectionValuesWithMetadata(context.getOutput(transform), output);
-              } catch (Exception e) {
-                throw new RuntimeException(e);
-              }
-            }
-          });
-    }
-  }
-
-  /**
-   * {@link PTransform} that reads from a {@link UnboundedSource}.
-   */
-  public static class Unbounded<T> extends PTransform<PInput, PCollection<T>> {
-    private final UnboundedSource<T, ?> source;
-
-    private Unbounded(@Nullable String name, UnboundedSource<T, ?> source) {
-      super(name);
-      this.source = SerializableUtils.ensureSerializable(source);
-    }
-
-    /**
-     * Returns a new {@code Unbounded} {@code PTransform} that's like this one but
-     * has the given name.
-     *
-     * <p>Does not modify this object.
-     */
-    public Unbounded<T> named(String name) {
-      return new Unbounded<T>(name, source);
-    }
-
-    /**
-     * Returns a new {@link BoundedReadFromUnboundedSource} that reads a bounded amount
-     * of data from the given {@link UnboundedSource}.  The bound is specified as a number
-     * of records to read.
-     *
-     * <p>This may take a long time to execute if the splits of this source are slow to read
-     * records.
-     */
-    public BoundedReadFromUnboundedSource<T> withMaxNumRecords(long maxNumRecords) {
-      return new BoundedReadFromUnboundedSource<T>(source, maxNumRecords, null);
-    }
-
-    /**
-     * Returns a new {@link BoundedReadFromUnboundedSource} that reads a bounded amount
-     * of data from the given {@link UnboundedSource}.  The bound is specified as an amount
-     * of time to read for.  Each split of the source will read for this much time.
-     */
-    public BoundedReadFromUnboundedSource<T> withMaxReadTime(Duration maxReadTime) {
-      return new BoundedReadFromUnboundedSource<T>(source, Long.MAX_VALUE, maxReadTime);
-    }
-
-    @Override
-    protected Coder<T> getDefaultOutputCoder() {
-      return source.getDefaultOutputCoder();
-    }
-
-    @Override
-    public final PCollection<T> apply(PInput input) {
-      source.validate();
-
-      return PCollection.<T>createPrimitiveOutputInternal(
-          input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.UNBOUNDED);
-    }
-
-    /**
-     * Returns the {@code UnboundedSource} used to create this {@code Read} {@code PTransform}.
-     */
-    public UnboundedSource<T, ?> getSource() {
-      return source;
-    }
-
-    @Override
-    public String getKindString() {
-      return "Read(" + approximateSimpleName(source.getClass()) + ")";
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ShardNameTemplate.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ShardNameTemplate.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ShardNameTemplate.java
deleted file mode 100644
index 7270012..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/ShardNameTemplate.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-/**
- * Standard shard naming templates.
- *
- * <p>Shard naming templates are strings that may contain placeholders for
- * the shard number and shard count.  When constructing a filename for a
- * particular shard number, the upper-case letters 'S' and 'N' are replaced
- * with the 0-padded shard number and shard count respectively.
- *
- * <p>Left-padding of the numbers enables lexicographical sorting of the
- * resulting filenames.  If the shard number or count are too large for the
- * space provided in the template, then the result may no longer sort
- * lexicographically.  For example, a shard template of "S-of-N", for 200
- * shards, will result in outputs named "0-of-200", ... "10-of-200",
- * "100-of-200", etc.
- *
- * <p>Shard numbers start with 0, so the last shard number is the shard count
- * minus one.  For example, the template "-SSSSS-of-NNNNN" will be
- * instantiated as "-00000-of-01000" for the first shard (shard 0) of a
- * 1000-way sharded output.
- *
- * <p>A shard name template is typically provided along with a name prefix
- * and suffix, which allows constructing complex paths that have embedded
- * shard information.  For example, outputs in the form
- * "gs://bucket/path-01-of-99.txt" could be constructed by providing the
- * individual components:
- *
- * <pre>{@code
- *   pipeline.apply(
- *       TextIO.Write.to("gs://bucket/path")
- *                   .withShardNameTemplate("-SS-of-NN")
- *                   .withSuffix(".txt"))
- * }</pre>
- *
- * <p>In the example above, you could make parts of the output configurable
- * by users without the user having to specify all components of the output
- * name.
- *
- * <p>If a shard name template does not contain any repeating 'S', then
- * the output shard count must be 1, as otherwise the same filename would be
- * generated for multiple shards.
- */
-public class ShardNameTemplate {
-  /**
-   * Shard name containing the index and max.
-   *
-   * <p>Eg: [prefix]-00000-of-00100[suffix] and
-   * [prefix]-00001-of-00100[suffix]
-   */
-  public static final String INDEX_OF_MAX = "-SSSSS-of-NNNNN";
-
-  /**
-   * Shard is a file within a directory.
-   *
-   * <p>Eg: [prefix]/part-00000[suffix] and [prefix]/part-00001[suffix]
-   */
-  public static final String DIRECTORY_CONTAINER = "/part-SSSSS";
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Sink.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Sink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Sink.java
deleted file mode 100644
index a5649ce..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Sink.java
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import java.io.Serializable;
-
-/**
- * A {@code Sink} represents a resource that can be written to using the {@link Write} transform.
- *
- * <p>A parallel write to a {@code Sink} consists of three phases:
- * <ol>
- * <li>A sequential <i>initialization</i> phase (e.g., creating a temporary output directory, etc.)
- * <li>A <i>parallel write</i> phase where workers write bundles of records
- * <li>A sequential <i>finalization</i> phase (e.g., committing the writes, merging output files,
- * etc.)
- * </ol>
- *
- * <p>The {@link Write} transform can be used in a Dataflow pipeline to perform this write.
- * Specifically, a Write transform can be applied to a {@link PCollection} {@code p} by:
- *
- * <p>{@code p.apply(Write.to(new MySink()));}
- *
- * <p>Implementing a {@link Sink} and the corresponding write operations requires extending three
- * abstract classes:
- *
- * <ul>
- * <li>{@link Sink}: an immutable logical description of the location/resource to write to.
- * Depending on the type of sink, it may contain fields such as the path to an output directory
- * on a filesystem, a database table name, etc. Implementors of {@link Sink} must
- * implement two methods: {@link Sink#validate} and {@link Sink#createWriteOperation}.
- * {@link Sink#validate Validate} is called by the Write transform at pipeline creation, and should
- * validate that the Sink can be written to. The createWriteOperation method is also called at
- * pipeline creation, and should return a WriteOperation object that defines how to write to the
- * Sink. Note that implementations of Sink must be serializable and Sinks must be immutable.
- *
- * <li>{@link WriteOperation}: The WriteOperation implements the <i>initialization</i> and
- * <i>finalization</i> phases of a write. Implementors of {@link WriteOperation} must implement
- * corresponding {@link WriteOperation#initialize} and {@link WriteOperation#finalize} methods. A
- * WriteOperation must also implement {@link WriteOperation#createWriter} that creates Writers,
- * {@link WriteOperation#getWriterResultCoder} that returns a {@link Coder} for the result of a
- * parallel write, and a {@link WriteOperation#getSink} that returns the Sink that the write
- * operation corresponds to. See below for more information about these methods and restrictions on
- * their implementation.
- *
- * <li>{@link Writer}: A Writer writes a bundle of records. Writer defines four methods:
- * {@link Writer#open}, which is called once at the start of writing a bundle; {@link Writer#write},
- * which writes a single record from the bundle; {@link Writer#close}, which is called once at the
- * end of writing a bundle; and {@link Writer#getWriteOperation}, which returns the write operation
- * that the writer belongs to.
- * </ul>
- *
- * <h2>WriteOperation</h2>
- * <p>{@link WriteOperation#initialize} and {@link WriteOperation#finalize} are conceptually called
- * once: at the beginning and end of a Write transform. However, implementors must ensure that these
- * methods are idempotent, as they may be called multiple times on different machines in the case of
- * failure/retry or for redundancy.
- *
- * <p>The finalize method of WriteOperation is passed an Iterable of a writer result type. This
- * writer result type should encode the result of a write and, in most cases, some encoding of the
- * unique bundle id.
- *
- * <p>All implementations of {@link WriteOperation} must be serializable.
- *
- * <p>WriteOperation may have mutable state. For instance, {@link WriteOperation#initialize} may
- * mutate the object state. These mutations will be visible in {@link WriteOperation#createWriter}
- * and {@link WriteOperation#finalize} because the object will be serialized after initialize and
- * deserialized before these calls. However, it is not serialized again after createWriter is
- * called, as createWriter will be called within workers to create Writers for the bundles that are
- * distributed to these workers. Therefore, createWriter should not mutate the WriteOperation state
- * (as these mutations will not be visible in finalize).
- *
- * <h2>Bundle Ids:</h2>
- * <p>In order to ensure fault-tolerance, a bundle may be executed multiple times (e.g., in the
- * event of failure/retry or for redundancy). However, exactly one of these executions will have its
- * result passed to the WriteOperation's finalize method. Each call to {@link Writer#open} is passed
- * a unique <i>bundle id</i> when it is called by the Write transform, so even redundant or retried
- * bundles will have a unique way of identifying their output.
- *
- * <p>The bundle id should be used to guarantee that a bundle's output is unique. This uniqueness
- * guarantee is important; if a bundle is to be output to a file, for example, the name of the file
- * must be unique to avoid conflicts with other Writers. The bundle id should be encoded in the
- * writer result returned by the Writer and subsequently used by the WriteOperation's finalize
- * method to identify the results of successful writes.
- *
- * <p>For example, consider the scenario where a Writer writes files containing serialized records
- * and the WriteOperation's finalization step is to merge or rename these output files. In this
- * case, a Writer may use its unique id to name its output file (to avoid conflicts) and return the
- * name of the file it wrote as its writer result. The WriteOperation will then receive an Iterable
- * of output file names that it can then merge or rename using some bundle naming scheme.
- *
- * <h2>Writer Results:</h2>
- * <p>{@link WriteOperation}s and {@link Writer}s must agree on a writer result type that will be
- * returned by a Writer after it writes a bundle. This type can be a client-defined object or an
- * existing type; {@link WriteOperation#getWriterResultCoder} should return a {@link Coder} for the
- * type.
- *
- * <p>A note about thread safety: Any use of static members or methods in Writer should be thread
- * safe, as different instances of Writer objects may be created in different threads on the same
- * worker.
- *
- * @param <T> the type that will be written to the Sink.
- */
-@Experimental(Experimental.Kind.SOURCE_SINK)
-public abstract class Sink<T> implements Serializable {
-  /**
-   * Ensures that the sink is valid and can be written to before the write operation begins. One
-   * should use {@link com.google.common.base.Preconditions} to implement this method.
-   */
-  public abstract void validate(PipelineOptions options);
-
-  /**
-   * Returns an instance of a {@link WriteOperation} that can write to this Sink.
-   */
-  public abstract WriteOperation<T, ?> createWriteOperation(PipelineOptions options);
-
-  /**
-   * A {@link WriteOperation} defines the process of a parallel write of objects to a Sink.
-   *
-   * <p>The {@code WriteOperation} defines how to perform initialization and finalization of a
-   * parallel write to a sink as well as how to create a {@link Sink.Writer} object that can write
-   * a bundle to the sink.
-   *
-   * <p>Since operations in Dataflow may be run multiple times for redundancy or fault-tolerance,
-   * the initialization and finalization defined by a WriteOperation <b>must be idempotent</b>.
-   *
-   * <p>{@code WriteOperation}s may be mutable; a {@code WriteOperation} is serialized after the
-   * call to {@code initialize} method and deserialized before calls to
-   * {@code createWriter} and {@code finalize}. However, it is not
-   * reserialized after {@code createWriter}, so {@code createWriter} should not mutate the
-   * state of the {@code WriteOperation}.
-   *
-   * <p>See {@link Sink} for more detailed documentation about the process of writing to a Sink.
-   *
-   * @param <T> The type of objects to write
-   * @param <WriteT> The result of a per-bundle write
-   */
-  public abstract static class WriteOperation<T, WriteT> implements Serializable {
-    /**
-     * Performs initialization before writing to the sink. Called before writing begins.
-     */
-    public abstract void initialize(PipelineOptions options) throws Exception;
-
-    /**
-     * Given an Iterable of results from bundle writes, performs finalization after writing and
-     * closes the sink. Called after all bundle writes are complete.
-     *
-     * <p>The results that are passed to finalize are those returned by bundles that completed
-     * successfully. Although bundles may have been run multiple times (for fault-tolerance), only
-     * one writer result will be passed to finalize for each bundle. An implementation of finalize
-     * should perform clean up of any failed and successfully retried bundles.  Note that these
-     * failed bundles will not have their writer result passed to finalize, so finalize should be
-     * capable of locating any temporary/partial output written by failed bundles.
-     *
-     * <p>A best practice is to make finalize atomic. If this is impossible given the semantics
-     * of the sink, finalize should be idempotent, as it may be called multiple times in the case of
-     * failure/retry or for redundancy.
-     *
-     * <p>Note that the iteration order of the writer results is not guaranteed to be consistent if
-     * finalize is called multiple times.
-     *
-     * @param writerResults an Iterable of results from successful bundle writes.
-     */
-    public abstract void finalize(Iterable<WriteT> writerResults, PipelineOptions options)
-        throws Exception;
-
-    /**
-     * Creates a new {@link Sink.Writer} to write a bundle of the input to the sink.
-     *
-     * <p>The bundle id that the writer will use to uniquely identify its output will be passed to
-     * {@link Writer#open}.
-     *
-     * <p>Must not mutate the state of the WriteOperation.
-     */
-    public abstract Writer<T, WriteT> createWriter(PipelineOptions options) throws Exception;
-
-    /**
-     * Returns the Sink that this write operation writes to.
-     */
-    public abstract Sink<T> getSink();
-
-    /**
-     * Returns a coder for the writer result type.
-     */
-    public Coder<WriteT> getWriterResultCoder() {
-      return null;
-    }
-  }
-
-  /**
-   * A Writer writes a bundle of elements from a PCollection to a sink. {@link Writer#open} is
-   * called before writing begins and {@link Writer#close} is called after all elements in the
-   * bundle have been written. {@link Writer#write} writes an element to the sink.
-   *
-   * <p>Note that any access to static members or methods of a Writer must be thread-safe, as
-   * multiple instances of a Writer may be instantiated in different threads on the same worker.
-   *
-   * <p>See {@link Sink} for more detailed documentation about the process of writing to a Sink.
-   *
-   * @param <T> The type of object to write
-   * @param <WriteT> The writer results type (e.g., the bundle's output filename, as String)
-   */
-  public abstract static class Writer<T, WriteT> {
-    /**
-     * Performs bundle initialization. For example, creates a temporary file for writing or
-     * initializes any state that will be used across calls to {@link Writer#write}.
-     *
-     * <p>The unique id that is given to open should be used to ensure that the writer's output does
-     * not interfere with the output of other Writers, as a bundle may be executed many times for
-     * fault tolerance. See {@link Sink} for more information about bundle ids.
-     */
-    public abstract void open(String uId) throws Exception;
-
-    /**
-     * Called for each value in the bundle.
-     */
-    public abstract void write(T value) throws Exception;
-
-    /**
-     * Finishes writing the bundle. Closes any resources used for writing the bundle.
-     *
-     * <p>Returns a writer result that will be used in the {@link Sink.WriteOperation}'s
-     * finalization. The result should contain some way to identify the output of this bundle (using
-     * the bundle id). {@link WriteOperation#finalize} will use the writer result to identify
-     * successful writes. See {@link Sink} for more information about bundle ids.
-     *
-     * @return the writer result
-     */
-    public abstract WriteT close() throws Exception;
-
-    /**
-     * Returns the write operation this writer belongs to.
-     */
-    public abstract WriteOperation<T, WriteT> getWriteOperation();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java
deleted file mode 100644
index 4a02078..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Source.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.NoSuchElementException;
-
-/**
- * Base class for defining input formats and creating a {@code Source} for reading the input.
- *
- * <p>This class is not intended to be subclassed directly. Instead, to define
- * a bounded source (a source which produces a finite amount of input), subclass
- * {@link BoundedSource}; to define an unbounded source, subclass {@link UnboundedSource}.
- *
- * <p>A {@code Source} passed to a {@code Read} transform must be
- * {@code Serializable}.  This allows the {@code Source} instance
- * created in this "main program" to be sent (in serialized form) to
- * remote worker machines and reconstituted for each batch of elements
- * of the input {@code PCollection} being processed or for each source splitting
- * operation. A {@code Source} can have instance variable state, and
- * non-transient instance variable state will be serialized in the main program
- * and then deserialized on remote worker machines.
- *
- * <p>{@code Source} classes MUST be effectively immutable. The only acceptable use of
- * mutable fields is to cache the results of expensive operations, and such fields MUST be
- * marked {@code transient}.
- *
- * <p>{@code Source} objects should override {@link Object#toString}, as it will be
- * used in important error and debugging messages.
- *
- * @param <T> Type of elements read by the source.
- */
-@Experimental(Experimental.Kind.SOURCE_SINK)
-public abstract class Source<T> implements Serializable {
-  /**
-   * Checks that this source is valid, before it can be used in a pipeline.
-   *
-   * <p>It is recommended to use {@link com.google.common.base.Preconditions} for implementing
-   * this method.
-   */
-  public abstract void validate();
-
-  /**
-   * Returns the default {@code Coder} to use for the data read from this source.
-   */
-  public abstract Coder<T> getDefaultOutputCoder();
-
-  /**
-   * The interface that readers of custom input sources must implement.
-   *
-   * <p>This interface is deliberately distinct from {@link java.util.Iterator} because
-   * the current model tends to be easier to program and more efficient in practice
-   * for iterating over sources such as files, databases etc. (rather than pure collections).
-   *
-   * <p>Reading data from the {@link Reader} must obey the following access pattern:
-   * <ul>
-   * <li> One call to {@link #start}
-   * <ul><li>If {@link #start} returned true, any number of calls to {@code getCurrent}*
-   *   methods</ul>
-   * <li> Repeatedly, a call to {@link #advance}. This may be called regardless
-   *   of what the previous {@link #start}/{@link #advance} returned.
-   * <ul><li>If {@link #advance} returned true, any number of calls to {@code getCurrent}*
-   *   methods</ul>
-   * </ul>
-   *
-   * <p>For example, if the reader is reading a fixed set of data:
-   * <pre>
-   *   try {
-   *     for (boolean available = reader.start(); available; available = reader.advance()) {
-   *       T item = reader.getCurrent();
-   *       Instant timestamp = reader.getCurrentTimestamp();
-   *       ...
-   *     }
-   *   } finally {
-   *     reader.close();
-   *   }
-   * </pre>
-   *
-   * <p>If the set of data being read is continually growing:
-   * <pre>
-   *   try {
-   *     boolean available = reader.start();
-   *     while (true) {
-   *       if (available) {
-   *         T item = reader.getCurrent();
-   *         Instant timestamp = reader.getCurrentTimestamp();
-   *         ...
-   *         resetExponentialBackoff();
-   *       } else {
-   *         exponentialBackoff();
-   *       }
-   *       available = reader.advance();
-   *     }
-   *   } finally {
-   *     reader.close();
-   *   }
-   * </pre>
-   *
-   * <p>Note: this interface is a work-in-progress and may change.
-   *
-   * <p>All {@code Reader} functions except {@link #getCurrentSource} do not need to be thread-safe;
-   * they may only be accessed by a single thread at once. However, {@link #getCurrentSource} needs
-   * to be thread-safe, and other functions should assume that its returned value can change
-   * asynchronously.
-   */
-  public abstract static class Reader<T> implements AutoCloseable {
-    /**
-     * Initializes the reader and advances the reader to the first record.
-     *
-     * <p>This method should be called exactly once. The invocation should occur prior to calling
-     * {@link #advance} or {@link #getCurrent}. This method may perform expensive operations that
-     * are needed to initialize the reader.
-     *
-     * @return {@code true} if a record was read, {@code false} if there is no more input available.
-     */
-    public abstract boolean start() throws IOException;
-
-    /**
-     * Advances the reader to the next valid record.
-     *
-     * <p>It is an error to call this without having called {@link #start} first.
-     *
-     * @return {@code true} if a record was read, {@code false} if there is no more input available.
-     */
-    public abstract boolean advance() throws IOException;
-
-    /**
-     * Returns the value of the data item that was read by the last {@link #start} or
-     * {@link #advance} call. The returned value must be effectively immutable and remain valid
-     * indefinitely.
-     *
-     * <p>Multiple calls to this method without an intervening call to {@link #advance} should
-     * return the same result.
-     *
-     * @throws java.util.NoSuchElementException if {@link #start} was never called, or if
-     *         the last {@link #start} or {@link #advance} returned {@code false}.
-     */
-    public abstract T getCurrent() throws NoSuchElementException;
-
-    /**
-     * Returns the timestamp associated with the current data item.
-     *
-     * <p>If the source does not support timestamps, this should return
-     * {@code BoundedWindow.TIMESTAMP_MIN_VALUE}.
-     *
-     * <p>Multiple calls to this method without an intervening call to {@link #advance} should
-     * return the same result.
-     *
-     * @throws NoSuchElementException if the reader is at the beginning of the input and
-     *         {@link #start} or {@link #advance} wasn't called, or if the last {@link #start} or
-     *         {@link #advance} returned {@code false}.
-     */
-    public abstract Instant getCurrentTimestamp() throws NoSuchElementException;
-
-    /**
-     * Closes the reader. The reader cannot be used after this method is called.
-     */
-    @Override
-    public abstract void close() throws IOException;
-
-    /**
-     * Returns a {@code Source} describing the same input that this {@code Reader} currently reads
-     * (including items already read).
-     *
-     * <p>Usually, an implementation will simply return the immutable {@link Source} object from
-     * which the current {@link Reader} was constructed, or delegate to the base class.
-     * However, when using or implementing this method on a {@link BoundedSource.BoundedReader},
-     * special considerations apply, see documentation for
-     * {@link BoundedSource.BoundedReader#getCurrentSource}.
-     */
-    public abstract Source<T> getCurrentSource();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java
deleted file mode 100644
index d342f25..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/TextIO.java
+++ /dev/null
@@ -1,992 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Coder.Context;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.io.Read.Bounded;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
-import com.google.cloud.dataflow.sdk.util.MimeTypes;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.protobuf.ByteString;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-import java.nio.channels.Channels;
-import java.nio.channels.ReadableByteChannel;
-import java.nio.channels.SeekableByteChannel;
-import java.nio.channels.WritableByteChannel;
-import java.nio.charset.StandardCharsets;
-import java.util.Locale;
-import java.util.NoSuchElementException;
-import java.util.regex.Pattern;
-
-import javax.annotation.Nullable;
-
-/**
- * {@link PTransform}s for reading and writing text files.
- *
- * <p>To read a {@link PCollection} from one or more text files, use {@link TextIO.Read}.
- * You can instantiate a transform using {@link TextIO.Read#from(String)} to specify
- * the path of the file(s) to read from (e.g., a local filename or
- * filename pattern if running locally, or a Google Cloud Storage
- * filename or filename pattern of the form
- * {@code "gs://<bucket>/<filepath>"}). You may optionally call
- * {@link TextIO.Read#named(String)} to specify the name of the pipeline step.
- *
- * <p>By default, {@link TextIO.Read} returns a {@link PCollection} of {@link String Strings},
- * each corresponding to one line of an input UTF-8 text file. To convert directly from the raw
- * bytes (split into lines delimited by '\n', '\r', or '\r\n') to another object of type {@code T},
- * supply a {@code Coder<T>} using {@link TextIO.Read#withCoder(Coder)}.
- *
- * <p>See the following examples:
- *
- * <pre>{@code
- * Pipeline p = ...;
- *
- * // A simple Read of a local file (only runs locally):
- * PCollection<String> lines =
- *     p.apply(TextIO.Read.from("/local/path/to/file.txt"));
- *
- * // A fully-specified Read from a GCS file (runs locally and via the
- * // Google Cloud Dataflow service):
- * PCollection<Integer> numbers =
- *     p.apply(TextIO.Read.named("ReadNumbers")
- *                        .from("gs://my_bucket/path/to/numbers-*.txt")
- *                        .withCoder(TextualIntegerCoder.of()));
- * }</pre>
- *
- * <p>To write a {@link PCollection} to one or more text files, use
- * {@link TextIO.Write}, specifying {@link TextIO.Write#to(String)} to specify
- * the path of the file to write to (e.g., a local filename or sharded
- * filename pattern if running locally, or a Google Cloud Storage
- * filename or sharded filename pattern of the form
- * {@code "gs://<bucket>/<filepath>"}). You can optionally name the resulting transform using
- * {@link TextIO.Write#named(String)}, and you can use {@link TextIO.Write#withCoder(Coder)}
- * to specify the Coder to use to encode the Java values into text lines.
- *
- * <p>Any existing files with the same names as generated output files
- * will be overwritten.
- *
- * <p>For example:
- * <pre>{@code
- * // A simple Write to a local file (only runs locally):
- * PCollection<String> lines = ...;
- * lines.apply(TextIO.Write.to("/path/to/file.txt"));
- *
- * // A fully-specified Write to a sharded GCS file (runs locally and via the
- * // Google Cloud Dataflow service):
- * PCollection<Integer> numbers = ...;
- * numbers.apply(TextIO.Write.named("WriteNumbers")
- *                           .to("gs://my_bucket/path/to/numbers")
- *                           .withSuffix(".txt")
- *                           .withCoder(TextualIntegerCoder.of()));
- * }</pre>
- *
- * <h3>Permissions</h3>
- * <p>When run using the {@link DirectPipelineRunner}, your pipeline can read and write text files
- * on your local drive and remote text files on Google Cloud Storage that you have access to using
- * your {@code gcloud} credentials. When running in the Dataflow service using
- * {@link DataflowPipelineRunner}, the pipeline can only read and write files from GCS. For more
- * information about permissions, see the Cloud Dataflow documentation on
- * <a href="https://cloud.google.com/dataflow/security-and-permissions">Security and
- * Permissions</a>.
- */
-public class TextIO {
-  /**
-   * The default coder, which returns each line of the input file as a string. It is the coder
-   * the {@code String}-typed static factories of {@link Read} and {@link Write} start from.
-   */
-  public static final Coder<String> DEFAULT_TEXT_CODER = StringUtf8Coder.of();
-
-  /**
-   * Factory for {@link PTransform}s that read one text file, or every text file matching a
-   * pattern, into a {@link PCollection} holding one decoded element per line.
-   *
-   * <p>Each line is produced as a {@link String} unless a different {@link Coder} is supplied
-   * through {@link #withCoder(Coder)}, which also changes the element type of the result.
-   */
-  public static class Read {
-    /** Creates the transform that every {@code String}-typed factory method below starts from. */
-    private static Bound<String> defaultBound() {
-      return new Bound<>(DEFAULT_TEXT_CODER);
-    }
-
-    /**
-     * Returns a text-file reading transform whose pipeline step has the given name.
-     */
-    public static Bound<String> named(String name) {
-      return defaultBound().named(name);
-    }
-
-    /**
-     * Returns a text-file reading transform that reads the file(s) identified by the given
-     * filename or filename pattern.
-     *
-     * <p>The argument may be a local path (when running locally) or a Google Cloud Storage
-     * filename or pattern of the form {@code "gs://<bucket>/<filepath>"} (when running locally
-     * or via the Google Cloud Dataflow service). Standard
-     * <a href="http://docs.oracle.com/javase/tutorial/essential/io/find.html">Java Filesystem
-     * glob patterns</a> ("*", "?", "[..]") are supported.
-     */
-    public static Bound<String> from(String filepattern) {
-      return defaultBound().from(filepattern);
-    }
-
-    /**
-     * Returns a text-file reading transform that decodes every line into a value of type
-     * {@code T} using the given {@code Coder<T>}.
-     *
-     * <p>When no coder is chosen, {@link StringUtf8Coder} is used and the lines are returned
-     * as Java strings.
-     *
-     * @param <T> the type of the decoded elements, and of the elements of the resulting
-     * PCollection
-     */
-    public static <T> Bound<T> withCoder(Coder<T> coder) {
-      return new Bound<>(coder);
-    }
-
-    /**
-     * Returns a text-file reading transform for which GCS path validation at pipeline creation
-     * time is switched off.
-     *
-     * <p>Useful when the GCS input is not there yet while the pipeline is being created, but
-     * is expected to be available once the pipeline executes.
-     */
-    public static Bound<String> withoutValidation() {
-      return defaultBound().withoutValidation();
-    }
-
-    /**
-     * Returns a text-file reading transform that decompresses every input file according to
-     * the given compression type.
-     *
-     * <p>Without an explicit choice the type is {@link TextIO.CompressionType#AUTO}: the
-     * compression of each file is then derived from its extension (e.g., {@code *.gz} is
-     * gzipped, {@code *.bz2} is bzipped, and all other extensions are uncompressed).
-     */
-    public static Bound<String> withCompressionType(TextIO.CompressionType compressionType) {
-      return defaultBound().withCompressionType(compressionType);
-    }
-
-    // TODO: strippingNewlines, etc.
-
-    /**
-     * The {@link PTransform} produced by the {@link Read} factories: it reads one or more text
-     * files and yields a bounded {@link PCollection} with one element per input line.
-     *
-     * <p>Instances are immutable; every configuration method returns a modified copy.
-     *
-     * @param <T> the element type of the resulting {@link PCollection}. Lines come back as
-     * {@link String}s by default; {@link #withCoder(Coder)} accepts a {@code Coder<T>} in order
-     * to obtain a {@code PCollection<T>} instead.
-     */
-    public static class Bound<T> extends PTransform<PInput, PCollection<T>> {
-      /** Filename or filename pattern to read; {@code null} until {@link #from} supplies one. */
-      @Nullable private final String filepattern;
-
-      /** Decodes each line into an element. */
-      private final Coder<T> coder;
-
-      /** Whether {@link #apply} checks that the filepattern matches something. Default: true. */
-      private final boolean validate;
-
-      /** How the input files are compressed. Default: AUTO. */
-      private final TextIO.CompressionType compressionType;
-
-      Bound(Coder<T> coder) {
-        this(null, null, coder, true, TextIO.CompressionType.AUTO);
-      }
-
-      private Bound(String name, String filepattern, Coder<T> coder, boolean validate,
-          TextIO.CompressionType compressionType) {
-        super(name);
-        this.filepattern = filepattern;
-        this.coder = coder;
-        this.validate = validate;
-        this.compressionType = compressionType;
-      }
-
-      /**
-       * Returns a copy of this transform that carries the given step name.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> named(String name) {
-        return new Bound<>(name, filepattern, coder, validate, compressionType);
-      }
-
-      /**
-       * Returns a copy of this transform that reads the file(s) with the given name or pattern.
-       * Filepatterns are described at {@link TextIO.Read#from}.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> from(String filepattern) {
-        return new Bound<>(name, filepattern, coder, validate, compressionType);
-      }
-
-      /**
-       * Returns a copy of this transform that decodes each line into a value of type
-       * {@code X} using the given {@code Coder<X>}.
-       *
-       * <p>Does not modify this object.
-       *
-       * @param <X> the type of the decoded elements, and of the elements of the resulting
-       * PCollection
-       */
-      public <X> Bound<X> withCoder(Coder<X> coder) {
-        return new Bound<>(name, filepattern, coder, validate, compressionType);
-      }
-
-      /**
-       * Returns a copy of this transform for which GCS path validation at pipeline creation
-       * time is switched off.
-       *
-       * <p>Useful when the GCS input is not there yet while the pipeline is being created, but
-       * is expected to be available once the pipeline executes.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> withoutValidation() {
-        return new Bound<>(name, filepattern, coder, false, compressionType);
-      }
-
-      /**
-       * Returns a copy of this transform that reads its input using the given compression type.
-       *
-       * <p>Without an explicit choice the type is {@link TextIO.CompressionType#AUTO}; see
-       * {@link TextIO.Read#withCompressionType} for the details.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> withCompressionType(TextIO.CompressionType compressionType) {
-        return new Bound<>(name, filepattern, coder, validate, compressionType);
-      }
-
-      /**
-       * Applies the read to the pipeline of {@code input}.
-       *
-       * @throws IllegalStateException if no filepattern was set, or if validation is enabled
-       *         and the pattern matches no file or cannot be checked
-       * @throws IllegalArgumentException if the compression type is not one handled here
-       */
-      @Override
-      public PCollection<T> apply(PInput input) {
-        checkState(filepattern != null, "need to set the filepattern of a TextIO.Read transform");
-        if (validate) {
-          verifyInputExists();
-        }
-
-        Bounded<T> read = createRead();
-        PCollection<T> pcol = input.getPipeline().apply("Read", read);
-        // Honor the default output coder that would have been used by this PTransform.
-        pcol.setCoder(getDefaultOutputCoder());
-        return pcol;
-      }
-
-      /** Fails with an {@link IllegalStateException} unless the filepattern matches a file. */
-      private void verifyInputExists() {
-        boolean foundMatch;
-        try {
-          foundMatch = !IOChannelUtils.getFactory(filepattern).match(filepattern).isEmpty();
-        } catch (IOException e) {
-          throw new IllegalStateException(
-              String.format("Failed to validate %s", filepattern), e);
-        }
-        checkState(foundMatch, "Unable to find any files matching %s", filepattern);
-      }
-
-      /** Builds the bounded read over a source specific to the requested compression type. */
-      private Bounded<T> createRead() {
-        switch (compressionType) {
-          case UNCOMPRESSED:
-            return com.google.cloud.dataflow.sdk.io.Read.from(
-                new TextSource<T>(filepattern, coder));
-          case AUTO:
-            return com.google.cloud.dataflow.sdk.io.Read.from(
-                CompressedSource.from(new TextSource<T>(filepattern, coder)));
-          case BZIP2:
-            return com.google.cloud.dataflow.sdk.io.Read.from(
-                CompressedSource.from(new TextSource<T>(filepattern, coder))
-                                .withDecompression(CompressedSource.CompressionMode.BZIP2));
-          case GZIP:
-            return com.google.cloud.dataflow.sdk.io.Read.from(
-                CompressedSource.from(new TextSource<T>(filepattern, coder))
-                                .withDecompression(CompressedSource.CompressionMode.GZIP));
-          default:
-            throw new IllegalArgumentException("Unknown compression mode: " + compressionType);
-        }
-      }
-
-      /** Returns the coder this transform decodes lines with. */
-      @Override
-      protected Coder<T> getDefaultOutputCoder() {
-        return coder;
-      }
-
-      /** Returns the configured filepattern, or {@code null} when none has been set. */
-      public String getFilepattern() {
-        return filepattern;
-      }
-
-      /** Returns whether input validation is enabled for this transform. */
-      public boolean needsValidation() {
-        return validate;
-      }
-
-      /** Returns the compression type this transform reads its input with. */
-      public TextIO.CompressionType getCompressionType() {
-        return compressionType;
-      }
-    }
-
-    /** Disallow construction of utility classes. */
-    private Read() {}
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Factory for {@link PTransform}s that write a {@link PCollection} to one text file, or to
-   * several text files named by a sharding pattern, encoding every element of the input
-   * collection as a line of its own.
-   */
-  public static class Write {
-    /** Creates the transform that every {@code String}-typed factory method below starts from. */
-    private static Bound<String> defaultBound() {
-      return new Bound<>(DEFAULT_TEXT_CODER);
-    }
-
-    /**
-     * Returns a text-file writing transform whose pipeline step has the given name.
-     */
-    public static Bound<String> named(String name) {
-      return defaultBound().named(name);
-    }
-
-    /**
-     * Returns a text-file writing transform that writes to the file(s) with the given prefix.
-     * The prefix may be a local filename (when running locally) or a Google Cloud Storage
-     * filename of the form {@code "gs://<bucket>/<filepath>"} (when running locally or via the
-     * Google Cloud Dataflow service).
-     *
-     * <p>Every file written starts with this prefix, continues with a shard identifier (see
-     * {@link Bound#withNumShards(int)}), and ends in a common extension when one was given
-     * through {@link Bound#withSuffix(String)}.
-     */
-    public static Bound<String> to(String prefix) {
-      return defaultBound().to(prefix);
-    }
-
-    /**
-     * Returns a text-file writing transform that appends the given suffix to the names of the
-     * files it creates.
-     */
-    public static Bound<String> withSuffix(String nameExtension) {
-      return defaultBound().withSuffix(nameExtension);
-    }
-
-    /**
-     * Returns a text-file writing transform that uses the given shard count.
-     *
-     * <p>Fixing the number of shards is likely to reduce the performance of a pipeline, so it
-     * is not recommended unless a specific number of output files is required.
-     *
-     * @param numShards the number of shards to use, or 0 to let the system
-     *                  decide.
-     */
-    public static Bound<String> withNumShards(int numShards) {
-      return defaultBound().withNumShards(numShards);
-    }
-
-    /**
-     * Returns a text-file writing transform that names its shards with the given template.
-     *
-     * <p>Shard templates are described at {@link ShardNameTemplate}.
-     */
-    public static Bound<String> withShardNameTemplate(String shardTemplate) {
-      return defaultBound().withShardNameTemplate(shardTemplate);
-    }
-
-    /**
-     * Returns a text-file writing transform that forces a single output file.
-     */
-    public static Bound<String> withoutSharding() {
-      return defaultBound().withoutSharding();
-    }
-
-    /**
-     * Returns a text-file writing transform that turns every element of the input
-     * {@link PCollection} into an output text line using the given {@link Coder}.
-     *
-     * <p>When no coder is chosen, {@link StringUtf8Coder} is used and input Java strings are
-     * written directly as output lines.
-     *
-     * @param <T> the type of the elements of the input {@link PCollection}
-     */
-    public static <T> Bound<T> withCoder(Coder<T> coder) {
-      return new Bound<>(coder);
-    }
-
-    /**
-     * Returns a text-file writing transform for which GCS path validation at pipeline creation
-     * time is switched off.
-     *
-     * <p>Useful when the GCS output location is not there yet while the pipeline is being
-     * created, but is expected to be available once the pipeline executes.
-     */
-    public static Bound<String> withoutValidation() {
-      return defaultBound().withoutValidation();
-    }
-
-    // TODO: appendingNewlines, header, footer, etc.
-
-    /**
-     * The PTransform produced by the {@link Write} factories: it writes a bounded PCollection
-     * to one text file, or to several text files named by a sharding pattern, encoding every
-     * element as a line of its own.
-     *
-     * <p>Instances are immutable; every configuration method returns a modified copy.
-     *
-     * @param <T> the type of the elements of the input PCollection
-     */
-    public static class Bound<T> extends PTransform<PCollection<T>, PDone> {
-      /** Prefix of every file written; {@code null} until {@link #to} supplies one. */
-      @Nullable private final String filenamePrefix;
-
-      /** Suffix of every file written, combined with the prefix and the shard template. */
-      private final String filenameSuffix;
-
-      /** Encodes each element into a line. */
-      private final Coder<T> coder;
-
-      /** Requested number of shards; 0 leaves the choice to the system. */
-      private final int numShards;
-
-      /** Shard template of every file written, combined with the prefix and the suffix. */
-      private final String shardTemplate;
-
-      /** Whether output validation is desired. Default: true. */
-      private final boolean validate;
-
-      Bound(Coder<T> coder) {
-        this(null, null, "", coder, 0, ShardNameTemplate.INDEX_OF_MAX, true);
-      }
-
-      private Bound(String name, String filenamePrefix, String filenameSuffix, Coder<T> coder,
-          int numShards, String shardTemplate, boolean validate) {
-        super(name);
-        this.filenamePrefix = filenamePrefix;
-        this.filenameSuffix = filenameSuffix;
-        this.coder = coder;
-        this.numShards = numShards;
-        this.shardTemplate = shardTemplate;
-        this.validate = validate;
-      }
-
-      /**
-       * Returns a copy of this transform that carries the given step name.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> named(String name) {
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, validate);
-      }
-
-      /**
-       * Returns a copy of this transform that writes to the file(s) with the given filename
-       * prefix.
-       *
-       * <p>More information is available at {@link TextIO.Write#to(String) Write.to(String)}.
-       *
-       * <p>Does not modify this object.
-       *
-       * @throws IllegalArgumentException if the prefix contains an {@code @N} or {@code @*}
-       *         shard pattern
-       */
-      public Bound<T> to(String filenamePrefix) {
-        validateOutputComponent(filenamePrefix);
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, validate);
-      }
-
-      /**
-       * Returns a copy of this transform that writes to the file(s) with the given filename
-       * suffix.
-       *
-       * <p>Does not modify this object.
-       *
-       * @throws IllegalArgumentException if the suffix contains an {@code @N} or {@code @*}
-       *         shard pattern
-       * @see ShardNameTemplate
-       */
-      public Bound<T> withSuffix(String nameExtension) {
-        validateOutputComponent(nameExtension);
-        return new Bound<>(
-            name, filenamePrefix, nameExtension, coder, numShards, shardTemplate, validate);
-      }
-
-      /**
-       * Returns a copy of this transform that uses the given shard count.
-       *
-       * <p>Fixing the number of shards is likely to reduce the performance of a pipeline, so it
-       * is not recommended unless a specific number of output files is required.
-       *
-       * <p>Does not modify this object.
-       *
-       * @param numShards the number of shards to use, or 0 to let the system
-       *                  decide.
-       * @throws IllegalArgumentException if {@code numShards} is negative
-       * @see ShardNameTemplate
-       */
-      public Bound<T> withNumShards(int numShards) {
-        Preconditions.checkArgument(numShards >= 0);
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, validate);
-      }
-
-      /**
-       * Returns a copy of this transform that names its shards with the given template.
-       *
-       * <p>Does not modify this object.
-       *
-       * @see ShardNameTemplate
-       */
-      public Bound<T> withShardNameTemplate(String shardTemplate) {
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, validate);
-      }
-
-      /**
-       * Returns a copy of this transform that forces a single output file.
-       *
-       * <p>Fixing the number of shards is likely to reduce the performance of a pipeline, so
-       * this setting is not recommended unless a single output file is truly required.
-       *
-       * <p>Shortcut for {@code .withNumShards(1).withShardNameTemplate("")}.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> withoutSharding() {
-        return new Bound<>(name, filenamePrefix, filenameSuffix, coder, 1, "", validate);
-      }
-
-      /**
-       * Returns a copy of this transform that turns every element of the input
-       * {@code PCollection<X>} into an output text line using the given {@code Coder<X>}.
-       *
-       * <p>Does not modify this object.
-       *
-       * @param <X> the type of the elements of the input {@link PCollection}
-       */
-      public <X> Bound<X> withCoder(Coder<X> coder) {
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, validate);
-      }
-
-      /**
-       * Returns a copy of this transform for which GCS output path validation at pipeline
-       * creation time is switched off.
-       *
-       * <p>Useful when the GCS output location is not there yet while the pipeline is being
-       * created, but is expected to be available once the pipeline executes.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> withoutValidation() {
-        return new Bound<>(
-            name, filenamePrefix, filenameSuffix, coder, numShards, shardTemplate, false);
-      }
-
-      /**
-       * Applies the write to {@code input} through a {@code TextSink} built from the prefix,
-       * suffix, shard template and coder of this transform.
-       *
-       * @throws IllegalStateException if no filename prefix was set
-       */
-      @Override
-      public PDone apply(PCollection<T> input) {
-        checkState(
-            filenamePrefix != null, "need to set the filename prefix of a TextIO.Write transform");
-
-        // Custom sinks do not currently expose sharding controls, so each pipeline runner
-        // has to add its own internal support for user requested sharding limits.
-        return input.apply(
-            "Write",
-            com.google.cloud.dataflow.sdk.io.Write.to(
-                new TextSink<>(filenamePrefix, filenameSuffix, shardTemplate, coder)));
-      }
-
-      /**
-       * Returns the current shard name template string.
-       */
-      public String getShardNameTemplate() {
-        return shardTemplate;
-      }
-
-      /** Returns the coder of the (empty) output of a write. */
-      @Override
-      protected Coder<Void> getDefaultOutputCoder() {
-        return VoidCoder.of();
-      }
-
-      /** Returns the configured filename prefix, or {@code null} when none has been set. */
-      public String getFilenamePrefix() {
-        return filenamePrefix;
-      }
-
-      /** Returns the same shard template string as {@link #getShardNameTemplate()}. */
-      public String getShardTemplate() {
-        return shardTemplate;
-      }
-
-      /** Returns the requested number of shards; 0 leaves the choice to the system. */
-      public int getNumShards() {
-        return numShards;
-      }
-
-      /** Returns the configured filename suffix. */
-      public String getFilenameSuffix() {
-        return filenameSuffix;
-      }
-
-      /** Returns the coder this transform encodes elements with. */
-      public Coder<T> getCoder() {
-        return coder;
-      }
-
-      /** Returns whether output validation is enabled for this transform. */
-      public boolean needsValidation() {
-        return validate;
-      }
-    }
-  }
-
-  /**
-   * Possible text file compression types.
-   *
-   * <p>Each constant carries the filename extension used by {@link #matches}. {@link #AUTO}
-   * and {@link #UNCOMPRESSED} carry the empty string, so they match every filename.
-   */
-  public static enum CompressionType {
-    /**
-     * Automatically determine the compression type based on filename extension.
-     */
-    AUTO(""),
-    /**
-     * Uncompressed (i.e., may be split).
-     */
-    UNCOMPRESSED(""),
-    /**
-     * GZipped.
-     */
-    GZIP(".gz"),
-    /**
-     * BZipped.
-     */
-    BZIP2(".bz2");
-
-    /** Extension associated with this type; enum constants are shared, so it is immutable. */
-    private final String filenameSuffix;
-
-    private CompressionType(String suffix) {
-      this.filenameSuffix = suffix;
-    }
-
-    /**
-     * Determine if a given filename matches a compression type based on its extension.
-     *
-     * <p>The comparison ignores case and is done with {@link Locale#ROOT}, so the result does
-     * not depend on the default locale of the JVM.
-     *
-     * @param filename the filename to match
-     * @return true iff the filename ends with the compression type's known extension.
-     */
-    public boolean matches(String filename) {
-      return filename.toLowerCase(Locale.ROOT).endsWith(filenameSuffix.toLowerCase(Locale.ROOT));
-    }
-  }
-
-  /**
-   * Matches old-style shard output patterns ({@code @N} or {@code @*}), which are now
-   * disallowed in output name components.
-   */
-  private static final Pattern SHARD_OUTPUT_PATTERN = Pattern.compile("@([0-9]+|\\*)");
-
-  /**
-   * Rejects an output name component that contains an old-style shard pattern.
-   *
-   * @param partialFilePattern the filename prefix or suffix to check
-   * @throws IllegalArgumentException if {@code @N} or {@code @*} occurs anywhere in the
-   *         component
-   */
-  private static void validateOutputComponent(String partialFilePattern) {
-    boolean usesOldStyleShardSpec = SHARD_OUTPUT_PATTERN.matcher(partialFilePattern).find();
-    Preconditions.checkArgument(
-        !usesOldStyleShardSpec,
-        "Output name components are not allowed to contain @* or @N patterns: "
-        + partialFilePattern);
-  }
-
-  //////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Disable construction of utility class: the constructor is private, so code outside
-   * {@code TextIO} cannot create an instance.
-   */
-  private TextIO() {}
-
-  /**
-   * A {@link FileBasedSource} which can decode records delimited by new line characters.
-   *
-   * <p>This source splits the data into records using {@code UTF-8} {@code \n}, {@code \r}, or
-   * {@code \r\n} as the delimiter. This source is not strict and supports decoding the last record
-   * even if it is not delimited. Finally, no records are decoded if the stream is empty.
-   *
-   * <p>This source supports reading from any arbitrary byte position within the stream. If the
-   * starting position is not {@code 0}, then bytes are skipped until the first delimiter is found
-   * representing the beginning of the first record to be decoded.
-   */
-  @VisibleForTesting
-  static class TextSource<T> extends FileBasedSource<T> {
-    /** The Coder to use to decode each line. */
-    private final Coder<T> coder;
-
-    /**
-     * Creates a source over the given file specification that decodes lines with {@code coder}.
-     *
-     * <p>NOTE(review): the {@code 1L} handed to the superclass is presumably its minimum bundle
-     * size; confirm against {@code FileBasedSource}.
-     *
-     * @param fileSpec the file specification passed on to the superclass
-     * @param coder the coder used to decode each line
-     */
-    @VisibleForTesting
-    TextSource(String fileSpec, Coder<T> coder) {
-      super(fileSpec, 1L);
-      this.coder = coder;
-    }
-
-    /**
-     * Creates a source restricted to the range described by {@code start} and {@code end} of
-     * one file; used by {@link #createForSubrangeOfFile}.
-     *
-     * <p>NOTE(review): as above, the {@code 1L} is presumably the minimum bundle size, and the
-     * exact offset semantics (inclusive/exclusive) are defined by {@code FileBasedSource};
-     * confirm there.
-     */
-    private TextSource(String fileName, long start, long end, Coder<T> coder) {
-      super(fileName, 1L, start, end);
-      this.coder = coder;
-    }
-
-    /**
-     * Returns a new {@code TextSource} for the given file and offsets that decodes with the
-     * same coder as this source.
-     */
-    @Override
-    protected FileBasedSource<T> createForSubrangeOfFile(String fileName, long start, long end) {
-      return new TextSource<>(fileName, start, end, coder);
-    }
-
-    /**
-     * Returns a new {@link TextBasedReader} over this source. The {@code options} argument is
-     * not used.
-     */
-    @Override
-    protected FileBasedReader<T> createSingleFileReader(PipelineOptions options) {
-      return new TextBasedReader<>(this);
-    }
-
-    /** Always returns {@code false}; the {@code options} argument is not used. */
-    @Override
-    public boolean producesSortedKeys(PipelineOptions options) throws Exception {
-      return false;
-    }
-
-    /** Returns the coder this source was constructed with. */
-    @Override
-    public Coder<T> getDefaultOutputCoder() {
-      return coder;
-    }
-
-    /**
-     * A {@link com.google.cloud.dataflow.sdk.io.FileBasedSource.FileBasedReader FileBasedReader}
-     * which can decode records delimited by new line characters.
-     *
-     * See {@link TextSource} for further details.
-     */
-    @VisibleForTesting
-    static class TextBasedReader<T> extends FileBasedReader<T> {
-      /** Capacity, in bytes, of {@code readBuffer}. */
-      private static final int READ_BUFFER_SIZE = 8192;
-      /** Coder copied from the source in the constructor. */
-      private final Coder<T> coder;
-      // NOTE(review): presumably the scratch buffer used when reading from the channel; the
-      // code that fills it is outside this view.
-      private final ByteBuffer readBuffer = ByteBuffer.allocate(READ_BUFFER_SIZE);
-      /** Bytes currently held for scanning; laid out as drawn on findSeparatorBounds(). */
-      private ByteString buffer;
-      /** Index in {@code buffer} of the first byte of the separator found last. */
-      private int startOfSeparatorInBuffer;
-      /** Index in {@code buffer} just past the last byte of the separator found last. */
-      private int endOfSeparatorInBuffer;
-      /** Offset reported by getCurrentOffset(); initialized by startReading() after a seek. */
-      private long startOfNextRecord;
-      // NOTE(review): presumably set once the channel is exhausted; assigned outside this view.
-      private boolean eof;
-      /** Guards getCurrent() and getCurrentOffset(): both throw while this is false. */
-      private boolean elementIsPresent;
-      /** Value handed out by getCurrent(). */
-      private T currentValue;
-      /** Channel being read; assigned in startReading(). */
-      private ReadableByteChannel inChannel;
-
-      /**
-       * Creates a reader over {@code source} that uses the coder of that source and starts out
-       * with an empty buffer.
-       */
-      private TextBasedReader(TextSource<T> source) {
-        super(source);
-        coder = source.coder;
-        buffer = ByteString.EMPTY;
-      }
-
-      /**
-       * Returns the offset stored in {@code startOfNextRecord} while an element is present.
-       *
-       * @throws NoSuchElementException if no element is currently present
-       */
-      @Override
-      protected long getCurrentOffset() throws NoSuchElementException {
-        if (elementIsPresent) {
-          return startOfNextRecord;
-        }
-        throw new NoSuchElementException();
-      }
-
-      /**
-       * Returns the most recently decoded value while an element is present.
-       *
-       * @throws NoSuchElementException if no element is currently present
-       */
-      @Override
-      public T getCurrent() throws NoSuchElementException {
-        if (elementIsPresent) {
-          return currentValue;
-        }
-        throw new NoSuchElementException();
-      }
-
-      /**
-       * Prepares this reader to decode records from {@code channel}.
-       *
-       * <p>The channel is remembered in {@code inChannel}. When the start offset of the current
-       * source is greater than zero, the channel is positioned one byte before that offset, the
-       * next separator (or the end of the input) is located with {@code findSeparatorBounds()},
-       * and everything up to and including that separator is dropped from the buffer. Afterwards
-       * {@code startOfNextRecord} holds the absolute offset of the first byte that follows the
-       * separator.
-       *
-       * @param channel the channel to read from
-       * @throws IOException if positioning or reading the channel fails
-       * @throws IllegalStateException if the start offset is greater than zero and
-       *         {@code channel} is not a {@link SeekableByteChannel}
-       */
-      @Override
-      protected void startReading(ReadableByteChannel channel) throws IOException {
-        this.inChannel = channel;
-        // If the first offset is greater than zero, we need to skip bytes until we see our
-        // first separator.
-        if (getCurrentSource().getStartOffset() > 0) {
-          checkState(channel instanceof SeekableByteChannel,
-              "%s only supports reading from a SeekableByteChannel when given a start offset"
-              + " greater than 0.", TextSource.class.getSimpleName());
-          // NOTE(review): presumably the scan starts one byte early so that a separator ending
-          // right before the start offset is still seen, letting a record that begins exactly
-          // at the start offset be read here; confirm.
-          long requiredPosition = getCurrentSource().getStartOffset() - 1;
-          ((SeekableByteChannel) channel).position(requiredPosition);
-          findSeparatorBounds();
-          // Discard the partial record and its separator; the buffer now starts at the record.
-          buffer = buffer.substring(endOfSeparatorInBuffer);
-          startOfNextRecord = requiredPosition + endOfSeparatorInBuffer;
-          // The separator was consumed above, so its bounds are reset to the buffer start.
-          endOfSeparatorInBuffer = 0;
-          startOfSeparatorInBuffer = 0;
-        }
-      }
-
-      /**
-       * Locates the start position and end position of the next delimiter. Will
-       * consume the channel till either EOF or the delimiter bounds are found.
-       *
-       * <p>A delimiter is {@code '\n'}, {@code '\r'}, or {@code '\r'} immediately followed by
-       * {@code '\n'}. If the data runs out before a delimiter is seen, both bounds are set to the
-       * position at which the scan stopped, which describes an empty delimiter.
-       *
-       * <p>This fills the buffer and updates the positions as follows:
-       * <pre>{@code
-       * ------------------------------------------------------
-       * | element bytes | delimiter bytes | unconsumed bytes |
-       * ------------------------------------------------------
-       * 0            start of          end of              buffer
-       *              separator         separator           size
-       *              in buffer         in buffer
-       * }</pre>
-       */
-      private void findSeparatorBounds() throws IOException {
-        int bytePositionInBuffer = 0;
-        while (true) {
-          // No byte is available at this position: report an empty delimiter where the scan
-          // stopped.
-          if (!tryToEnsureNumberOfBytesInBuffer(bytePositionInBuffer + 1)) {
-            startOfSeparatorInBuffer = endOfSeparatorInBuffer = bytePositionInBuffer;
-            break;
-          }
-
-          byte currentByte = buffer.byteAt(bytePositionInBuffer);
-
-          if (currentByte == '\n') {
-            // A line feed on its own is a one-byte delimiter.
-            startOfSeparatorInBuffer = bytePositionInBuffer;
-            endOfSeparatorInBuffer = startOfSeparatorInBuffer + 1;
-            break;
-          } else if (currentByte == '\r') {
-            // A carriage return is a delimiter by itself ...
-            startOfSeparatorInBuffer = bytePositionInBuffer;
-            endOfSeparatorInBuffer = startOfSeparatorInBuffer + 1;
-
-            // ... and grows to two bytes when a line feed follows immediately.
-            if (tryToEnsureNumberOfBytesInBuffer(bytePositionInBuffer + 2)) {
-              currentByte = buffer.byteAt(bytePositionInBuffer + 1);
-              if (currentByte == '\n') {
-                endOfSeparatorInBuffer += 1;
-              }
-            }
-            break;
-          }
-
-          // Move to the next byte in buffer.
-          bytePositionInBuffer += 1;
-        }
-      }
-
-      /**
-       * Advances to the next record, if there is one.
-       *
-       * <p>The offset of the next record is first moved forward by the end position of the
-       * previously found separator, then the bounds of the next separator are located.
-       *
-       * @return {@code true} if a record was decoded, {@code false} if EOF has been reached and
-       *     the buffer holds no more bytes
-       */
-      @Override
-      protected boolean readNextRecord() throws IOException {
-        startOfNextRecord += endOfSeparatorInBuffer;
-        findSeparatorBounds();
-
-        // As long as the channel is not exhausted or bytes remain in the buffer, there is a
-        // record left to decode.
-        if (!eof || buffer.size() != 0) {
-          decodeCurrentElement();
-          return true;
-        }
-
-        // EOF was reached and the whole buffer has been consumed: no more records.
-        elementIsPresent = false;
-        return false;
-      }
-
-      /**
-       * Decodes the bytes in front of the current separator into {@code currentValue} and then
-       * trims the buffer down to the bytes that follow the separator.
-       *
-       * <p>After this call the stored {@code startOfSeparatorInBuffer} and
-       * {@code endOfSeparatorInBuffer} no longer describe the buffer contents.
-       */
-      private void decodeCurrentElement() throws IOException {
-        currentValue =
-            coder.decode(buffer.substring(0, startOfSeparatorInBuffer).newInput(), Context.OUTER);
-        elementIsPresent = true;
-        // Drop the element and its separator; only unconsumed bytes stay in the buffer.
-        buffer = buffer.substring(endOfSeparatorInBuffer);
-      }
-
-      /**
-       * Reads from the channel into the buffer until the buffer holds more than
-       * {@code minCapacity} bytes or EOF is reached.
-       *
-       * @return {@code true} if the buffer holds at least {@code minCapacity} bytes afterwards,
-       *     {@code false} if the channel ran out of data first
-       */
-      private boolean tryToEnsureNumberOfBytesInBuffer(int minCapacity) throws IOException {
-        // Keep reading while we are not at EOF and the buffer has not grown past the requested
-        // capacity.
-        while (!eof && buffer.size() <= minCapacity) {
-          int bytesRead = inChannel.read(readBuffer);
-          eof = bytesRead == -1;
-          // Hand over whatever was read and make the scratch buffer reusable for the next read.
-          readBuffer.flip();
-          buffer = buffer.concat(ByteString.copyFrom(readBuffer));
-          readBuffer.clear();
-        }
-        // Report whether the minimum buffer capacity request could be honored.
-        return minCapacity <= buffer.size();
-      }
-    }
-  }
-
-  /**
-   * A {@link FileBasedSink} that writes text files. Every record, the last one included, is
-   * followed by the new line separator {@code '\n'} encoded as {@code UTF-8}.
-   */
-  @VisibleForTesting
-  static class TextSink<T> extends FileBasedSink<T> {
-    private final Coder<T> coder;
-
-    /** Creates a sink that encodes each record with {@code coder}. */
-    @VisibleForTesting
-    TextSink(
-        String baseOutputFilename, String extension, String fileNameTemplate, Coder<T> coder) {
-      super(baseOutputFilename, extension, fileNameTemplate);
-      this.coder = coder;
-    }
-
-    /** Returns a new write operation bound to this sink and its coder. */
-    @Override
-    public FileBasedSink.FileBasedWriteOperation<T> createWriteOperation(PipelineOptions options) {
-      TextWriteOperation<T> operation = new TextWriteOperation<>(this, coder);
-      return operation;
-    }
-
-    /**
-     * The {@link com.google.cloud.dataflow.sdk.io.FileBasedSink.FileBasedWriteOperation
-     * FileBasedWriteOperation} used for text files; it hands the coder on to its writers.
-     */
-    private static class TextWriteOperation<T> extends FileBasedWriteOperation<T> {
-      private final Coder<T> coder;
-
-      private TextWriteOperation(TextSink<T> sink, Coder<T> coder) {
-        super(sink);
-        this.coder = coder;
-      }
-
-      /** Returns a new writer bound to this operation and its coder. */
-      @Override
-      public FileBasedWriter<T> createWriter(PipelineOptions options) throws Exception {
-        TextWriter<T> writer = new TextWriter<>(this, coder);
-        return writer;
-      }
-    }
-
-    /**
-     * The {@link com.google.cloud.dataflow.sdk.io.FileBasedSink.FileBasedWriter FileBasedWriter}
-     * used for text files; it writes each encoded value followed by a new line.
-     */
-    private static class TextWriter<T> extends FileBasedWriter<T> {
-      private static final byte[] NEWLINE = "\n".getBytes(StandardCharsets.UTF_8);
-      private final Coder<T> coder;
-      private OutputStream out;
-
-      public TextWriter(FileBasedWriteOperation<T> writeOperation, Coder<T> coder) {
-        super(writeOperation);
-        this.coder = coder;
-        this.mimeType = MimeTypes.TEXT;
-      }
-
-      /** Wraps {@code channel} in the output stream that later writes go to. */
-      @Override
-      protected void prepareWrite(WritableByteChannel channel) throws Exception {
-        this.out = Channels.newOutputStream(channel);
-      }
-
-      /** Encodes {@code value} to the output stream and terminates it with a new line. */
-      @Override
-      public void write(T value) throws Exception {
-        coder.encode(value, out, Context.OUTER);
-        out.write(NEWLINE);
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/UnboundedSource.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/UnboundedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/UnboundedSource.java
deleted file mode 100644
index e585151..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/UnboundedSource.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link Source} that reads an unbounded amount of input and, because of that, supports
- * some additional operations such as checkpointing, watermarks, and record ids.
- *
- * <ul>
- * <li> Checkpointing allows sources to not re-read the same data again in the case of failures.
- * <li> Watermarks allow for downstream parts of the pipeline to know up to what point
- *   in time the data is complete.
- * <li> Record ids allow for efficient deduplication of input records; many streaming sources
- *   do not guarantee that a given record will only be read a single time.
- * </ul>
- *
- * <p>See {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window} and
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.Trigger} for more information on
- * timestamps and watermarks.
- *
- * @param <OutputT> Type of records output by this source.
- * @param <CheckpointMarkT> Type of checkpoint marks used by the readers of this source.
- */
-public abstract class UnboundedSource<
-        OutputT, CheckpointMarkT extends UnboundedSource.CheckpointMark> extends Source<OutputT> {
-  /**
-   * Returns a list of {@code UnboundedSource} objects representing the instances of this source
-   * that should be used when executing the workflow.  Each split should return a separate partition
-   * of the input data.
-   *
-   * <p>For example, for a source reading from a growing directory of files, each split
-   * could correspond to a prefix of file names.
-   *
-   * <p>Some sources are not splittable, such as reading from a single TCP stream.  In that
-   * case, only a single split should be returned.
-   *
-   * <p>Some data sources automatically partition their data among readers.  For these types of
-   * inputs, {@code n} identical replicas of the top-level source can be returned.
-   *
-   * <p>The size of the returned list should be as close to {@code desiredNumSplits}
-   * as possible, but does not have to match exactly.  A low number of splits
-   * will limit the amount of parallelism in the source.
-   */
-  public abstract List<? extends UnboundedSource<OutputT, CheckpointMarkT>> generateInitialSplits(
-      int desiredNumSplits, PipelineOptions options) throws Exception;
-
-  /**
-   * Create a new {@link UnboundedReader} to read from this source, resuming from the given
-   * checkpoint if present.
-   */
-  public abstract UnboundedReader<OutputT> createReader(
-      PipelineOptions options, @Nullable CheckpointMarkT checkpointMark);
-
-  /**
-   * Returns a {@link Coder} for encoding and decoding the checkpoints for this source, or
-   * null if the checkpoints do not need to be durably committed.
-   */
-  @Nullable
-  public abstract Coder<CheckpointMarkT> getCheckpointMarkCoder();
-
-  /**
-   * Returns whether this source requires explicit deduping.
-   *
-   * <p>This is needed if the underlying data source can return the same record multiple times,
-   * such as a queuing system with a pull-ack model.  Sources where the records read are uniquely
-   * identified by the persisted state in the CheckpointMark do not need this.
-   *
-   * <p>This default implementation returns {@code false}.
-   */
-  public boolean requiresDeduping() {
-    return false;
-  }
-
-  /**
-   * A marker representing the progress and state of an
-   * {@link com.google.cloud.dataflow.sdk.io.UnboundedSource.UnboundedReader}.
-   *
-   * <p>For example, this could be offsets in a set of files being read.
-   */
-  public interface CheckpointMark {
-    /**
-     * Perform any finalization that needs to happen after a bundle of data read from
-     * the source has been processed and committed.
-     *
-     * <p>For example, this could be sending acknowledgement requests to an external
-     * data source such as Pub/Sub.
-     *
-     * <p>This may be called from any thread, potentially at the same time as calls to the
-     * {@code UnboundedReader} that created it.
-     */
-    void finalizeCheckpoint() throws IOException;
-  }
-
-  /**
-   * A {@code Reader} that reads an unbounded amount of input.
-   *
-   * <p>A given {@code UnboundedReader} object will only be accessed by a single thread at once.
-   */
-  @Experimental(Experimental.Kind.SOURCE_SINK)
-  public abstract static class UnboundedReader<OutputT> extends Source.Reader<OutputT> {
-    private static final byte[] EMPTY = new byte[0];
-
-    /**
-     * Initializes the reader and advances the reader to the first record.
-     *
-     * <p>This method should be called exactly once. The invocation should occur prior to calling
-     * {@link #advance} or {@link #getCurrent}. This method may perform expensive operations that
-     * are needed to initialize the reader.
-     *
-     * <p>Returns {@code true} if a record was read, {@code false} if there is no more input
-     * currently available.  Future calls to {@link #advance} may return {@code true} once more data
-     * is available. Regardless of the return value of {@code start}, {@code start} will not be
-     * called again on the same {@code UnboundedReader} object; it will only be called again when a
-     * new reader object is constructed for the same source, e.g. on recovery.
-     */
-    @Override
-    public abstract boolean start() throws IOException;
-
-    /**
-     * Advances the reader to the next valid record.
-     *
-     * <p>Returns {@code true} if a record was read, {@code false} if there is no more input
-     * available. Future calls to {@link #advance} may return {@code true} once more data is
-     * available.
-     */
-    @Override
-    public abstract boolean advance() throws IOException;
-
-    /**
-     * Returns a unique identifier for the current record.  This should be the same for each
-     * instance of the same logical record read from the underlying data source.
-     *
-     * <p>It is only necessary to override this if {@link #requiresDeduping} has been overridden to
-     * return true.
-     *
-     * <p>For example, this could be a hash of the record contents, or a logical ID present in
-     * the record.  If this is generated as a hash of the record contents, it should be at least 16
-     * bytes (128 bits) to avoid collisions.
-     *
-     * <p>This method has the same restrictions on when it can be called as {@link #getCurrent} and
-     * {@link #getCurrentTimestamp}.
-     *
-     * <p>This default implementation returns an empty array when the source does not require
-     * deduping.
-     *
-     * @throws NoSuchElementException if the reader is at the beginning of the input and
-     *         {@link #start} or {@link #advance} wasn't called, or if the last {@link #start} or
-     *         {@link #advance} returned {@code false}.
-     * @throws IllegalStateException in this default implementation, if the source returned by
-     *         {@link #getCurrentSource} requires deduping, because the method must then be
-     *         overridden.
-     */
-    public byte[] getCurrentRecordId() throws NoSuchElementException {
-      if (getCurrentSource().requiresDeduping()) {
-        // A source that asks for deduping needs real record ids; the shared empty id would make
-        // every record look the same.
-        throw new IllegalStateException(
-            "getCurrentRecordId() must be overridden if requiresDeduping() returns true");
-      }
-      return EMPTY;
-    }
-
-    /**
-     * Returns a timestamp before or at the timestamps of all future elements read by this reader.
-     *
-     * <p>This can be approximate.  If records are read that violate this guarantee, they will be
-     * considered late, which will affect how they will be processed.  See
-     * {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window} for more information on
-     * late data and how to handle it.
-     *
-     * <p>However, this value should be as late as possible. Downstream windows may not be able
-     * to close until this watermark passes their end.
-     *
-     * <p>For example, a source may know that the records it reads will be in timestamp order.  In
-     * this case, the watermark can be the timestamp of the last record read.  For a
-     * source that does not have natural timestamps, timestamps can be set to the time of
-     * reading, in which case the watermark is the current clock time.
-     *
-     * <p>See {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window} and
-     * {@link com.google.cloud.dataflow.sdk.transforms.windowing.Trigger} for more
-     * information on timestamps and watermarks.
-     *
-     * <p>May be called after {@link #advance} or {@link #start} has returned false, but not before
-     * {@link #start} has been called.
-     */
-    public abstract Instant getWatermark();
-
-    /**
-     * Returns a {@link CheckpointMark} representing the progress of this {@code UnboundedReader}.
-     *
-     * <p>The elements read up until this is called will be processed together as a bundle. Once
-     * the result of this processing has been durably committed,
-     * {@link CheckpointMark#finalizeCheckpoint} will be called on the {@link CheckpointMark}
-     * object.
-     *
-     * <p>The returned object should not be modified.
-     *
-     * <p>May be called after {@link #advance} or {@link #start} has returned false, but not before
-     * {@link #start} has been called.
-     */
-    public abstract CheckpointMark getCheckpointMark();
-
-    /**
-     * Constant representing an unknown amount of backlog.
-     */
-    public static final long BACKLOG_UNKNOWN = -1L;
-
-    /**
-     * Returns the size of the backlog of unread data in the underlying data source represented by
-     * this split of this source.
-     *
-     * <p>One of this or {@link #getTotalBacklogBytes} should be overridden in order to allow the
-     * runner to scale the amount of resources allocated to the pipeline.
-     *
-     * <p>This default implementation returns {@link #BACKLOG_UNKNOWN}.
-     */
-    public long getSplitBacklogBytes() {
-      return BACKLOG_UNKNOWN;
-    }
-
-    /**
-     * Returns the size of the backlog of unread data in the underlying data source represented by
-     * all splits of this source.
-     *
-     * <p>One of this or {@link #getSplitBacklogBytes} should be overridden in order to allow the
-     * runner to scale the amount of resources allocated to the pipeline.
-     *
-     * <p>This default implementation returns {@link #BACKLOG_UNKNOWN}.
-     */
-    public long getTotalBacklogBytes() {
-      return BACKLOG_UNKNOWN;
-    }
-
-    /**
-     * Returns the {@link UnboundedSource} that created this reader.  This will not change over the
-     * life of the reader.
-     */
-    @Override
-    public abstract UnboundedSource<OutputT, ?> getCurrentSource();
-  }
-}

[50/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/pom.xml
----------------------------------------------------------------------
diff --git a/sdk/pom.xml b/sdk/pom.xml
deleted file mode 100644
index 13fe950..0000000
--- a/sdk/pom.xml
+++ /dev/null
@@ -1,771 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-    Licensed to the Apache Software Foundation (ASF) under one or more
-    contributor license agreements.  See the NOTICE file distributed with
-    this work for additional information regarding copyright ownership.
-    The ASF licenses this file to You under the Apache License, Version 2.0
-    (the "License"); you may not use this file except in compliance with
-    the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <groupId>org.apache.beam</groupId>
-    <artifactId>parent</artifactId>
-    <version>0.1.0-incubating-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-
-  <artifactId>java-sdk-all</artifactId>
-  <name>Apache Beam :: SDK :: Java All</name>
-  <description>Beam SDK Java All provides a simple, Java-based
-  interface for processing virtually any size data. This
-  artifact includes entire Apache Beam Java SDK.</description>
-
-  <packaging>jar</packaging>
-
-  <properties>
-    <timestamp>${maven.build.timestamp}</timestamp>
-    <maven.build.timestamp.format>yyyy-MM-dd HH:mm</maven.build.timestamp.format>
-    <dataflow>com.google.cloud.dataflow</dataflow>
-    <runIntegrationTestOnService>false</runIntegrationTestOnService>
-    <testParallelValue>none</testParallelValue>
-    <testGroups></testGroups>
-    <dataflowProjectName></dataflowProjectName>
-  </properties>
-
-  <profiles>
-    <profile>
-      <id>DataflowPipelineTests</id>
-      <properties>
-        <runIntegrationTestOnService>true</runIntegrationTestOnService>
-        <testGroups>com.google.cloud.dataflow.sdk.testing.RunnableOnService</testGroups>
-        <testParallelValue>both</testParallelValue>
-      </properties>
-    </profile>
-  </profiles>
-
-  <build>
-    <resources>
-      <resource>
-        <directory>src/main/resources</directory>
-        <filtering>true</filtering>
-      </resource>
-    </resources>
-
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-compiler-plugin</artifactId>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-dependency-plugin</artifactId>
-        <executions>
-          <execution>
-            <goals><goal>analyze-only</goal></goals>
-            <configuration>
-              <failOnWarning>true</failOnWarning>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- Run CheckStyle pass on transforms, as they are released in
-           source form. -->
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-checkstyle-plugin</artifactId>
-        <version>2.12</version>
-        <dependencies>
-          <dependency>
-            <groupId>com.puppycrawl.tools</groupId>
-            <artifactId>checkstyle</artifactId>
-            <version>6.6</version>
-          </dependency>
-        </dependencies>
-        <configuration>
-          <configLocation>../checkstyle.xml</configLocation>
-          <consoleOutput>true</consoleOutput>
-          <failOnViolation>true</failOnViolation>
-          <includeResources>false</includeResources>
-          <includeTestSourceDirectory>true</includeTestSourceDirectory>
-          <excludes>${project.build.directory}/generated-test-sources/**</excludes>
-        </configuration>
-        <executions>
-          <execution>
-            <goals>
-              <goal>check</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-jar-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>default-jar</id>
-            <goals>
-              <goal>jar</goal>
-            </goals>
-          </execution>
-          <execution>
-            <id>default-test-jar</id>
-            <goals>
-              <goal>test-jar</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- Source plugin for generating source and test-source JARs. -->
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-source-plugin</artifactId>
-        <version>2.4</version>
-        <executions>
-          <execution>
-            <id>attach-sources</id>
-            <phase>compile</phase>
-            <goals>
-              <goal>jar</goal>
-            </goals>
-          </execution>
-          <execution>
-            <id>attach-test-sources</id>
-            <phase>test-compile</phase>
-            <goals>
-              <goal>test-jar</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-javadoc-plugin</artifactId>
-        <configuration>
-          <windowtitle>Google Cloud Dataflow SDK ${project.version} API</windowtitle>
-          <doctitle>Google Cloud Dataflow SDK for Java, version ${project.version}</doctitle>
-          <overview>../javadoc/overview.html</overview>
-
-          <subpackages>com.google.cloud.dataflow.sdk</subpackages>
-          <additionalparam>-exclude com.google.cloud.dataflow.sdk.runners.worker:com.google.cloud.dataflow.sdk.runners.dataflow:com.google.cloud.dataflow.sdk.util:com.google.cloud.dataflow.sdk.runners.inprocess ${dataflow.javadoc_opts}</additionalparam>
-          <use>false</use>
-          <quiet>true</quiet>
-          <bottom><![CDATA[<br>]]></bottom>
-
-          <offlineLinks>
-            <offlineLink>
-              <url>https://developers.google.com/api-client-library/java/google-api-java-client/reference/1.20.0/</url>
-              <location>${basedir}/../javadoc/apiclient-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://avro.apache.org/docs/1.7.7/api/java/</url>
-              <location>${basedir}/../javadoc/avro-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/</url>
-              <location>${basedir}/../javadoc/bq-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>https://cloud.google.com/datastore/docs/apis/javadoc/</url>
-              <location>${basedir}/../javadoc/datastore-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://docs.guava-libraries.googlecode.com/git-history/release19/javadoc/</url>
-              <location>${basedir}/../javadoc/guava-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://hamcrest.org/JavaHamcrest/javadoc/1.3/</url>
-              <location>${basedir}/../javadoc/hamcrest-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://fasterxml.github.io/jackson-annotations/javadoc/2.7/</url>
-              <location>${basedir}/../javadoc/jackson-annotations-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://fasterxml.github.io/jackson-databind/javadoc/2.7/</url>
-              <location>${basedir}/../javadoc/jackson-databind-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://www.joda.org/joda-time/apidocs</url>
-              <location>${basedir}/../javadoc/joda-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>http://junit.sourceforge.net/javadoc/</url>
-              <location>${basedir}/../javadoc/junit-docs</location>
-            </offlineLink>
-            <offlineLink>
-              <url>https://developers.google.com/api-client-library/java/google-oauth-java-client/reference/1.20.0/</url>
-              <location>${basedir}/../javadoc/oauth-docs</location>
-            </offlineLink>
-          </offlineLinks>
-        </configuration>
-        <executions>
-          <execution>
-            <goals>
-              <goal>jar</goal>
-            </goals>
-            <phase>package</phase>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-shade-plugin</artifactId>
-        <version>2.4.1</version>
-        <executions>
-          <!-- In the first phase, we pick dependencies and relocate them. -->
-          <execution>
-            <id>bundle-and-repackage</id>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-            <configuration>
-              <shadeTestJar>true</shadeTestJar>
-              <artifactSet>
-                <includes>
-                  <include>com.google.cloud.bigtable:bigtable-client-core</include>
-                  <include>com.google.guava:guava</include>
-                </includes>
-              </artifactSet>
-              <filters>
-                <filter>
-                  <artifact>*:*</artifact>
-                  <excludes>
-                    <exclude>META-INF/*.SF</exclude>
-                    <exclude>META-INF/*.DSA</exclude>
-                    <exclude>META-INF/*.RSA</exclude>
-                  </excludes>
-                </filter>
-              </filters>
-              <relocations>
-                <!-- TODO: Once ready, change the following pattern to 'com'
-                     only, exclude 'com.google.cloud.dataflow.**', and remove
-                     the second relocation. -->
-                <relocation>
-                  <pattern>com.google.common</pattern>
-                  <shadedPattern>com.google.cloud.dataflow.sdk.repackaged.com.google.common</shadedPattern>
-                </relocation>
-                <relocation>
-                  <pattern>com.google.thirdparty</pattern>
-                  <shadedPattern>com.google.cloud.dataflow.sdk.repackaged.com.google.thirdparty</shadedPattern>
-                </relocation>
-                <relocation>
-                  <pattern>com.google.cloud.bigtable</pattern>
-                  <shadedPattern>com.google.cloud.dataflow.sdk.repackaged.com.google.cloud.bigtable</shadedPattern>
-                  <excludes>
-                    <exclude>com.google.cloud.bigtable.config.BigtableOptions*</exclude>
-                    <exclude>com.google.cloud.bigtable.config.CredentialOptions*</exclude>
-                    <exclude>com.google.cloud.bigtable.config.RetryOptions*</exclude>
-                    <exclude>com.google.cloud.bigtable.grpc.BigtableClusterName</exclude>
-                    <exclude>com.google.cloud.bigtable.grpc.BigtableTableName</exclude>
-                  </excludes>
-                </relocation>
-              </relocations>
-            </configuration>
-          </execution>
-
-          <!-- In the second phase, we pick remaining dependencies and bundle
-               them without repackaging. -->
-          <execution>
-            <id>bundle-rest-without-repackaging</id>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-            <configuration>
-              <shadeTestJar>true</shadeTestJar>
-              <finalName>${project.artifactId}-bundled-${project.version}</finalName>
-              <artifactSet>
-                <excludes>
-                  <exclude>com.google.cloud.bigtable:bigtable-client-core</exclude>
-                  <exclude>com.google.guava:guava</exclude>
-                </excludes>
-              </artifactSet>
-              <filters>
-                <filter>
-                  <artifact>*:*</artifact>
-                  <excludes>
-                    <exclude>META-INF/*.SF</exclude>
-                    <exclude>META-INF/*.DSA</exclude>
-                    <exclude>META-INF/*.RSA</exclude>
-                  </excludes>
-                </filter>
-              </filters>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- Coverage analysis for unit tests. -->
-      <plugin>
-        <groupId>org.jacoco</groupId>
-        <artifactId>jacoco-maven-plugin</artifactId>
-      </plugin>
-
-      <!-- Avro plugin for automatic code generation -->
-      <plugin>
-        <groupId>org.apache.avro</groupId>
-        <artifactId>avro-maven-plugin</artifactId>
-        <version>${avro.version}</version>
-        <executions>
-          <execution>
-            <id>schemas</id>
-            <phase>generate-sources</phase>
-            <goals>
-              <goal>schema</goal>
-            </goals>
-            <configuration>
-              <testSourceDirectory>${project.basedir}/src/test/</testSourceDirectory>
-              <testOutputDirectory>${project.build.directory}/generated-test-sources/java</testOutputDirectory>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- This plugin tells Maven about an additional test-source directory to
-           build, which contains Avro-generated source files. This is not
-           strictly needed for the regular Maven build, but helps certain IDEs
-           automatically find and compile generated code. -->
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>build-helper-maven-plugin</artifactId>
-        <version>1.9.1</version>
-        <executions>
-          <execution>
-            <id>add-test-source</id>
-            <phase>generate-test-sources</phase>
-            <goals>
-              <goal>add-test-source</goal>
-            </goals>
-            <configuration>
-              <sources>
-                <source>${project.build.directory}/generated-test-sources/java</source>
-              </sources>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-    </plugins>
-  </build>
-
-  <dependencies>
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-dataflow</artifactId>
-      <version>${dataflow.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>io.grpc</groupId>
-      <artifactId>grpc-all</artifactId>
-      <version>0.12.0</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.cloud.bigtable</groupId>
-      <artifactId>bigtable-protos</artifactId>
-      <version>${bigtable.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.cloud.bigtable</groupId>
-      <artifactId>bigtable-client-core</artifactId>
-      <version>${bigtable.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.api-client</groupId>
-      <artifactId>google-api-client</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-bigquery</artifactId>
-      <version>${bigquery.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-clouddebugger</artifactId>
-      <version>${clouddebugger.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-pubsub</artifactId>
-      <version>${pubsub.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-storage</artifactId>
-      <version>${storage.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.http-client</groupId>
-      <artifactId>google-http-client</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <!-- Required by com.google.apis:google-api-services-datastore-protobuf,
-         but the version they depend on differs from our api-client versions -->
-    <dependency>
-      <groupId>com.google.http-client</groupId>
-      <artifactId>google-http-client-jackson</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-        <!--  Exclude an old version of jackson-core-asl -->
-        <exclusion>
-           <groupId>org.codehaus.jackson</groupId>
-           <artifactId>jackson-core-asl</artifactId>
-        </exclusion>
-      </exclusions>
-      <scope>runtime</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.http-client</groupId>
-      <artifactId>google-http-client-jackson2</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.http-client</groupId>
-      <artifactId>google-http-client-protobuf</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-      <scope>runtime</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.oauth-client</groupId>
-      <artifactId>google-oauth-client-java6</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.oauth-client</groupId>
-      <artifactId>google-oauth-client</artifactId>
-      <version>${google-clients.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-datastore-protobuf</artifactId>
-      <version>${datastore.version}</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-        <!-- Exclude old version of api client dependencies. -->
-        <exclusion>
-          <groupId>com.google.http-client</groupId>
-          <artifactId>google-http-client</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>com.google.api-client</groupId>
-          <artifactId>google-api-client</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>com.google.oauth-client</groupId>
-          <artifactId>google-oauth-client</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>com.google.http-client</groupId>
-          <artifactId>google-http-client-jackson</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>com.google.http-client</groupId>
-          <artifactId>google-http-client-protobuf</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.cloud.bigdataoss</groupId>
-      <artifactId>gcsio</artifactId>
-      <version>1.4.3</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.cloud.bigdataoss</groupId>
-      <artifactId>util</artifactId>
-      <version>1.4.3</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-      <!-- If updating version, please update the javadoc offlineLink -->
-      <version>${guava.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava-testlib</artifactId>
-      <version>${guava.version}</version>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.protobuf</groupId>
-      <artifactId>protobuf-java</artifactId>
-      <version>${protobuf.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.code.findbugs</groupId>
-      <artifactId>jsr305</artifactId>
-      <version>${jsr305.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.fasterxml.jackson.core</groupId>
-      <artifactId>jackson-core</artifactId>
-      <version>${jackson.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.fasterxml.jackson.core</groupId>
-      <artifactId>jackson-annotations</artifactId>
-      <version>${jackson.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.fasterxml.jackson.core</groupId>
-      <artifactId>jackson-databind</artifactId>
-      <version>${jackson.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-api</artifactId>
-      <version>${slf4j.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro</artifactId>
-      <version>${avro.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.xerial.snappy</groupId>
-      <artifactId>snappy-java</artifactId>
-      <version>1.1.2.1</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-compress</artifactId>
-      <version>1.9</version>
-    </dependency>
-
-    <dependency>
-      <groupId>joda-time</groupId>
-      <artifactId>joda-time</artifactId>
-      <version>${joda.version}</version>
-    </dependency>
-
-    <!--
-    To use com.google.cloud.dataflow.io.XmlSource:
-
-    1. Explicitly declare the following dependency for the stax2 API.
-    2. Include a stax2 implementation on the classpath. One example
-       is given below as an optional runtime dependency on woodstox-core-asl
-    -->
-    <dependency>
-      <groupId>org.codehaus.woodstox</groupId>
-      <artifactId>stax2-api</artifactId>
-      <version>${stax2.version}</version>
-      <optional>true</optional>
-    </dependency>
-
-    <dependency>
-      <groupId>org.codehaus.woodstox</groupId>
-      <artifactId>woodstox-core-asl</artifactId>
-      <version>${woodstox.version}</version>
-      <scope>runtime</scope>
-      <optional>true</optional>
-      <exclusions>
-        <!-- javax.xml.stream:stax-api is included in JDK 1.6+ -->
-        <exclusion>
-          <groupId>javax.xml.stream</groupId>
-          <artifactId>stax-api</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <!--
-    To use com.google.cloud.dataflow.io.AvroSource with XZ-encoded files,
-    please explicitly declare this dependency to include org.tukaani:xz on
-    the classpath at runtime.
-    -->
-    <dependency>
-      <groupId>org.tukaani</groupId>
-      <artifactId>xz</artifactId>
-      <version>1.5</version>
-      <scope>runtime</scope>
-      <optional>true</optional>
-    </dependency>
-
-    <!-- build dependencies -->
-    <dependency>
-      <groupId>com.google.auto.service</groupId>
-      <artifactId>auto-service</artifactId>
-      <version>1.0-rc2</version>
-      <optional>true</optional>
-    </dependency>
-
-    <!-- test dependencies -->
-    <dependency>
-      <groupId>org.hamcrest</groupId>
-      <artifactId>hamcrest-all</artifactId>
-      <version>${hamcrest.version}</version>
-      <scope>provided</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <version>${junit.version}</version>
-      <scope>provided</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-jdk14</artifactId>
-      <version>${slf4j.version}</version>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-all</artifactId>
-      <version>1.10.19</version>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.cloud.dataflow</groupId>
-      <artifactId>google-cloud-dataflow-java-proto-library-all</artifactId>
-      <version>0.5.160304</version>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
-</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java
deleted file mode 100644
index b166673..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/Pipeline.java
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk;
-
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.io.Read;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.TransformHierarchy;
-import com.google.cloud.dataflow.sdk.runners.TransformTreeNode;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.UserCodeException;
-import com.google.cloud.dataflow.sdk.values.PBegin;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Multimap;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-/**
- * A {@link Pipeline} manages a directed acyclic graph of {@link PTransform PTransforms}, and the
- * {@link PCollection PCollections} that the {@link PTransform}s consume and produce.
- *
- * <p>A {@link Pipeline} is initialized with a {@link PipelineRunner} that will later
- * execute the {@link Pipeline}.
- *
- * <p>{@link Pipeline Pipelines} are independent, so they can be constructed and executed
- * concurrently.
- *
- * <p>Each {@link Pipeline} is self-contained and isolated from any other
- * {@link Pipeline}. The {@link PValue PValues} that are inputs and outputs of each of a
- * {@link Pipeline Pipeline's} {@link PTransform PTransforms} are also owned by that
- * {@link Pipeline}. A {@link PValue} owned by one {@link Pipeline} can be read only by
- * {@link PTransform PTransforms} also owned by that {@link Pipeline}.
- *
- * <p>Here is a typical example of use:
- * <pre> {@code
- * // Start by defining the options for the pipeline.
- * PipelineOptions options = PipelineOptionsFactory.create();
- * // Then create the pipeline. The runner is determined by the options.
- * Pipeline p = Pipeline.create(options);
- *
- * // A root PTransform, like TextIO.Read or Create, gets added
- * // to the Pipeline by being applied:
- * PCollection<String> lines =
- *     p.apply(TextIO.Read.from("gs://bucket/dir/file*.txt"));
- *
- * // A Pipeline can have multiple root transforms:
- * PCollection<String> moreLines =
- *     p.apply(TextIO.Read.from("gs://bucket/other/dir/file*.txt"));
- * PCollection<String> yetMoreLines =
- *     p.apply(Create.of("yet", "more", "lines").withCoder(StringUtf8Coder.of()));
- *
- * // Further PTransforms can be applied, in an arbitrary (acyclic) graph.
- * // Subsequent PTransforms (and intermediate PCollections etc.) are
- * // implicitly part of the same Pipeline.
- * PCollection<String> allLines =
- *     PCollectionList.of(lines).and(moreLines).and(yetMoreLines)
- *     .apply(new Flatten<String>());
- * PCollection<KV<String, Integer>> wordCounts =
- *     allLines
- *     .apply(ParDo.of(new ExtractWords()))
- *     .apply(new Count<String>());
- * PCollection<String> formattedWordCounts =
- *     wordCounts.apply(ParDo.of(new FormatCounts()));
- * formattedWordCounts.apply(TextIO.Write.to("gs://bucket/dir/counts.txt"));
- *
- * // PTransforms aren't executed when they're applied, rather they're
- * // just added to the Pipeline.  Once the whole Pipeline of PTransforms
- * // is constructed, the Pipeline's PTransforms can be run using a
- * // PipelineRunner.  The default PipelineRunner executes the Pipeline
- * // directly, sequentially, in this one process, which is useful for
- * // unit tests and simple experiments:
- * p.run();
- *
- * } </pre>
- */
-public class Pipeline {
-  private static final Logger LOG = LoggerFactory.getLogger(Pipeline.class);
-
-  /**
-   * Thrown during execution of a {@link Pipeline}, whenever user code within that
-   * {@link Pipeline} throws an exception.
-   *
-   * <p>The original exception thrown by user code may be retrieved via {@link #getCause}.
-   */
-  public static class PipelineExecutionException extends RuntimeException {
-    /**
-     * Wraps {@code cause} into a {@link PipelineExecutionException}.
-     */
-    public PipelineExecutionException(Throwable cause) {
-      super(cause);
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Public operations.
-
-  /**
-   * Constructs a pipeline from the provided options.
-   *
-   * @return The newly created pipeline.
-   */
-  public static Pipeline create(PipelineOptions options) {
-    Pipeline pipeline = new Pipeline(PipelineRunner.fromOptions(options), options);
-    LOG.debug("Creating {}", pipeline);
-    return pipeline;
-  }
-
-  /**
-   * Returns a {@link PBegin} owned by this Pipeline.  This is useful
-   * as the input of a root PTransform such as {@link Read} or
-   * {@link Create}.
-   */
-  public PBegin begin() {
-    return PBegin.in(this);
-  }
-
-  /**
-   * Like {@link #apply(String, PTransform)} but the transform node in the {@link Pipeline}
-   * graph will be named according to {@link PTransform#getName}.
-   *
-   * @see #apply(String, PTransform)
-   */
-  public <OutputT extends POutput> OutputT apply(
-      PTransform<? super PBegin, OutputT> root) {
-    return begin().apply(root);
-  }
-
-  /**
-   * Adds a root {@link PTransform}, such as {@link Read} or {@link Create},
-   * to this {@link Pipeline}.
-   *
-   * <p>The node in the {@link Pipeline} graph will use the provided {@code name}.
-   * This name is used in various places, including the monitoring UI, logging,
-   * and to stably identify this node in the {@link Pipeline} graph upon update.
-   *
-   * <p>Alias for {@code begin().apply(name, root)}.
-   */
-  public <OutputT extends POutput> OutputT apply(
-      String name, PTransform<? super PBegin, OutputT> root) {
-    return begin().apply(name, root);
-  }
-
-  /**
-   * Runs the {@link Pipeline} using its {@link PipelineRunner}.
-   */
-  public PipelineResult run() {
-    LOG.debug("Running {} via {}", this, runner);
-    try {
-      return runner.run(this);
-    } catch (UserCodeException e) {
-      // This serves to replace the stack with one that ends here and
-      // is caused by the caught UserCodeException, thereby splicing
-      // out all the stack frames in between the PipelineRunner itself
-      // and where the worker calls into the user's code.
-      throw new PipelineExecutionException(e.getCause());
-    }
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Below here are operations that aren't normally called by users.
-
-  /**
-   * Returns the {@link CoderRegistry} that this {@link Pipeline} uses.
-   */
-  public CoderRegistry getCoderRegistry() {
-    if (coderRegistry == null) {
-      coderRegistry = new CoderRegistry();
-      coderRegistry.registerStandardCoders();
-    }
-    return coderRegistry;
-  }
-
-  /**
-   * Sets the {@link CoderRegistry} that this {@link Pipeline} uses.
-   */
-  public void setCoderRegistry(CoderRegistry coderRegistry) {
-    this.coderRegistry = coderRegistry;
-  }
-
-  /**
-   * A {@link PipelineVisitor} can be passed into
-   * {@link Pipeline#traverseTopologically} to be called for each of the
-   * transforms and values in the {@link Pipeline}.
-   */
-  public interface PipelineVisitor {
-    /**
-     * Called for each composite transform after all topological predecessors have been visited
-     * but before any of its component transforms.
-     */
-    public void enterCompositeTransform(TransformTreeNode node);
-
-    /**
-     * Called for each composite transform after all of its component transforms and their outputs
-     * have been visited.
-     */
-    public void leaveCompositeTransform(TransformTreeNode node);
-
-    /**
-     * Called for each primitive transform after all of its topological predecessors
-     * and inputs have been visited.
-     */
-    public void visitTransform(TransformTreeNode node);
-
-    /**
-     * Called for each value after the transform that produced the value has been
-     * visited.
-     */
-    public void visitValue(PValue value, TransformTreeNode producer);
-  }
-
-  /**
-   * Invokes the {@link PipelineVisitor PipelineVisitor's}
-   * {@link PipelineVisitor#visitTransform} and
-   * {@link PipelineVisitor#visitValue} operations on each of this
-   * {@link Pipeline Pipeline's} transform and value nodes, in forward
-   * topological order.
-   *
-   * <p>Traversal of the {@link Pipeline} causes {@link PTransform PTransforms} and
-   * {@link PValue PValues} owned by the {@link Pipeline} to be marked as finished,
-   * at which point they may no longer be modified.
-   *
-   * <p>Typically invoked by {@link PipelineRunner} subclasses.
-   */
-  public void traverseTopologically(PipelineVisitor visitor) {
-    Set<PValue> visitedValues = new HashSet<>();
-    // Visit all the transforms, which should implicitly visit all the values.
-    transforms.visit(visitor, visitedValues);
-    if (!visitedValues.containsAll(values)) {
-      throw new RuntimeException(
-          "internal error: should have visited all the values "
-          + "after visiting all the transforms");
-    }
-  }
-
-  /**
-   * Like {@link #applyTransform(String, PInput, PTransform)} but defaulting to the name
-   * provided by the {@link PTransform}.
-   */
-  public static <InputT extends PInput, OutputT extends POutput>
-  OutputT applyTransform(InputT input,
-      PTransform<? super InputT, OutputT> transform) {
-    return input.getPipeline().applyInternal(transform.getName(), input, transform);
-  }
-
-  /**
-   * Applies the given {@code PTransform} to this input {@code InputT} and returns
-   * its {@code OutputT}. This uses {@code name} to identify this specific application
-   * of the transform. This name is used in various places, including the monitoring UI,
-   * logging, and to stably identify this application node in the {@link Pipeline} graph during
-   * update.
-   *
-   * <p>Each {@link PInput} subclass that provides an {@code apply} method should delegate to
-   * this method to ensure proper registration with the {@link PipelineRunner}.
-   */
-  public static <InputT extends PInput, OutputT extends POutput>
-  OutputT applyTransform(String name, InputT input,
-      PTransform<? super InputT, OutputT> transform) {
-    return input.getPipeline().applyInternal(name, input, transform);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-  // Below here are internal operations, never called by users.
-
-  private final PipelineRunner<?> runner;
-  private final PipelineOptions options;
-  private final TransformHierarchy transforms = new TransformHierarchy();
-  private Collection<PValue> values = new ArrayList<>();
-  private Set<String> usedFullNames = new HashSet<>();
-  private CoderRegistry coderRegistry;
-  private Multimap<PTransform<?, ?>, AppliedPTransform<?, ?, ?>> transformApplicationsForTesting =
-      HashMultimap.create();
-
-  /**
-   * @deprecated replaced by {@link #Pipeline(PipelineRunner, PipelineOptions)}
-   */
-  @Deprecated
-  protected Pipeline(PipelineRunner<?> runner) {
-    this(runner, PipelineOptionsFactory.create());
-  }
-
-  protected Pipeline(PipelineRunner<?> runner, PipelineOptions options) {
-    this.runner = runner;
-    this.options = options;
-  }
-
-  @Override
-  public String toString() {
-    return "Pipeline#" + hashCode();
-  }
-
-  /**
-   * Applies a {@link PTransform} to the given {@link PInput}.
-   *
-   * @see Pipeline#apply
-   */
-  private <InputT extends PInput, OutputT extends POutput>
-  OutputT applyInternal(String name, InputT input,
-      PTransform<? super InputT, OutputT> transform) {
-    input.finishSpecifying();
-
-    TransformTreeNode parent = transforms.getCurrent();
-    String namePrefix = parent.getFullName();
-    String fullName = uniquifyInternal(namePrefix, name);
-
-    boolean nameIsUnique = fullName.equals(buildName(namePrefix, name));
-
-    if (!nameIsUnique) {
-      switch (getOptions().getStableUniqueNames()) {
-        case OFF:
-          break;
-        case WARNING:
-          LOG.warn("Transform {} does not have a stable unique name. "
-              + "This will prevent updating of pipelines.", fullName);
-          break;
-        case ERROR:
-          throw new IllegalStateException(
-              "Transform " + fullName + " does not have a stable unique name. "
-              + "This will prevent updating of pipelines.");
-        default:
-          throw new IllegalArgumentException(
-              "Unrecognized value for stable unique names: " + getOptions().getStableUniqueNames());
-      }
-    }
-
-    TransformTreeNode child =
-        new TransformTreeNode(parent, transform, fullName, input);
-    parent.addComposite(child);
-
-    transforms.addInput(child, input);
-
-    LOG.debug("Adding {} to {}", transform, this);
-    try {
-      transforms.pushNode(child);
-      transform.validate(input);
-      OutputT output = runner.apply(transform, input);
-      transforms.setOutput(child, output);
-
-      AppliedPTransform<?, ?, ?> applied = AppliedPTransform.of(
-          child.getFullName(), input, output, transform);
-      transformApplicationsForTesting.put(transform, applied);
-      // recordAsOutput is a NOOP if already called;
-      output.recordAsOutput(applied);
-      verifyOutputState(output, child);
-      return output;
-    } finally {
-      transforms.popNode();
-    }
-  }
-
-  /**
-   * Returns all producing transforms for the {@link PValue PValues} contained
-   * in {@code output}.
-   */
-  private List<AppliedPTransform<?, ?, ?>> getProducingTransforms(POutput output) {
-    List<AppliedPTransform<?, ?, ?>> producingTransforms = new ArrayList<>();
-    for (PValue value : output.expand()) {
-      AppliedPTransform<?, ?, ?> transform = value.getProducingTransformInternal();
-      if (transform != null) {
-        producingTransforms.add(transform);
-      }
-    }
-    return producingTransforms;
-  }
-
-  /**
-   * Verifies that the output of a {@link PTransform} is correctly configured in its
-   * {@link TransformTreeNode} in the {@link Pipeline} graph.
-   *
-   * <p>A non-composite {@link PTransform} must have all
-   * of its outputs registered as produced by that {@link PTransform}.
-   *
-   * <p>A composite {@link PTransform} must have all of its outputs
-   * registered as produced by the contained primitive {@link PTransform PTransforms}.
-   * They have each had the above check performed already, when
-   * they were applied, so the only possible failure state is
-   * that the composite {@link PTransform} has returned a primitive output.
-   */
-  private void verifyOutputState(POutput output, TransformTreeNode node) {
-    if (!node.isCompositeNode()) {
-      PTransform<?, ?> thisTransform = node.getTransform();
-      List<AppliedPTransform<?, ?, ?>> producingTransforms = getProducingTransforms(output);
-      for (AppliedPTransform<?, ?, ?> producingTransform : producingTransforms) {
-        // Using != because object identity indicates that the transforms
-        // are the same node in the pipeline
-        if (thisTransform != producingTransform.getTransform()) {
-          throw new IllegalArgumentException("Output of non-composite transform "
-              + thisTransform + " is registered as being produced by"
-              + " a different transform: " + producingTransform);
-        }
-      }
-    } else {
-      PTransform<?, ?> thisTransform = node.getTransform();
-      List<AppliedPTransform<?, ?, ?>> producingTransforms = getProducingTransforms(output);
-      for (AppliedPTransform<?, ?, ?> producingTransform : producingTransforms) {
-        // Using == because object identity indicates that the transforms
-        // are the same node in the pipeline
-        if (thisTransform == producingTransform.getTransform()) {
-          throw new IllegalStateException("Output of composite transform "
-              + thisTransform + " is registered as being produced by it,"
-              + " but the output of every composite transform should be"
-              + " produced by a primitive transform contained therein.");
-        }
-      }
-    }
-  }
-
-  /**
-   * Returns the configured {@link PipelineRunner}.
-   */
-  public PipelineRunner<?> getRunner() {
-    return runner;
-  }
-
-  /**
-   * Returns the configured {@link PipelineOptions}.
-   */
-  public PipelineOptions getOptions() {
-    return options;
-  }
-
-  /**
-   * @deprecated this method is no longer compatible with the design of {@link Pipeline},
-   * as {@link PTransform PTransforms} can be applied multiple times, with different names
-   * each time.
-   */
-  @Deprecated
-  public String getFullNameForTesting(PTransform<?, ?> transform) {
-    Collection<AppliedPTransform<?, ?, ?>> uses =
-        transformApplicationsForTesting.get(transform);
-    Preconditions.checkState(uses.size() > 0, "Unknown transform: " + transform);
-    Preconditions.checkState(uses.size() <= 1, "Transform used multiple times: " + transform);
-    return Iterables.getOnlyElement(uses).getFullName();
-  }
-
-  /**
-   * Returns a unique name for a transform with the given prefix (from
-   * enclosing transforms) and initial name.
-   *
-   * <p>For internal use only.
-   */
-  private String uniquifyInternal(String namePrefix, String origName) {
-    String name = origName;
-    int suffixNum = 2;
-    while (true) {
-      String candidate = buildName(namePrefix, name);
-      if (usedFullNames.add(candidate)) {
-        return candidate;
-      }
-      // A duplicate!  Retry.
-      name = origName + suffixNum++;
-    }
-  }
-
-  /**
-   * Builds a name from a "/"-delimited prefix and a name.
-   */
-  private String buildName(String namePrefix, String name) {
-    return namePrefix.isEmpty() ? name : namePrefix + "/" + name;
-  }
-
-  /**
-   * Adds the given {@link PValue} to this {@link Pipeline}.
-   *
-   * <p>For internal use only.
-   */
-  public void addValueInternal(PValue value) {
-    this.values.add(value);
-    LOG.debug("Adding {} to {}", value, this);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java
deleted file mode 100644
index 6b9a36b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/PipelineResult.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk;
-
-import com.google.cloud.dataflow.sdk.runners.AggregatorRetrievalException;
-import com.google.cloud.dataflow.sdk.runners.AggregatorValues;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-
-/**
- * Result of {@link Pipeline#run()}.
- */
-public interface PipelineResult {
-
-  /**
-   * Retrieves the current state of the pipeline execution.
-   *
-   * @return the {@link State} representing the state of this pipeline.
-   */
-  State getState();
-
-  /**
-   * Retrieves the current value of the provided {@link Aggregator}.
-   *
-   * @param aggregator the {@link Aggregator} to retrieve values for.
-   * @return the current values of the {@link Aggregator},
-   * which may be empty if there are no values yet.
-   * @throws AggregatorRetrievalException if the {@link Aggregator} values could not be retrieved.
-   */
-  <T> AggregatorValues<T> getAggregatorValues(Aggregator<?, T> aggregator)
-      throws AggregatorRetrievalException;
-
-  // TODO: method to retrieve error messages.
-
-  /** Named constants for common values for the job state. */
-  public enum State {
-
-    /** The job state could not be obtained or was not specified. */
-    UNKNOWN(false, false),
-
-    /** The job has been paused, or has not yet started. */
-    STOPPED(false, false),
-
-    /** The job is currently running. */
-    RUNNING(false, false),
-
-    /** The job has successfully completed. */
-    DONE(true, false),
-
-    /** The job has failed. */
-    FAILED(true, false),
-
-    /** The job has been explicitly cancelled. */
-    CANCELLED(true, false),
-
-    /** The job has been updated. */
-    UPDATED(true, true);
-
-    private final boolean terminal;
-
-    private final boolean hasReplacement;
-
-    private State(boolean terminal, boolean hasReplacement) {
-      this.terminal = terminal;
-      this.hasReplacement = hasReplacement;
-    }
-
-    /**
-     * @return {@code true} if the job state can no longer complete work.
-     */
-    public final boolean isTerminal() {
-      return terminal;
-    }
-
-    /**
-     * @return {@code true} if this job state indicates that a replacement job exists.
-     */
-    public final boolean hasReplacementJob() {
-      return hasReplacement;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/Experimental.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/Experimental.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/Experimental.java
deleted file mode 100644
index cac2aa8..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/Experimental.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.annotations;
-
-import java.lang.annotation.Documented;
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
-
-/**
- * Signifies that a public API (public class, method or field) is subject to
- * incompatible changes, or even removal, in a future release. An API bearing
- * this annotation is exempt from any compatibility guarantees made by its
- * containing library. Note that the presence of this annotation implies nothing
- * about the quality or performance of the API in question, only the fact that
- * it is not "API-frozen."
- *
- * <p>It is generally safe for <i>applications</i> to depend on experimental
- * APIs, at the cost of some extra work during upgrades. However, it is
- * generally inadvisable for <i>libraries</i> (which get included on users'
- * class paths, outside the library developers' control) to do so.
- */
-@Retention(RetentionPolicy.CLASS)
-@Target({
-    ElementType.ANNOTATION_TYPE,
-    ElementType.CONSTRUCTOR,
-    ElementType.FIELD,
-    ElementType.METHOD,
-    ElementType.TYPE})
-@Documented
-public @interface Experimental {
-  public Kind value() default Kind.UNSPECIFIED;
-
-  /**
-   * An enumeration of various kinds of experimental APIs.
-   */
-  public enum Kind {
-    /** Generic group of experimental APIs. This is the default value. */
-    UNSPECIFIED,
-
-    /** Sources and sinks related experimental APIs. */
-    SOURCE_SINK,
-
-    /** Auto-scaling related experimental APIs. */
-    AUTOSCALING,
-
-    /** Trigger-related experimental APIs. */
-    TRIGGER,
-
-    /** Aggregator-related experimental APIs. */
-    AGGREGATOR,
-
-    /** Experimental APIs for Coder binary format identifiers. */
-    CODER_ENCODING_ID,
-
-    /** State-related experimental APIs. */
-    STATE,
-
-    /** Timer-related experimental APIs. */
-    TIMERS,
-
-    /** Experimental APIs related to customizing the output time for computed values. */
-    OUTPUT_TIME
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/package-info.java
deleted file mode 100644
index 6c224a6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/annotations/package-info.java
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines annotations used across the SDK.
- */
-package com.google.cloud.dataflow.sdk.annotations;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java
deleted file mode 100644
index c4951b4..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AtomicCoder.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import java.util.Collections;
-import java.util.List;
-
-/**
- * A {@link Coder} that has no component {@link Coder Coders} or other state.
- *
- * <p>Note that, unless the behavior is overridden, atomic coders are presumed to be deterministic
- * and all instances are considered equal.
- *
- * @param <T> the type of the values being transcoded
- */
-public abstract class AtomicCoder<T> extends DeterministicStandardCoder<T> {
-  protected AtomicCoder() { }
-
-  @Override
-  public List<Coder<?>> getCoderArguments() {
-    return null;
-  }
-
-  /**
-   * Returns a list of values contained in the provided example
-   * value, one per type parameter. If there are no type parameters,
-   * returns an empty list.
-   *
-   * <p>Because {@link AtomicCoder} has no components, always returns an empty list.
-   *
-   * @param exampleValue unused, but part of the latent interface expected by
-   * {@link CoderFactories#fromStaticMethods}
-   */
-  public static <T> List<Object> getInstanceComponents(T exampleValue) {
-    return Collections.emptyList();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java
deleted file mode 100644
index 91efb43..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/coders/AvroCoder.java
+++ /dev/null
@@ -1,714 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addString;
-
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.avro.io.BinaryDecoder;
-import org.apache.avro.io.BinaryEncoder;
-import org.apache.avro.io.DatumReader;
-import org.apache.avro.io.DatumWriter;
-import org.apache.avro.io.DecoderFactory;
-import org.apache.avro.io.EncoderFactory;
-import org.apache.avro.reflect.AvroEncode;
-import org.apache.avro.reflect.AvroName;
-import org.apache.avro.reflect.AvroSchema;
-import org.apache.avro.reflect.ReflectData;
-import org.apache.avro.reflect.ReflectDatumReader;
-import org.apache.avro.reflect.ReflectDatumWriter;
-import org.apache.avro.reflect.Union;
-import org.apache.avro.specific.SpecificData;
-import org.apache.avro.util.ClassUtils;
-import org.apache.avro.util.Utf8;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.Serializable;
-import java.lang.reflect.Field;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.SortedSet;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link Coder} using Avro binary format.
- *
- * <p>Each instance of {@code AvroCoder<T>} encapsulates an Avro schema for objects of type
- * {@code T}.
- *
- * <p>The Avro schema may be provided explicitly via {@link AvroCoder#of(Class, Schema)} or
- * omitted via {@link AvroCoder#of(Class)}, in which case it will be inferred
- * using Avro's {@link org.apache.avro.reflect.ReflectData}.
- *
- * <p>For complete details about schema generation and how it can be controlled please see
- * the {@link org.apache.avro.reflect} package.
- * Only concrete classes with a no-argument constructor can be mapped to Avro records.
- * All inherited fields that are not static or transient are included. Fields are not permitted to
- * be null unless annotated by {@link Nullable} or a {@link Union} schema
- * containing {@code "null"}.
- *
- * <p>To use, specify the {@code Coder} type on a PCollection:
- * <pre>
- * {@code
- * PCollection<MyCustomElement> records =
- *     input.apply(...)
- *          .setCoder(AvroCoder.of(MyCustomElement.class));
- * }
- * </pre>
- *
- * <p>or annotate the element class using {@code @DefaultCoder}.
- * <pre><code>
- * {@literal @}DefaultCoder(AvroCoder.class)
- * public class MyCustomElement {
- *   ...
- * }
- * </code></pre>
- *
- * <p>The implementation attempts to determine if the Avro encoding of the given type will satisfy
- * the criteria of {@link Coder#verifyDeterministic} by inspecting both the type and the
- * Schema provided or generated by Avro. Only coders that are deterministic can be used in
- * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey} operations.
- *
- * @param <T> the type of elements handled by this coder
- */
-public class AvroCoder<T> extends StandardCoder<T> {
-
-  /**
-   * Returns an {@code AvroCoder} instance for the provided element type.
-   * @param <T> the element type
-   */
-  public static <T> AvroCoder<T> of(TypeDescriptor<T> type) {
-    @SuppressWarnings("unchecked")
-    Class<T> clazz = (Class<T>) type.getRawType();
-    return of(clazz);
-  }
-
-  /**
-   * Returns an {@code AvroCoder} instance for the provided element class.
-   * @param <T> the element type
-   */
-  public static <T> AvroCoder<T> of(Class<T> clazz) {
-    return new AvroCoder<>(clazz, ReflectData.get().getSchema(clazz));
-  }
-
-  /**
-   * Returns an {@code AvroCoder} instance for the Avro schema. The implicit
-   * type is GenericRecord.
-   */
-  public static AvroCoder<GenericRecord> of(Schema schema) {
-    return new AvroCoder<>(GenericRecord.class, schema);
-  }
-
-  /**
-   * Returns an {@code AvroCoder} instance for the provided element type
-   * using the provided Avro schema.
-   *
-   * <p>If the type argument is GenericRecord, the schema may be arbitrary.
-   * Otherwise, the schema must correspond to the type provided.
-   *
-   * @param <T> the element type
-   */
-  public static <T> AvroCoder<T> of(Class<T> type, Schema schema) {
-    return new AvroCoder<>(type, schema);
-  }
-
-  @SuppressWarnings({"unchecked", "rawtypes"})
-  @JsonCreator
-  public static AvroCoder<?> of(
-      @JsonProperty("type") String classType,
-      @JsonProperty("schema") String schema) throws ClassNotFoundException {
-    Schema.Parser parser = new Schema.Parser();
-    return new AvroCoder(Class.forName(classType), parser.parse(schema));
-  }
-
-  public static final CoderProvider PROVIDER = new CoderProvider() {
-    @Override
-    public <T> Coder<T> getCoder(TypeDescriptor<T> typeDescriptor) {
-      // This is a downcast from `? super T` to T. However, because
-      // it comes from a TypeDescriptor<T>, the class object itself
-      // is the same so the supertype in question shares the same
-      // generated AvroCoder schema.
-      @SuppressWarnings("unchecked")
-      Class<T> rawType = (Class<T>) typeDescriptor.getRawType();
-      return AvroCoder.of(rawType);
-    }
-  };
-
-  private final Class<T> type;
-  private final Schema schema;
-
-  private final List<String> nonDeterministicReasons;
-
-  // Factories allocated by .get() are thread-safe and immutable.
-  private static final EncoderFactory ENCODER_FACTORY = EncoderFactory.get();
-  private static final DecoderFactory DECODER_FACTORY = DecoderFactory.get();
-  // Cache the old encoder/decoder and let the factories reuse them when possible. To be threadsafe,
-  // these are ThreadLocal. This code does not need to be re-entrant as AvroCoder does not use
-  // an inner coder.
-  private final ThreadLocal<BinaryDecoder> decoder;
-  private final ThreadLocal<BinaryEncoder> encoder;
-  private final ThreadLocal<DatumWriter<T>> writer;
-  private final ThreadLocal<DatumReader<T>> reader;
-
-  protected AvroCoder(Class<T> type, Schema schema) {
-    this.type = type;
-    this.schema = schema;
-
-    nonDeterministicReasons = new AvroDeterminismChecker().check(TypeDescriptor.of(type), schema);
-
-    // Decoder and Encoder start off null for each thread. They are allocated and potentially
-    // reused inside encode/decode.
-    this.decoder = new ThreadLocal<>();
-    this.encoder = new ThreadLocal<>();
-
-    // Reader and writer are allocated once per thread and are "final" for thread-local Coder
-    // instance.
-    this.reader = new ThreadLocal<DatumReader<T>>() {
-      @Override
-      public DatumReader<T> initialValue() {
-        return createDatumReader();
-      }
-    };
-    this.writer = new ThreadLocal<DatumWriter<T>>() {
-      @Override
-      public DatumWriter<T> initialValue() {
-        return createDatumWriter();
-      }
-    };
-  }
-
-  /**
-   * The encoding identifier is designed to support evolution as per the design of Avro.
-   * In order to use this class effectively, carefully read the Avro
-   * documentation at
-   * <a href="https://avro.apache.org/docs/1.7.7/spec.html#Schema+Resolution">Schema Resolution</a>
-   * to ensure that the old and new schema <i>match</i>.
-   *
-   * <p>In particular, this encoding identifier is guaranteed to be the same for {@code AvroCoder}
-   * instances of the same principal class, and otherwise distinct. The schema is not included
-   * in the identifier.
-   *
-   * <p>When modifying a class to be encoded as Avro, here are some guidelines; see the above link
-   * for greater detail.
-   *
-   * <ul>
-   * <li>Avoid changing field names.
-   * <li>Never remove a <code>required</code> field.
-   * <li>Only add <code>optional</code> fields, with sensible defaults.
-   * <li>When changing the type of a field, consult the Avro documentation to ensure the new and
-   * old types are interchangeable.
-   * </ul>
-   *
-   * <p>Code consuming this message class should be prepared to support <i>all</i> versions of
-   * the class until it is certain that no remaining serialized instances exist.
-   *
-   * <p>If backwards incompatible changes must be made, the best recourse is to change the name
-   * of your class.
-   */
-  @Override
-  public String getEncodingId() {
-    return type.getName();
-  }
-
-  /**
-   * Returns the type this coder encodes/decodes.
-   */
-  public Class<T> getType() {
-    return type;
-  }
-
-  private Object writeReplace() {
-    // When serialized by Java, instances of AvroCoder should be replaced by
-    // a SerializedAvroCoderProxy.
-    return new SerializedAvroCoderProxy<>(type, schema.toString());
-  }
-
-  @Override
-  public void encode(T value, OutputStream outStream, Context context) throws IOException {
-    // Get a BinaryEncoder instance from the ThreadLocal cache and attempt to reuse it.
-    BinaryEncoder encoderInstance = ENCODER_FACTORY.directBinaryEncoder(outStream, encoder.get());
-    // Save the potentially-new instance for reuse later.
-    encoder.set(encoderInstance);
-    writer.get().write(value, encoderInstance);
-    // Direct binary encoder does not buffer any data and need not be flushed.
-  }
-
-  @Override
-  public T decode(InputStream inStream, Context context) throws IOException {
-    // Get a BinaryDecoder instance from the ThreadLocal cache and attempt to reuse it.
-    BinaryDecoder decoderInstance = DECODER_FACTORY.directBinaryDecoder(inStream, decoder.get());
-    // Save the potentially-new instance for later.
-    decoder.set(decoderInstance);
-    return reader.get().read(null, decoderInstance);
-  }
-
-  @Override
-    public List<? extends Coder<?>> getCoderArguments() {
-    return null;
-  }
-
-  @Override
-  public CloudObject asCloudObject() {
-    CloudObject result = super.asCloudObject();
-    addString(result, "type", type.getName());
-    addString(result, "schema", schema.toString());
-    return result;
-  }
-
-  /**
-   * @throws NonDeterministicException when the type may not be deterministically
-   * encoded using the given {@link Schema}, the {@code directBinaryEncoder}, and the
-   * {@link ReflectDatumWriter} or {@link GenericDatumWriter}.
-   */
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    if (!nonDeterministicReasons.isEmpty()) {
-      throw new NonDeterministicException(this, nonDeterministicReasons);
-    }
-  }
-
-  /**
-   * Returns a new {@link DatumReader} that can be used to read from an Avro file directly. Assumes
-   * the schema used to read is the same as the schema that was used when writing.
-   *
-   * @deprecated For {@code AvroCoder} internal use only.
-   */
-  // TODO: once we can remove this deprecated function, inline in constructor.
-  @Deprecated
-  public DatumReader<T> createDatumReader() {
-    if (type.equals(GenericRecord.class)) {
-      return new GenericDatumReader<>(schema);
-    } else {
-      return new ReflectDatumReader<>(schema);
-    }
-  }
-
-  /**
-   * Returns a new {@link DatumWriter} that can be used to write to an Avro file directly.
-   *
-   * @deprecated For {@code AvroCoder} internal use only.
-   */
-  // TODO: once we can remove this deprecated function, inline in constructor.
-  @Deprecated
-  public DatumWriter<T> createDatumWriter() {
-    if (type.equals(GenericRecord.class)) {
-      return new GenericDatumWriter<>(schema);
-    } else {
-      return new ReflectDatumWriter<>(schema);
-    }
-  }
-
-  /**
-   * Returns the schema used by this coder.
-   */
-  public Schema getSchema() {
-    return schema;
-  }
-
-  /**
-   * Proxy to use in place of serializing the {@link AvroCoder}. This allows the fields
-   * to remain final.
-   */
-  private static class SerializedAvroCoderProxy<T> implements Serializable {
-    private final Class<T> type;
-    private final String schemaStr;
-
-    public SerializedAvroCoderProxy(Class<T> type, String schemaStr) {
-      this.type = type;
-      this.schemaStr = schemaStr;
-    }
-
-    private Object readResolve() {
-      // When deserialized, instances of this object should be replaced by
-      // constructing an AvroCoder.
-      Schema.Parser parser = new Schema.Parser();
-      return new AvroCoder<T>(type, parser.parse(schemaStr));
-    }
-  }
-
-  /**
-   * Helper class encapsulating the various pieces of state maintained by the
-   * recursive walk used for checking if the encoding will be deterministic.
-   */
-  private static class AvroDeterminismChecker {
-
-    // Reasons that the original type is not deterministic. This accumulates
-    // the actual output.
-    private List<String> reasons = new ArrayList<>();
-
-    // Types that are currently "open". Used to make sure we don't have any
-    // recursive types. Note that we assume that all occurrences of a given type
-    // are equal, rather than tracking pairs of type + schema.
-    private Set<TypeDescriptor<?>> activeTypes = new HashSet<>();
-
-    // Similarly to how we record active types, we record the schemas we visit
-    // to make sure we don't encounter recursive fields.
-    private Set<Schema> activeSchemas = new HashSet<>();
-
-    /**
-     * Report an error in the current context.
-     */
-    private void reportError(String context, String fmt, Object... args) {
-      String message = String.format(fmt, args);
-      reasons.add(context + ": " + message);
-    }
-
-    /**
-     * Classes that are serialized by Avro as a String include
-     * <ul>
-     * <li>Subtypes of CharSequence (including String, Avro's mutable Utf8, etc.)
-     * <li>Several predefined classes (BigDecimal, BigInteger, URI, URL)
-     * <li>Classes annotated with @Stringable (uses their #toString() and a String constructor)
-     * </ul>
-     *
-     * <p>Rather than determine which of these cases are deterministic, we list some classes
-     * that definitely are, and treat any others as non-deterministic.
-     */
-    private static final Set<Class<?>> DETERMINISTIC_STRINGABLE_CLASSES = new HashSet<>();
-    static {
-      // CharSequences:
-      DETERMINISTIC_STRINGABLE_CLASSES.add(String.class);
-      DETERMINISTIC_STRINGABLE_CLASSES.add(Utf8.class);
-
-      // Explicitly Stringable:
-      DETERMINISTIC_STRINGABLE_CLASSES.add(java.math.BigDecimal.class);
-      DETERMINISTIC_STRINGABLE_CLASSES.add(java.math.BigInteger.class);
-      DETERMINISTIC_STRINGABLE_CLASSES.add(java.net.URI.class);
-      DETERMINISTIC_STRINGABLE_CLASSES.add(java.net.URL.class);
-
-      // Classes annotated with @Stringable:
-    }
-
-    /**
-     * Return true if the given type token is a subtype of *any* of the listed parents.
-     */
-    private static boolean isSubtypeOf(TypeDescriptor<?> type, Class<?>... parents) {
-      for (Class<?> parent : parents) {
-        if (type.isSubtypeOf(TypeDescriptor.of(parent))) {
-          return true;
-        }
-      }
-      return false;
-    }
-
-    protected AvroDeterminismChecker() {}
-
-    // The entry point for the check. Should not be recursively called.
-    public List<String> check(TypeDescriptor<?> type, Schema schema) {
-      recurse(type.getRawType().getName(), type, schema);
-      return reasons;
-    }
-
-    // This is the method that should be recursively called. It sets up the path
-    // and visited types correctly.
-    private void recurse(String context, TypeDescriptor<?> type, Schema schema) {
-      if (type.getRawType().isAnnotationPresent(AvroSchema.class)) {
-        reportError(context, "Custom schemas are not supported -- remove @AvroSchema.");
-        return;
-      }
-
-      if (!activeTypes.add(type)) {
-        reportError(context, "%s appears recursively", type);
-        return;
-      }
-
-      // If the record isn't a true class, but rather a GenericRecord, SpecificRecord, etc.
-      // with a specified schema, then we need to make the decision based on the generated
-      // implementations.
-      if (isSubtypeOf(type, IndexedRecord.class)) {
-        checkIndexedRecord(context, schema, null);
-      } else {
-        doCheck(context, type, schema);
-      }
-
-      activeTypes.remove(type);
-    }
-
-    private void doCheck(String context, TypeDescriptor<?> type, Schema schema) {
-      switch (schema.getType()) {
-        case ARRAY:
-          checkArray(context, type, schema);
-          break;
-        case ENUM:
-          // Enums should be deterministic, since they depend only on the ordinal.
-          break;
-        case FIXED:
-          // Depending on the implementation of GenericFixed, we don't know how
-          // the given field will be encoded. So, we assume that it isn't
-          // deterministic.
-          reportError(context, "FIXED encodings are not guaranteed to be deterministic");
-          break;
-        case MAP:
-          checkMap(context, type, schema);
-          break;
-        case RECORD:
-          checkRecord(type, schema);
-          break;
-        case UNION:
-          checkUnion(context, type, schema);
-          break;
-        case STRING:
-          checkString(context, type);
-          break;
-        case BOOLEAN:
-        case BYTES:
-        case DOUBLE:
-        case INT:
-        case FLOAT:
-        case LONG:
-        case NULL:
-          // For types that Avro encodes using one of the above primitives, we assume they are
-          // deterministic.
-          break;
-        default:
-          // In any other case (e.g., new types added to Avro) we cautiously report
-          // the type as potentially non-deterministic.
-          reportError(context, "Unknown schema type %s may be non-deterministic", schema.getType());
-          break;
-      }
-    }
-
-    private void checkString(String context, TypeDescriptor<?> type) {
-      // For types that are encoded as strings, we need to make sure they're in an approved
-      // whitelist. For other types that are annotated @Stringable, Avro will just use the
-      // #toString() method, which has no guarantees of determinism.
-      if (!DETERMINISTIC_STRINGABLE_CLASSES.contains(type.getRawType())) {
-        reportError(context, "%s may not have deterministic #toString()", type);
-      }
-    }
-
-   private static final Schema AVRO_NULL_SCHEMA = Schema.create(Schema.Type.NULL);
-
-   private void checkUnion(String context, TypeDescriptor<?> type, Schema schema) {
-      final List<Schema> unionTypes = schema.getTypes();
-
-      if (!type.getRawType().isAnnotationPresent(Union.class)) {
-        // First check for @Nullable field, which shows up as a union of field type and null.
-        if (unionTypes.size() == 2 && unionTypes.contains(AVRO_NULL_SCHEMA)) {
-          // Find the Schema that is not NULL and recursively check that it is deterministic.
-          Schema nullableFieldSchema = unionTypes.get(0).equals(AVRO_NULL_SCHEMA)
-              ? unionTypes.get(1) : unionTypes.get(0);
-          doCheck(context, type, nullableFieldSchema);
-          return;
-        }
-
-        // Otherwise report a schema error.
-        reportError(context, "Expected type %s to have @Union annotation", type);
-        return;
-      }
-
-      // Errors associated with this union will use the base class as their context.
-      String baseClassContext = type.getRawType().getName();
-
-      // For a union, we need to make sure that each possible instantiation is deterministic.
-      for (Schema concrete : unionTypes) {
-        @SuppressWarnings("unchecked")
-        TypeDescriptor<?> unionType = TypeDescriptor.of(ReflectData.get().getClass(concrete));
-
-        recurse(baseClassContext, unionType, concrete);
-      }
-    }
-
-    /**
-     * Checks that every field of a record type is deterministic, reporting an error against
-     * the offending field (as {@code DeclaringClass#fieldName}) for each problem found.
-     */
-    private void checkRecord(TypeDescriptor<?> type, Schema schema) {
-      Class<?> recordClass = type.getRawType();
-      for (org.apache.avro.Schema.Field avroField : schema.getFields()) {
-        Field javaField = getField(recordClass, avroField.name());
-        String where = javaField.getDeclaringClass().getName() + "#" + javaField.getName();
-
-        if (javaField.isAnnotationPresent(AvroEncode.class)) {
-          // A field with a custom encoder is rejected outright and not inspected further.
-          reportError(where, "Custom encoders may be non-deterministic -- remove @AvroEncode");
-        } else if (!IndexedRecord.class.isAssignableFrom(javaField.getType())
-            && javaField.isAnnotationPresent(AvroSchema.class)) {
-          // TODO: We should be able to support custom schemas on POJO fields, but we shouldn't
-          // need to, so we just allow it in the case of IndexedRecords.
-          reportError(where,
-              "Custom schemas are only supported for subtypes of IndexedRecord.");
-        } else {
-          // Nothing wrong with the field itself, so descend into its (resolved) type.
-          TypeDescriptor<?> resolvedFieldType = type.resolveType(javaField.getGenericType());
-          recurse(where, resolvedFieldType, avroField.schema());
-        }
-      }
-    }
-
-    /**
-     * Checks whether values described by {@code schema} encode deterministically, reporting
-     * an error against {@code context} for each problem found.
-     *
-     * <p>{@code activeSchemas} holds the schemas currently being visited: the schema is added
-     * on entry and removed on exit, and a schema that is already present is reported as
-     * recursive instead of being descended into.
-     *
-     * @param context label passed to {@code reportError} for problems found at this level
-     * @param schema the Avro schema to check
-     * @param specificClassStr the field's {@code SpecificData.CLASS_PROP} property, or
-     *     {@code null}; only consulted for STRING schemas
-     */
-    private void checkIndexedRecord(String context, Schema schema,
-        @Nullable String specificClassStr) {
-
-      if (!activeSchemas.add(schema)) {
-        reportError(context, "%s appears recursively", schema.getName());
-        return;
-      }
-
-      switch (schema.getType()) {
-        case ARRAY:
-          // Generic Records use GenericData.Array to implement arrays, which is
-          // essentially an ArrayList, and therefore ordering is deterministic.
-          // The array is thus deterministic if the elements are deterministic.
-          checkIndexedRecord(context, schema.getElementType(), null);
-          break;
-        case ENUM:
-          // Enums are deterministic because they encode as a single integer.
-          break;
-        case FIXED:
-          // In the case of GenericRecords, FIXED is deterministic because it
-          // encodes/decodes as a Byte[].
-          break;
-        case MAP:
-          reportError(context,
-              "GenericRecord and SpecificRecords use a HashMap to represent MAPs,"
-              + " so it is non-deterministic");
-          break;
-        case RECORD:
-          // Each field is checked under the context "<record name>.<field name>", and its
-          // CLASS_PROP property (if any) is forwarded for the STRING case below.
-          for (org.apache.avro.Schema.Field field : schema.getFields()) {
-            checkIndexedRecord(
-                schema.getName() + "." + field.name(),
-                field.schema(),
-                field.getProp(SpecificData.CLASS_PROP));
-          }
-          break;
-        case STRING:
-          // GenericDatumWriter#findStringClass will use a CharSequence or a String
-          // for each string, so it is deterministic.
-
-          // SpecificCompiler#getStringType will use java.lang.String, org.apache.avro.util.Utf8,
-          // or java.lang.CharSequence, unless SpecificData.CLASS_PROP overrides that.
-          if (specificClassStr != null) {
-            Class<?> specificClass;
-            try {
-              specificClass = ClassUtils.forName(specificClassStr);
-              if (!DETERMINISTIC_STRINGABLE_CLASSES.contains(specificClass)) {
-                reportError(context, "Specific class %s is not known to be deterministic",
-                    specificClassStr);
-              }
-            } catch (ClassNotFoundException e) {
-              // A class that cannot be loaded is reported with the same message as a class
-              // that is loaded but not in DETERMINISTIC_STRINGABLE_CLASSES.
-              reportError(context, "Specific class %s is not known to be deterministic",
-                  specificClassStr);
-            }
-          }
-          break;
-        case UNION:
-          // Each branch of the union is checked with its own schema name as the context.
-          for (org.apache.avro.Schema subschema : schema.getTypes()) {
-            checkIndexedRecord(subschema.getName(), subschema, null);
-          }
-          break;
-        case BOOLEAN:
-        case BYTES:
-        case DOUBLE:
-        case INT:
-        case FLOAT:
-        case LONG:
-        case NULL:
-          // For types that Avro encodes using one of the above primitives, we assume they are
-          // deterministic.
-          break;
-        default:
-          reportError(context, "Unknown schema type %s may be non-deterministic", schema.getType());
-          break;
-      }
-
-      // Leaving this schema: remove it so that only schemas still being visited count as
-      // recursive.
-      activeSchemas.remove(schema);
-    }
-
-    /**
-     * Checks a type encoded with a MAP schema: the map must be a {@link SortedMap}, its keys
-     * must be Strings, and its values are checked recursively. Problems are reported against
-     * {@code context}; the value type is checked even when the map itself was reported.
-     */
-    private void checkMap(String context, TypeDescriptor<?> type, Schema schema) {
-      boolean sorted = isSubtypeOf(type, SortedMap.class);
-      if (!sorted) {
-        reportError(context, "%s may not be deterministically ordered", type);
-      }
-
-      // Avro (currently) asserts that all keys are strings.
-      // In case that changes, we double check that the key was a string:
-      TypeDescriptor<?> keyDescriptor = type.resolveType(Map.class.getTypeParameters()[0]);
-      Class<?> keyClass = keyDescriptor.getRawType();
-      if (!String.class.equals(keyClass)) {
-        reportError(context, "map keys should be Strings, but was %s", keyClass);
-      }
-
-      TypeDescriptor<?> valueDescriptor = type.resolveType(Map.class.getTypeParameters()[1]);
-      recurse(context, valueDescriptor, schema.getValueType());
-    }
-
-    /**
-     * Checks a type encoded with an ARRAY schema. The type is accepted when it is a Java array
-     * or a {@link List} / {@link SortedSet}, in which case its element type is checked
-     * recursively. Any other {@link Collection}, and any non-collection type, is reported
-     * against {@code context} and not descended into.
-     */
-    private void checkArray(String context, TypeDescriptor<?> type, Schema schema) {
-      TypeDescriptor<?> elementType;
-      if (type.isArray()) {
-        // The type is an array (with ordering)-> deterministic iff the element is deterministic.
-        elementType = type.getComponentType();
-      } else if (isSubtypeOf(type, Collection.class)) {
-        if (isSubtypeOf(type, List.class, SortedSet.class)) {
-          // Ordered collection -> deterministic iff the element is deterministic
-          elementType = type.resolveType(Collection.class.getTypeParameters()[0]);
-        } else {
-          // Not an ordered collection -> not deterministic
-          reportError(context, "%s may not be deterministically ordered", type);
-          return;
-        }
-      } else {
-        // If it was an unknown type encoded as an array, be conservative and assume
-        // that we don't know anything about the order.
-        // The message has a %s placeholder, so the offending type must be supplied for it.
-        reportError(context, "encoding %s as an ARRAY was unexpected", type);
-        return;
-      }
-
-      // If we get here, it's either a deterministically-ordered Collection, or
-      // an array. Either way, the type is deterministic iff the element type is
-      // deterministic.
-      recurse(context, elementType, schema.getElementType());
-    }
-
-    /**
-     * Finds the field of {@code clazz}, or of one of its superclasses, that corresponds to the
-     * Avro field {@code name}. Declared fields are searched so that private fields are seen.
-     * A field annotated with {@link AvroName} matches on the annotation's value; any other
-     * field matches on its Java name.
-     *
-     * @param clazz the class at which the search starts
-     * @param name the Avro field name to look for
-     * @return the first matching field, searching {@code clazz} before its superclasses
-     * @throws IllegalArgumentException if no class in the hierarchy declares a matching field
-     */
-    private static Field getField(Class<?> clazz, String name) {
-      // Walk the hierarchy with a separate cursor: the cursor is always null once the loop
-      // finishes, so the error message below must name the class the search started from.
-      for (Class<?> current = clazz; current != null; current = current.getSuperclass()) {
-        for (Field field : current.getDeclaredFields()) {
-          AvroName avroName = field.getAnnotation(AvroName.class);
-          String avroFieldName = (avroName != null) ? avroName.value() : field.getName();
-          if (name.equals(avroFieldName)) {
-            return field;
-          }
-        }
-      }
-
-      throw new IllegalArgumentException(
-          "Unable to get field " + name + " from class " + clazz);
-    }
-  }
-}

[09/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java
deleted file mode 100644
index 2e2d1f6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReduceFnRunner.java
+++ /dev/null
@@ -1,843 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey.GroupByKeyOnly;
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo.Timing;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window.ClosingBehavior;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.util.ReduceFnContextFactory.OnTriggerCallbacks;
-import com.google.cloud.dataflow.sdk.util.ReduceFnContextFactory.StateStyle;
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy.AccumulationMode;
-import com.google.cloud.dataflow.sdk.util.state.ReadableState;
-import com.google.cloud.dataflow.sdk.util.state.StateInternals;
-import com.google.cloud.dataflow.sdk.util.state.StateNamespaces.WindowNamespace;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import javax.annotation.Nullable;
-
-/**
- * Manages the execution of a {@link ReduceFn} after a {@link GroupByKeyOnly} has partitioned the
- * {@link PCollection} by key.
- *
- * <p>The {@link #onTrigger} relies on a {@link TriggerRunner} to manage the execution of
- * the triggering logic. The {@code ReduceFnRunner}'s responsibilities are:
- *
- * <ul>
- *   <li>Tracking the windows that are active (have buffered data) as elements arrive and
- *       triggers are fired.
- *   <li>Holding the watermark based on the timestamps of elements in a pane and releasing it
- *       when the trigger fires.
- *   <li>Calling the appropriate callbacks on {@link ReduceFn} based on trigger execution, timer
- *       firings, etc, and providing appropriate contexts to the {@link ReduceFn} for actions
- *       such as output.
- *   <li>Scheduling garbage collection of state associated with a specific window, and making that
- *       happen when the appropriate timer fires.
- * </ul>
- *
- * @param <K> The type of key being processed.
- * @param <InputT> The type of values associated with the key.
- * @param <OutputT> The output type that will be produced for each key.
- * @param <W> The type of windows this operates on.
- */
-public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
-
-  /**
-   * The {@link ReduceFnRunner} depends on most aspects of the {@link WindowingStrategy}.
-   *
-   * <ul>
-   *   <li>It runs the trigger from the {@link WindowingStrategy}.</li>
-   *   <li>It merges windows according to the {@link WindowingStrategy}.</li>
-   *   <li>It chooses how to track active windows and clear out expired windows
-   *       according to the {@link WindowingStrategy}, based on the allowed lateness and
-   *       whether windows can merge.</li>
-   *   <li>It decides whether to emit empty final panes according to whether the
-   *       {@link WindowingStrategy} requires it.</li>
-   *   <li>It uses discarding or accumulation mode according to the {@link WindowingStrategy}.</li>
-   * </ul>
-   */
-  private final WindowingStrategy<Object, W> windowingStrategy;
-
-  private final OutputWindowedValue<KV<K, OutputT>> outputter;
-
-  private final StateInternals<K> stateInternals;
-
-  private final Aggregator<Long, Long> droppedDueToClosedWindow;
-
-  private final K key;
-
-  private final OnMergeCallback onMergeCallback = new OnMergeCallback();
-
-  /**
-   * Track which windows are still active and which 'state address' windows contain state
-   * for a merged window.
-   *
-   * <ul>
-   * <li>State: Global map for all active windows for this computation and key.
-   * <li>Lifetime: Cleared when no active windows need to be tracked. A window lives within
-   * the active window set until its trigger is closed or the window is garbage collected.
-   * </ul>
-   */
-  private final ActiveWindowSet<W> activeWindows;
-
-  /**
-   * Always a {@link SystemReduceFn}.
-   *
-   * <ul>
-   * <li>State: A bag of accumulated values, or the intermediate result of a combiner.
-   * <li>State style: RENAMED
-   * <li>Merging: Concatenate or otherwise combine the state from each merged window.
-   * <li>Lifetime: Cleared when a pane fires if DISCARDING_FIRED_PANES. Otherwise cleared
-   * when trigger is finished or when the window is garbage collected.
-   * </ul>
-   */
-  private final ReduceFn<K, InputT, OutputT, W> reduceFn;
-
-  /**
-   * Manage the setting and firing of timer events.
-   *
-   * <ul>
-   * <li>Merging: End-of-window and garbage collection timers are cancelled when windows are
-   * merged away. Timers created by triggers are never garbage collected and are left to
-   * fire and be ignored.
-   * <li>Lifetime: Timers automatically disappear after they fire.
-   * </ul>
-   */
-  private final TimerInternals timerInternals;
-
-  /**
-   * Manage the execution and state for triggers.
-   *
-   * <ul>
-   * <li>State: Tracks which sub-triggers have finished, and any additional state needed to
-   * determine when the trigger should fire.
-   * <li>State style: DIRECT
-   * <li>Merging: Finished bits are explicitly managed. Other state is eagerly merged as
-   * needed.
-   * <li>Lifetime: Most trigger state is cleared when the final pane is emitted. However
-   * the finished bits are left behind and must be cleared when the window is
-   * garbage collected.
-   * </ul>
-   */
-  private final TriggerRunner<W> triggerRunner;
-
-  /**
-   * Store the output watermark holds for each window.
-   *
-   * <ul>
-   * <li>State: Bag of hold timestamps.
-   * <li>State style: RENAMED
-   * <li>Merging: Depending on {@link OutputTimeFn}, may need to be recalculated on merging.
-   * When a pane fires it may be necessary to add (back) an end-of-window or garbage collection
-   * hold.
-   * <li>Lifetime: Cleared when a pane fires or when the window is garbage collected.
-   * </ul>
-   */
-  private final WatermarkHold<W> watermarkHold;
-
-  private final ReduceFnContextFactory<K, InputT, OutputT, W> contextFactory;
-
-  /**
-   * Store the previously emitted pane (if any) for each window.
-   *
-   * <ul>
-   * <li>State: The previous {@link PaneInfo} passed to the user's {@link DoFn#processElement},
-   * if any.
-   * <li>State style: DIRECT
-   * <li>Merging: Always keyed by actual window, so does not depend on {@link #activeWindows}.
-   * Cleared when window is merged away.
-   * <li>Lifetime: Cleared when trigger is closed or window is garbage collected.
-   * </ul>
-   */
-  private final PaneInfoTracker paneInfoTracker;
-
-  /**
-   * Store whether we've seen any elements for a window since the last pane was emitted.
-   *
-   * <ul>
-   * <li>State: Unless DISCARDING_FIRED_PANES, a count of number of elements added so far.
-   * <li>State style: RENAMED.
-   * <li>Merging: Counts are summed when windows are merged.
-   * <li>Lifetime: Cleared when pane fires or window is garbage collected.
-   * </ul>
-   */
-  private final NonEmptyPanes<K, W> nonEmptyPanes;
-
-  /**
-   * Creates a runner for a single {@code key}, wiring together the active window set, the
-   * context factory, the watermark holds and the trigger runner.
-   *
-   * @param key the key whose values this runner processes
-   * @param windowingStrategy supplies the window function and trigger used below
-   * @param stateInternals state access, shared with the active window set, the context factory
-   *     and the trigger context factory
-   * @param timerInternals timer access, shared with the pane tracker, the context factory and
-   *     the watermark holds
-   * @param windowingInternals wrapped to emit output and also handed to the context factory
-   * @param droppedDueToClosedWindow incremented for each element dropped because its window's
-   *     trigger is already closed
-   * @param reduceFn the reduce function to run
-   * @param options passed through to the context factory
-   */
-  public ReduceFnRunner(
-      K key,
-      WindowingStrategy<?, W> windowingStrategy,
-      StateInternals<K> stateInternals,
-      TimerInternals timerInternals,
-      WindowingInternals<?, KV<K, OutputT>> windowingInternals,
-      Aggregator<Long, Long> droppedDueToClosedWindow,
-      ReduceFn<K, InputT, OutputT, W> reduceFn,
-      PipelineOptions options) {
-    this.key = key;
-    this.timerInternals = timerInternals;
-    this.paneInfoTracker = new PaneInfoTracker(timerInternals);
-    this.stateInternals = stateInternals;
-    this.outputter = new OutputViaWindowingInternals<>(windowingInternals);
-    this.droppedDueToClosedWindow = droppedDueToClosedWindow;
-    this.reduceFn = reduceFn;
-
-    // Unchecked cast: the strategy's first type parameter is re-typed from '?' to Object.
-    @SuppressWarnings("unchecked")
-    WindowingStrategy<Object, W> objectWindowingStrategy =
-        (WindowingStrategy<Object, W>) windowingStrategy;
-    this.windowingStrategy = objectWindowingStrategy;
-
-    this.nonEmptyPanes = NonEmptyPanes.create(this.windowingStrategy, this.reduceFn);
-
-    // Note this may incur I/O to load persisted window set data.
-    // createActiveWindowSet() reads this.windowingStrategy and this.stateInternals, so both
-    // must already be assigned at this point.
-    this.activeWindows = createActiveWindowSet();
-
-    this.contextFactory =
-        new ReduceFnContextFactory<K, InputT, OutputT, W>(key, reduceFn, this.windowingStrategy,
-            stateInternals, this.activeWindows, timerInternals, windowingInternals, options);
-
-    this.watermarkHold = new WatermarkHold<>(timerInternals, windowingStrategy);
-    this.triggerRunner =
-        new TriggerRunner<>(
-            windowingStrategy.getTrigger(),
-            new TriggerContextFactory<>(windowingStrategy, stateInternals, activeWindows));
-  }
-
-  /**
-   * Builds the active window set that matches the window function: a
-   * {@link NonMergingActiveWindowSet} when the window function never merges, otherwise a
-   * {@link MergingActiveWindowSet} backed by {@code stateInternals}.
-   */
-  private ActiveWindowSet<W> createActiveWindowSet() {
-    if (windowingStrategy.getWindowFn().isNonMerging()) {
-      return new NonMergingActiveWindowSet<W>();
-    }
-    return new MergingActiveWindowSet<W>(windowingStrategy.getWindowFn(), stateInternals);
-  }
-
-  /**
-   * Returns whether the trigger runner reports {@code window} as closed, based on the
-   * window's DIRECT-style state.
-   */
-  @VisibleForTesting
-  boolean isFinished(W window) {
-    ReduceFn<K, InputT, OutputT, W>.Context directContext =
-        contextFactory.base(window, StateStyle.DIRECT);
-    return triggerRunner.isClosed(directContext.state());
-  }
-
-  /**
-   * Incorporate {@code values} into the underlying reduce function, and manage holds, timers,
-   * triggers, and window merging.
-   *
-   * <p>The general strategy is:
-   * <ol>
-   *   <li>Use {@link WindowedValue#getWindows} (itself determined using
-   *       {@link WindowFn#assignWindows}) to determine which windows each element belongs to. Some
-   *       of those windows will already have state associated with them. The rest are considered
-   *       NEW.
-   *   <li>Use {@link WindowFn#mergeWindows} to attempt to merge currently ACTIVE and NEW windows.
-   *       Each NEW window will become either ACTIVE, MERGED, or EPHEMERAL. (See {@link
-   *       ActiveWindowSet} for definitions of these terms.)
-   *   <li>If at all possible, eagerly substitute EPHEMERAL windows with their ACTIVE state address
-   *       windows before any state is associated with the EPHEMERAL window. In the common case that
-   *       windows for new elements are merged into existing ACTIVE windows then no additional
-   *       storage or merging overhead will be incurred.
-   *   <li>Otherwise, keep track of the state address windows for ACTIVE windows so that their
-   *       states can be merged on-demand when a pane fires.
-   *   <li>Process the element for each of the windows its windows have been merged into according
-   *       to {@link ActiveWindowSet}. Processing may require running triggers, setting timers,
-   *       setting holds, and invoking {@link ReduceFn#onTrigger}.
-   * </ol>
-   *
-   * @param values the windowed values to process; iterated more than once (once to collect and
-   *     merge windows, once to process each element)
-   */
-  public void processElements(Iterable<WindowedValue<InputT>> values) throws Exception {
-    // If an incoming element introduces a new window, attempt to merge it into an existing
-    // window eagerly. The outcome is stored in the ActiveWindowSet.
-    collectAndMergeWindows(values);
-
-    // A set, so that each window is considered for emission once no matter how many of the
-    // elements landed in it.
-    Set<W> windowsToConsider = new HashSet<>();
-
-    // Process each element, using the updated activeWindows determined by collectAndMergeWindows.
-    for (WindowedValue<InputT> value : values) {
-      windowsToConsider.addAll(processElement(value));
-    }
-
-    // Trigger output from any window for which the trigger is ready
-    for (W mergedWindow : windowsToConsider) {
-      ReduceFn<K, InputT, OutputT, W>.Context directContext =
-          contextFactory.base(mergedWindow, StateStyle.DIRECT);
-      ReduceFn<K, InputT, OutputT, W>.Context renamedContext =
-          contextFactory.base(mergedWindow, StateStyle.RENAMED);
-      triggerRunner.prefetchShouldFire(mergedWindow, directContext.state());
-      emitIfAppropriate(directContext, renamedContext);
-    }
-
-    // We're all done with merging and emitting elements so can compress the activeWindow state.
-    activeWindows.removeEphemeralWindows();
-  }
-
-  /**
-   * Persists the active window set by delegating to {@code activeWindows.persist()}.
-   */
-  public void persist() {
-    activeWindows.persist();
-  }
-
-  /**
-   * Extract the windows associated with the values, and invoke merge.
-   *
-   * <p>Does nothing when the window function is non-merging. Otherwise every window of every
-   * value whose trigger is not already closed is added to the active window set (pre-merging
-   * the state of an ACTIVE window that has more than one state address window first), and
-   * then {@link #mergeActiveWindows} is run once.
-   */
-  private void collectAndMergeWindows(Iterable<WindowedValue<InputT>> values) throws Exception {
-    // No-op if no merging can take place
-    if (windowingStrategy.getWindowFn().isNonMerging()) {
-      return;
-    }
-
-    // Collect the windows from all elements (except those which are too late) and
-    // make sure they are already in the active window set or are added as NEW windows.
-    for (WindowedValue<?> value : values) {
-      for (BoundedWindow untypedWindow : value.getWindows()) {
-        @SuppressWarnings("unchecked")
-        W window = (W) untypedWindow;
-
-        ReduceFn<K, InputT, OutputT, W>.Context directContext =
-            contextFactory.base(window, StateStyle.DIRECT);
-        if (triggerRunner.isClosed(directContext.state())) {
-          // This window has already been closed.
-          // We will update the counter for this in the corresponding processElement call.
-          continue;
-        }
-
-        if (activeWindows.isActive(window)) {
-          Set<W> stateAddressWindows = activeWindows.readStateAddresses(window);
-          if (stateAddressWindows.size() > 1) {
-            // This is a legacy window whose state has not been eagerly merged.
-            // Do that now.
-            ReduceFn<K, InputT, OutputT, W>.OnMergeContext premergeContext =
-                contextFactory.forPremerge(window);
-            reduceFn.onMerge(premergeContext);
-            watermarkHold.onMerge(premergeContext);
-            activeWindows.merged(window);
-          }
-        }
-
-        // Add this window as NEW if we've not yet seen it.
-        activeWindows.addNew(window);
-      }
-    }
-
-    // Merge all of the active windows and retain a mapping from source windows to result windows.
-    mergeActiveWindows();
-  }
-
-  /**
-   * Callback handed to {@code activeWindows.merge(...)}. It forwards prefetch and merge
-   * notifications to the reduce function, the watermark holds, the non-empty-pane tracking and
-   * the trigger runner, and cleans up windows that are merged away.
-   *
-   * <p>Neither method reads its {@code toBeMerged} argument; only {@code activeToBeMerged} and
-   * {@code mergeResult} are used.
-   */
-  private class OnMergeCallback implements ActiveWindowSet.MergeCallback<W> {
-    /**
-     * Called from the active window set to indicate {@code toBeMerged} (of which only
-     * {@code activeToBeMerged} are ACTIVE and thus have state associated with them) will later
-     * be merged into {@code mergeResult}.
-     */
-    @Override
-    public void prefetchOnMerge(
-        Collection<W> toBeMerged, Collection<W> activeToBeMerged, W mergeResult) throws Exception {
-      ReduceFn<K, InputT, OutputT, W>.OnMergeContext directMergeContext =
-          contextFactory.forMerge(activeToBeMerged, mergeResult, StateStyle.DIRECT);
-      ReduceFn<K, InputT, OutputT, W>.OnMergeContext renamedMergeContext =
-          contextFactory.forMerge(activeToBeMerged, mergeResult, StateStyle.RENAMED);
-
-      // Prefetch various state.
-      // The trigger runner is given DIRECT-style state; everything else gets RENAMED-style.
-      triggerRunner.prefetchForMerge(mergeResult, activeToBeMerged, directMergeContext.state());
-      reduceFn.prefetchOnMerge(renamedMergeContext.state());
-      watermarkHold.prefetchOnMerge(renamedMergeContext.state());
-      nonEmptyPanes.prefetchOnMerge(renamedMergeContext.state());
-    }
-
-    /**
-     * Called from the active window set to indicate {@code toBeMerged} (of which only
-     * {@code activeToBeMerged} are ACTIVE and thus have state associated with them) are about
-     * to be merged into {@code mergeResult}.
-     */
-    @Override
-    public void onMerge(Collection<W> toBeMerged, Collection<W> activeToBeMerged, W mergeResult)
-        throws Exception {
-      // At this point activeWindows has NOT incorporated the results of the merge.
-      ReduceFn<K, InputT, OutputT, W>.OnMergeContext directMergeContext =
-          contextFactory.forMerge(activeToBeMerged, mergeResult, StateStyle.DIRECT);
-      ReduceFn<K, InputT, OutputT, W>.OnMergeContext renamedMergeContext =
-          contextFactory.forMerge(activeToBeMerged, mergeResult, StateStyle.RENAMED);
-
-      // Run the reduceFn to perform any needed merging.
-      reduceFn.onMerge(renamedMergeContext);
-
-      // Merge the watermark holds.
-      watermarkHold.onMerge(renamedMergeContext);
-
-      // Merge non-empty pane state.
-      nonEmptyPanes.onMerge(renamedMergeContext.state());
-
-      // Have the trigger merge state as needed
-      triggerRunner.onMerge(
-          directMergeContext.window(), directMergeContext.timers(), directMergeContext.state());
-
-      for (W active : activeToBeMerged) {
-        if (active.equals(mergeResult)) {
-          // Not merged away.
-          continue;
-        }
-        // Cleanup flavor A: Currently ACTIVE window is about to become MERGED.
-        // Clear any state not already cleared by the onMerge calls above.
-        WindowTracing.debug("ReduceFnRunner.onMerge: Merging {} into {}", active, mergeResult);
-        ReduceFn<K, InputT, OutputT, W>.Context directClearContext =
-            contextFactory.base(active, StateStyle.DIRECT);
-        // No need for the end-of-window or garbage collection timers.
-        // We will establish a new end-of-window or garbage collection timer for the mergeResult
-        // window in processElement below. There must be at least one element for the mergeResult
-        // window since a new element with a new window must have triggered this onMerge.
-        cancelEndOfWindowAndGarbageCollectionTimers(directClearContext);
-        // We no longer care about any previous panes of merged away windows. The
-        // merge result window gets to start fresh if it is new.
-        paneInfoTracker.clear(directClearContext.state());
-      }
-    }
-  }
-
-  /**
-   * Asks the active window set to merge its windows, using {@code onMergeCallback} to receive
-   * the prefetch and merge notifications.
-   */
-  private void mergeActiveWindows() throws Exception {
-    activeWindows.merge(onMergeCallback);
-  }
-
-  /**
-   * Process an element: for each window the element belongs to, look up the representative
-   * window in the active window set, and, unless that window's trigger is already closed,
-   * record the content, schedule its timer, add watermark holds, hand the value to the reduce
-   * function and update the trigger.
-   *
-   * @param value the value being processed
-   *
-   * @return the representative window for each of the element's windows, in iteration order
-   *     (so it may contain repeats). NOTE(review): the list is not filtered after it is built,
-   *     so it also contains windows for which the element was dropped because the trigger was
-   *     closed -- not only windows in which the element was actually processed.
-   */
-  private Collection<W> processElement(WindowedValue<InputT> value) throws Exception {
-    // Redirect element windows to the ACTIVE windows they have been merged into.
-    // The compressed representation (value, {window1, window2, ...}) actually represents
-    // distinct elements (value, window1), (value, window2), ...
-    // so if window1 and window2 merge, the resulting window will contain both copies
-    // of the value.
-    Collection<W> windows = new ArrayList<>();
-    for (BoundedWindow untypedWindow : value.getWindows()) {
-      @SuppressWarnings("unchecked")
-      W window = (W) untypedWindow;
-      W active = activeWindows.representative(window);
-      Preconditions.checkState(active != null, "Window %s should have been added", window);
-      windows.add(active);
-    }
-
-    // Prefetch in each of the windows if we're going to need to process triggers
-    for (W window : windows) {
-      ReduceFn<K, InputT, OutputT, W>.ProcessValueContext directContext = contextFactory.forValue(
-          window, value.getValue(), value.getTimestamp(), StateStyle.DIRECT);
-      triggerRunner.prefetchForValue(window, directContext.state());
-    }
-
-    // Process the element for each (representative) window it belongs to.
-    for (W window : windows) {
-      ReduceFn<K, InputT, OutputT, W>.ProcessValueContext directContext = contextFactory.forValue(
-          window, value.getValue(), value.getTimestamp(), StateStyle.DIRECT);
-      ReduceFn<K, InputT, OutputT, W>.ProcessValueContext renamedContext = contextFactory.forValue(
-          window, value.getValue(), value.getTimestamp(), StateStyle.RENAMED);
-
-      // Check to see if the triggerRunner thinks the window is closed. If so, drop that window.
-      if (triggerRunner.isClosed(directContext.state())) {
-        droppedDueToClosedWindow.addValue(1L);
-        WindowTracing.debug(
-            "ReduceFnRunner.processElement: Dropping element at {} for key:{}; window:{} "
-            + "since window is no longer active at inputWatermark:{}; outputWatermark:{}",
-            value.getTimestamp(), key, window, timerInternals.currentInputWatermarkTime(),
-            timerInternals.currentOutputWatermarkTime());
-        continue;
-      }
-
-      nonEmptyPanes.recordContent(renamedContext.state());
-
-      // Make sure we've scheduled the end-of-window or garbage collection timer for this window.
-      Instant timer = scheduleEndOfWindowOrGarbageCollectionTimer(directContext);
-
-      // Hold back progress of the output watermark until we have processed the pane this
-      // element will be included within. If the element is too late for that, place a hold at
-      // the end-of-window or garbage collection time to allow empty panes to contribute elements
-      // which won't be dropped due to lateness by a following computation (assuming the following
-      // computation uses the same allowed lateness value...)
-      @Nullable Instant hold = watermarkHold.addHolds(renamedContext);
-
-      if (hold != null) {
-        // Assert that holds have a proximate timer.
-        // "Proximate" is checked as: the hold and the timer are either both at or before the
-        // window's max timestamp, or both after it.
-        boolean holdInWindow = !hold.isAfter(window.maxTimestamp());
-        boolean timerInWindow = !timer.isAfter(window.maxTimestamp());
-        Preconditions.checkState(
-            holdInWindow == timerInWindow,
-            "set a hold at %s, a timer at %s, which disagree as to whether they are in window %s",
-            hold,
-            timer,
-            directContext.window());
-      }
-
-      // Execute the reduceFn, which will buffer the value as appropriate
-      reduceFn.processValue(renamedContext);
-
-      // Run the trigger to update its state
-      triggerRunner.processValue(
-          directContext.window(),
-          directContext.timestamp(),
-          directContext.timers(),
-          directContext.state());
-    }
-
-    return windows;
-  }
-
-  /**
-   * Called when an end-of-window, garbage collection, or trigger-specific timer fires.
-   *
-   * <p>An event-time timer at the window's max timestamp plus the allowed lateness is treated
-   * as the garbage collection timer: the final pane is emitted if the window is still active,
-   * and the window's state is cleared. Any other timer emits a pane if appropriate (again only
-   * for an active window); if it is the event-time timer at the window's max timestamp, the
-   * garbage collection timer is then scheduled.
-   *
-   * @param timer the timer that fired; its namespace must be a {@link WindowNamespace}
-   *     (enforced with {@code Preconditions.checkArgument})
-   */
-  public void onTimer(TimerData timer) throws Exception {
-    // Which window is the timer for?
-    Preconditions.checkArgument(timer.getNamespace() instanceof WindowNamespace,
-        "Expected timer to be in WindowNamespace, but was in %s", timer.getNamespace());
-    @SuppressWarnings("unchecked")
-    WindowNamespace<W> windowNamespace = (WindowNamespace<W>) timer.getNamespace();
-    W window = windowNamespace.getWindow();
-    ReduceFn<K, InputT, OutputT, W>.Context directContext =
-        contextFactory.base(window, StateStyle.DIRECT);
-    ReduceFn<K, InputT, OutputT, W>.Context renamedContext =
-        contextFactory.base(window, StateStyle.RENAMED);
-
-    // Has this window had its trigger finish?
-    // - The trigger may implement isClosed as constant false.
-    // - If the window function does not support windowing then all windows will be considered
-    // active.
-    // So we must take conjunction of activeWindows and triggerRunner state.
-    boolean windowIsActive =
-        activeWindows.isActive(window) && !triggerRunner.isClosed(directContext.state());
-
-    if (!windowIsActive) {
-      WindowTracing.debug(
-          "ReduceFnRunner.onTimer: Note that timer {} is for non-ACTIVE window {}", timer, window);
-    }
-
-    // If this is a garbage collection timer then we should trigger and garbage collect the window.
-    Instant cleanupTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness());
-    boolean isGarbageCollection =
-        TimeDomain.EVENT_TIME == timer.getDomain() && timer.getTimestamp().equals(cleanupTime);
-
-    if (isGarbageCollection) {
-      WindowTracing.debug(
-          "ReduceFnRunner.onTimer: Cleaning up for key:{}; window:{} at {} with "
-          + "inputWatermark:{}; outputWatermark:{}",
-          key, window, timer.getTimestamp(), timerInternals.currentInputWatermarkTime(),
-          timerInternals.currentOutputWatermarkTime());
-
-      if (windowIsActive) {
-        // We need to call onTrigger to emit the final pane if required.
-        // The final pane *may* be ON_TIME if no prior ON_TIME pane has been emitted,
-        // and the watermark has passed the end of the window.
-        onTrigger(directContext, renamedContext, true/* isFinished */);
-      }
-
-      // Cleanup flavor B: Clear all the remaining state for this window since we'll never
-      // see elements for it again.
-      clearAllState(directContext, renamedContext, windowIsActive);
-    } else {
-      WindowTracing.debug(
-          "ReduceFnRunner.onTimer: Triggering for key:{}; window:{} at {} with "
-          + "inputWatermark:{}; outputWatermark:{}",
-          key, window, timer.getTimestamp(), timerInternals.currentInputWatermarkTime(),
-          timerInternals.currentOutputWatermarkTime());
-      if (windowIsActive) {
-        emitIfAppropriate(directContext, renamedContext);
-      }
-
-      // If this is an end-of-window timer then, we need to set a GC timer
-      boolean isEndOfWindow = TimeDomain.EVENT_TIME == timer.getDomain()
-          && timer.getTimestamp().equals(window.maxTimestamp());
-      if (isEndOfWindow) {
-        // Since we are processing an on-time firing we should schedule the garbage collection
-        // timer. (If getAllowedLateness is zero then the timer event will be considered a
-        // cleanup event and handled by the above).
-        // Note we must do this even if the trigger is finished so that we are sure to cleanup
-        // any final trigger tombstones.
-        Preconditions.checkState(
-            windowingStrategy.getAllowedLateness().isLongerThan(Duration.ZERO),
-            "Unexpected zero getAllowedLateness");
-        WindowTracing.debug(
-            "ReduceFnRunner.onTimer: Scheduling cleanup timer for key:{}; window:{} at {} with "
-            + "inputWatermark:{}; outputWatermark:{}",
-            key, directContext.window(), cleanupTime, timerInternals.currentInputWatermarkTime(),
-            timerInternals.currentOutputWatermarkTime());
-        directContext.timers().setTimer(cleanupTime, TimeDomain.EVENT_TIME);
-      }
-    }
-  }
-
-  /**
-   * Clear all the state associated with {@code context}'s window.
-   * Should only be invoked if we know all future elements for this window will be considered
-   * beyond allowed lateness.
-   * This is a superset of the clearing done by {@link #emitIfAppropriate} below since:
-   * <ol>
-   * <li>We can clear the trigger state tombstone since we'll never need to ask about it again.
-   * <li>We can clear any remaining garbage collection hold.
-   * </ol>
-   */
-  private void clearAllState(
-      ReduceFn<K, InputT, OutputT, W>.Context directContext,
-      ReduceFn<K, InputT, OutputT, W>.Context renamedContext,
-      boolean windowIsActive)
-          throws Exception {
-    if (windowIsActive) {
-      // Since both the window is in the active window set AND the trigger was not yet closed,
-      // it is possible we still have state.
-      reduceFn.clearState(renamedContext);
-      watermarkHold.clearHolds(renamedContext);
-      nonEmptyPanes.clearPane(renamedContext.state());
-      triggerRunner.clearState(
-          directContext.window(), directContext.timers(), directContext.state());
-    } else {
-      // Needed only for backwards compatibility over UPDATE.
-      // Clear any end-of-window or garbage collection holds keyed by the current window.
-      // Only needed if:
-      // - We have merging windows.
-      // - We are DISCARDING_FIRED_PANES.
-      // - A pane has fired.
-      // - But the trigger is not (yet) closed.
-      if (windowingStrategy.getMode() == AccumulationMode.DISCARDING_FIRED_PANES
-          && !windowingStrategy.getWindowFn().isNonMerging()) {
-        watermarkHold.clearHolds(directContext);
-      }
-    }
-    paneInfoTracker.clear(directContext.state());
-    if (activeWindows.isActive(directContext.window())) {
-      // Don't need to track address state windows anymore.
-      activeWindows.remove(directContext.window());
-    }
-    // We'll never need to test for the trigger being closed again.
-    triggerRunner.clearFinished(directContext.state());
-  }
-
-  /** Should the reduce function state be cleared? */
-  private boolean shouldDiscardAfterFiring(boolean isFinished) {
-    if (isFinished) {
-      // This is the last firing for trigger.
-      return true;
-    }
-    if (windowingStrategy.getMode() == AccumulationMode.DISCARDING_FIRED_PANES) {
-      // Nothing should be accumulated between panes.
-      return true;
-    }
-    return false;
-  }
-
-  /**
-   * Possibly emit a pane if a trigger is ready to fire or timers require it, and cleanup state.
-   */
-  private void emitIfAppropriate(ReduceFn<K, InputT, OutputT, W>.Context directContext,
-      ReduceFn<K, InputT, OutputT, W>.Context renamedContext)
-      throws Exception {
-    if (!triggerRunner.shouldFire(
-        directContext.window(), directContext.timers(), directContext.state())) {
-      // Ignore unless trigger is ready to fire
-      return;
-    }
-
-    // Inform the trigger of the transition to see if it is finished
-    triggerRunner.onFire(directContext.window(), directContext.timers(), directContext.state());
-    boolean isFinished = triggerRunner.isClosed(directContext.state());
-
-    // Will be able to clear all element state after triggering?
-    boolean shouldDiscard = shouldDiscardAfterFiring(isFinished);
-
-    // Run onTrigger to produce the actual pane contents.
-    // As a side effect it will clear all element holds, but not necessarily any
-    // end-of-window or garbage collection holds.
-    onTrigger(directContext, renamedContext, isFinished);
-
-    // Now that we've triggered, the pane is empty.
-    nonEmptyPanes.clearPane(renamedContext.state());
-
-    // Cleanup buffered data if appropriate
-    if (shouldDiscard) {
-      // Cleanup flavor C: The user does not want any buffered data to persist between panes.
-      reduceFn.clearState(renamedContext);
-    }
-
-    if (isFinished) {
-      // Cleanup flavor D: If trigger is closed we will ignore all new incoming elements.
-      // Clear state not otherwise cleared by onTrigger and clearPane above.
-      // Remember the trigger is, indeed, closed until the window is garbage collected.
-      triggerRunner.clearState(
-          directContext.window(), directContext.timers(), directContext.state());
-      paneInfoTracker.clear(directContext.state());
-      activeWindows.remove(directContext.window());
-    }
-  }
-
-  /**
-   * Do we need to emit a pane?
-   */
-  private boolean needToEmit(boolean isEmpty, boolean isFinished, PaneInfo.Timing timing) {
-    if (!isEmpty) {
-      // The pane has elements.
-      return true;
-    }
-    if (timing == Timing.ON_TIME) {
-      // This is the unique ON_TIME pane.
-      return true;
-    }
-    if (isFinished && windowingStrategy.getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS) {
-      // This is known to be the final pane, and the user has requested it even when empty.
-      return true;
-    }
-    return false;
-  }
-
-  /**
-   * Run the {@link ReduceFn#onTrigger} method and produce any necessary output.
-   */
-  private void onTrigger(
-      final ReduceFn<K, InputT, OutputT, W>.Context directContext,
-      ReduceFn<K, InputT, OutputT, W>.Context renamedContext,
-      boolean isFinished)
-          throws Exception {
-    // Prefetch necessary states
-    ReadableState<Instant> outputTimestampFuture =
-        watermarkHold.extractAndRelease(renamedContext, isFinished).readLater();
-    ReadableState<PaneInfo> paneFuture =
-        paneInfoTracker.getNextPaneInfo(directContext, isFinished).readLater();
-    ReadableState<Boolean> isEmptyFuture =
-        nonEmptyPanes.isEmpty(renamedContext.state()).readLater();
-
-    reduceFn.prefetchOnTrigger(directContext.state());
-    triggerRunner.prefetchOnFire(directContext.window(), directContext.state());
-
-    // Calculate the pane info.
-    final PaneInfo pane = paneFuture.read();
-    // Extract the window hold, and as a side effect clear it.
-    final Instant outputTimestamp = outputTimestampFuture.read();
-
-    // Only emit a pane if it has data or empty panes are observable.
-    if (needToEmit(isEmptyFuture.read(), isFinished, pane.getTiming())) {
-      // Run reduceFn.onTrigger method.
-      final List<W> windows = Collections.singletonList(directContext.window());
-      ReduceFn<K, InputT, OutputT, W>.OnTriggerContext renamedTriggerContext =
-          contextFactory.forTrigger(directContext.window(), paneFuture, StateStyle.RENAMED,
-              new OnTriggerCallbacks<OutputT>() {
-                @Override
-                public void output(OutputT toOutput) {
-                  // We're going to output panes, so commit the (now used) PaneInfo.
-                  // TODO: This is unnecessary if the trigger isFinished since the saved
-                  // state will be immediately deleted.
-                  paneInfoTracker.storeCurrentPaneInfo(directContext, pane);
-
-                  // Output the actual value.
-                  outputter.outputWindowedValue(
-                      KV.of(key, toOutput), outputTimestamp, windows, pane);
-                }
-              });
-
-      reduceFn.onTrigger(renamedTriggerContext);
-    }
-  }
-
-  /**
-   * Make sure we'll eventually have a timer fire which will tell us to garbage collect
-   * the window state. For efficiency we may need to do this in two steps rather
-   * than one. Return the time at which the timer will fire.
-   *
-   * <ul>
-   * <li>If allowedLateness is zero then we'll garbage collect at the end of the window.
-   * For simplicity we'll set our own timer for this situation even though an
-   * {@link AfterWatermark} trigger may have also set an end-of-window timer.
-   * ({@code setTimer} is idempotent.)
-   * <li>If allowedLateness is non-zero then we could just always set a timer for the garbage
-   * collection time. However if the windows are large (eg hourly) and the allowedLateness is small
-   * (eg seconds) then we'll end up with nearly twice the number of timers in-flight. So we
-   * instead set an end-of-window timer and then roll that forward to a garbage collection timer
-   * when it fires. We use the input watermark to distinguish those cases.
-   * </ul>
-   */
-  private Instant scheduleEndOfWindowOrGarbageCollectionTimer(
-      ReduceFn<?, ?, ?, W>.Context directContext) {
-    Instant inputWM = timerInternals.currentInputWatermarkTime();
-    Instant endOfWindow = directContext.window().maxTimestamp();
-    Instant fireTime;
-    String which;
-    if (inputWM != null && endOfWindow.isBefore(inputWM)) {
-      fireTime = endOfWindow.plus(windowingStrategy.getAllowedLateness());
-      which = "garbage collection";
-    } else {
-      fireTime = endOfWindow;
-      which = "end-of-window";
-    }
-    WindowTracing.trace(
-        "ReduceFnRunner.scheduleEndOfWindowOrGarbageCollectionTimer: Scheduling {} timer at {} for "
-            + "key:{}; window:{} where inputWatermark:{}; outputWatermark:{}",
-        which,
-        fireTime,
-        key,
-        directContext.window(),
-        inputWM,
-        timerInternals.currentOutputWatermarkTime());
-    directContext.timers().setTimer(fireTime, TimeDomain.EVENT_TIME);
-    return fireTime;
-  }
-
-  private void cancelEndOfWindowAndGarbageCollectionTimers(ReduceFn<?, ?, ?, W>.Context context) {
-    WindowTracing.debug(
-        "ReduceFnRunner.cancelEndOfWindowAndGarbageCollectionTimers: Deleting timers for "
-        + "key:{}; window:{} where inputWatermark:{}; outputWatermark:{}",
-        key, context.window(), timerInternals.currentInputWatermarkTime(),
-        timerInternals.currentOutputWatermarkTime());
-    Instant timer = context.window().maxTimestamp();
-    context.timers().deleteTimer(timer, TimeDomain.EVENT_TIME);
-    if (windowingStrategy.getAllowedLateness().isLongerThan(Duration.ZERO)) {
-      timer = timer.plus(windowingStrategy.getAllowedLateness());
-      context.timers().deleteTimer(timer, TimeDomain.EVENT_TIME);
-    }
-  }
-
-  /**
-   * An object that can output a value with all of its windowing information. This is a deliberately
-   * restricted subinterface of {@link WindowingInternals} to express how it is used here.
-   */
-  private interface OutputWindowedValue<OutputT> {
-    void outputWindowedValue(OutputT output, Instant timestamp,
-        Collection<? extends BoundedWindow> windows, PaneInfo pane);
-  }
-
-  private static class OutputViaWindowingInternals<OutputT>
-      implements OutputWindowedValue<OutputT> {
-
-    private final WindowingInternals<?, OutputT> windowingInternals;
-
-    public OutputViaWindowingInternals(WindowingInternals<?, OutputT> windowingInternals) {
-      this.windowingInternals = windowingInternals;
-    }
-
-    @Override
-    public void outputWindowedValue(
-        OutputT output,
-        Instant timestamp,
-        Collection<? extends BoundedWindow> windows,
-        PaneInfo pane) {
-      windowingInternals.outputWindowedValue(output, timestamp, windows, pane);
-    }
-
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReifyTimestampAndWindowsDoFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReifyTimestampAndWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReifyTimestampAndWindowsDoFn.java
deleted file mode 100644
index 88a1c15..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReifyTimestampAndWindowsDoFn.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.values.KV;
-
-/**
- * DoFn that makes timestamps and window assignments explicit in the value part of each key/value
- * pair.
- *
- * @param <K> the type of the keys of the input and output {@code PCollection}s
- * @param <V> the type of the values of the input {@code PCollection}
- */
-@SystemDoFnInternal
-public class ReifyTimestampAndWindowsDoFn<K, V>
-    extends DoFn<KV<K, V>, KV<K, WindowedValue<V>>> {
-  @Override
-  public void processElement(ProcessContext c)
-      throws Exception {
-    KV<K, V> kv = c.element();
-    K key = kv.getKey();
-    V value = kv.getValue();
-    c.output(KV.of(
-        key,
-        WindowedValue.of(
-            value,
-            c.timestamp(),
-            c.windowingInternals().windows(),
-            c.pane())));
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Reshuffle.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Reshuffle.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Reshuffle.java
deleted file mode 100644
index 367db2d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Reshuffle.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.NonMergingWindowFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.util.Collection;
-
-/**
- * A {@link PTransform} that returns a {@link PCollection} equivalent to its input but operationally
- * provides some of the side effects of a {@link GroupByKey}, in particular preventing fusion of
- * the surrounding transforms, checkpointing and deduplication by id (see
- * {@link ValueWithRecordId}).
- *
- * <p>Performs a {@link GroupByKey} so that the data is key-partitioned. Configures the
- * {@link WindowingStrategy} so that no data is dropped, but doesn't affect the need for
- * the user to specify allowed lateness and accumulation mode before a user-inserted GroupByKey.
- *
- * @param <K> The type of key being reshuffled on.
- * @param <V> The type of value being reshuffled.
- */
-public class Reshuffle<K, V> extends PTransform<PCollection<KV<K, V>>, PCollection<KV<K, V>>> {
-
-  private Reshuffle() {
-  }
-
-  public static <K, V> Reshuffle<K, V> of() {
-    return new Reshuffle<K, V>();
-  }
-
-  @Override
-  public PCollection<KV<K, V>> apply(PCollection<KV<K, V>> input) {
-    WindowingStrategy<?, ?> originalStrategy = input.getWindowingStrategy();
-    // If the input has already had its windows merged, then the GBK that performed the merge
-    // will have set originalStrategy.getWindowFn() to InvalidWindows, causing the GBK contained
-    // here to fail. Instead, we install a valid WindowFn that leaves all windows unchanged.
-    Window.Bound<KV<K, V>> rewindow = Window
-        .<KV<K, V>>into(new PassThroughWindowFn<>(originalStrategy.getWindowFn()))
-        .triggering(new ReshuffleTrigger<>())
-        .discardingFiredPanes()
-        .withAllowedLateness(Duration.millis(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()));
-
-    return input.apply(rewindow)
-        .apply(GroupByKey.<K, V>create())
-        // Set the windowing strategy directly, so that it doesn't get counted as the user having
-        // set allowed lateness.
-        .setWindowingStrategyInternal(originalStrategy)
-        .apply(ParDo.named("ExpandIterable").of(
-            new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
-              @Override
-              public void processElement(ProcessContext c) {
-                K key = c.element().getKey();
-                for (V value : c.element().getValue()) {
-                  c.output(KV.of(key, value));
-                }
-              }
-            }));
-  }
-
-  /**
-   * A {@link WindowFn} that leaves all associations between elements and windows unchanged.
-   *
-   * <p>In order to implement all the abstract methods of {@link WindowFn}, this requires the
-   * prior {@link WindowFn}, to which all auxiliary functionality is delegated.
-   */
-  private static class PassThroughWindowFn<T> extends NonMergingWindowFn<T, BoundedWindow> {
-
-    /** The WindowFn prior to this. Used for its windowCoder, etc. */
-    private final WindowFn<?, BoundedWindow> priorWindowFn;
-
-    public PassThroughWindowFn(WindowFn<?, ?> priorWindowFn) {
-      // Safe because it is only used privately here.
-      // At every point where a window is returned or accepted, it has been provided
-      // by priorWindowFn, so it is of the type expected.
-      @SuppressWarnings("unchecked")
-      WindowFn<?, BoundedWindow> internalWindowFn = (WindowFn<?, BoundedWindow>) priorWindowFn;
-      this.priorWindowFn = internalWindowFn;
-    }
-
-    @Override
-    public Collection<BoundedWindow> assignWindows(WindowFn<T, BoundedWindow>.AssignContext c)
-        throws Exception {
-      // The windows are provided by priorWindowFn, which also provides the coder for them
-      @SuppressWarnings("unchecked")
-      Collection<BoundedWindow> priorWindows = (Collection<BoundedWindow>) c.windows();
-      return priorWindows;
-    }
-
-    @Override
-    public boolean isCompatible(WindowFn<?, ?> other) {
-      throw new UnsupportedOperationException(
-          String.format("%s.isCompatible() should never be called."
-              + " It is a private implementation detail of Reshuffle."
-              + " This message indicates a bug in the Dataflow SDK.",
-              getClass().getCanonicalName()));
-    }
-
-    @Override
-    public Coder<BoundedWindow> windowCoder() {
-      // Safe because priorWindowFn provides the windows also.
-      // The Coder is _not_ actually a coder for an arbitrary BoundedWindow.
-      return priorWindowFn.windowCoder();
-    }
-
-    @Override
-    public BoundedWindow getSideInputWindow(BoundedWindow window) {
-      throw new UnsupportedOperationException(
-          String.format("%s.getSideInputWindow() should never be called."
-              + " It is a private implementation detail of Reshuffle."
-              + " This message indicates a bug in the Dataflow SDK.",
-              getClass().getCanonicalName()));
-    }
-
-    @Override
-    public Instant getOutputTime(Instant inputTimestamp, BoundedWindow window) {
-      return inputTimestamp;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReshuffleTrigger.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReshuffleTrigger.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReshuffleTrigger.java
deleted file mode 100644
index 248f005..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ReshuffleTrigger.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger;
-
-import org.joda.time.Instant;
-
-import java.util.List;
-
-/**
- * The trigger used with {@link Reshuffle} which triggers on every element
- * and never buffers state.
- *
- * @param <W> The kind of window that is being reshuffled.
- */
-public class ReshuffleTrigger<W extends BoundedWindow> extends Trigger<W> {
-
-  ReshuffleTrigger() {
-    super(null);
-  }
-
-  @Override
-  public void onElement(Trigger<W>.OnElementContext c) { }
-
-  @Override
-  public void onMerge(Trigger<W>.OnMergeContext c) { }
-
-  @Override
-  protected Trigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-    return this;
-  }
-
-  @Override
-  public Instant getWatermarkThatGuaranteesFiring(W window) {
-    throw new UnsupportedOperationException(
-        "ReshuffleTrigger should not be used outside of Reshuffle");
-  }
-
-  @Override
-  public boolean shouldFire(Trigger<W>.TriggerContext context) throws Exception {
-    return true;
-  }
-
-  @Override
-  public void onFire(Trigger<W>.TriggerContext context) throws Exception { }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java
deleted file mode 100644
index 756dce0..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/RetryHttpRequestInitializer.java
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.http.HttpBackOffIOExceptionHandler;
-import com.google.api.client.http.HttpBackOffUnsuccessfulResponseHandler;
-import com.google.api.client.http.HttpRequest;
-import com.google.api.client.http.HttpRequestInitializer;
-import com.google.api.client.http.HttpResponse;
-import com.google.api.client.http.HttpResponseInterceptor;
-import com.google.api.client.http.HttpUnsuccessfulResponseHandler;
-import com.google.api.client.util.BackOff;
-import com.google.api.client.util.ExponentialBackOff;
-import com.google.api.client.util.NanoClock;
-import com.google.api.client.util.Sleeper;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
-
-import javax.annotation.Nullable;
-
-/**
- * Implements a request initializer that adds retry handlers to all
- * HttpRequests.
- *
- * <p>This allows chaining through to another HttpRequestInitializer, since
- * clients have exactly one HttpRequestInitializer, and Credential is also
- * a required HttpRequestInitializer.
- *
- * <p>Also can take a HttpResponseInterceptor to be applied to the responses.
- */
-public class RetryHttpRequestInitializer implements HttpRequestInitializer {
-
-  private static final Logger LOG = LoggerFactory.getLogger(RetryHttpRequestInitializer.class);
-
-  /**
-   * Http response codes that should be silently ignored.
-   */
-  private static final Set<Integer> DEFAULT_IGNORED_RESPONSE_CODES = new HashSet<>(
-      Arrays.asList(307 /* Redirect, handled by the client library */,
-                    308 /* Resume Incomplete, handled by the client library */));
-
-  /**
-   * Http response timeout to use for hanging gets.
-   */
-  private static final int HANGING_GET_TIMEOUT_SEC = 80;
-
-  private static class LoggingHttpBackOffIOExceptionHandler
-      extends HttpBackOffIOExceptionHandler {
-    public LoggingHttpBackOffIOExceptionHandler(BackOff backOff) {
-      super(backOff);
-    }
-
-    @Override
-    public boolean handleIOException(HttpRequest request, boolean supportsRetry)
-        throws IOException {
-      boolean willRetry = super.handleIOException(request, supportsRetry);
-      if (willRetry) {
-        LOG.debug("Request failed with IOException, will retry: {}", request.getUrl());
-      } else {
-        LOG.warn("Request failed with IOException, will NOT retry: {}", request.getUrl());
-      }
-      return willRetry;
-    }
-  }
-
-  private static class LoggingHttpBackoffUnsuccessfulResponseHandler
-      implements HttpUnsuccessfulResponseHandler {
-    private final HttpBackOffUnsuccessfulResponseHandler handler;
-    private final Set<Integer> ignoredResponseCodes;
-
-    public LoggingHttpBackoffUnsuccessfulResponseHandler(BackOff backoff,
-        Sleeper sleeper, Set<Integer> ignoredResponseCodes) {
-      this.ignoredResponseCodes = ignoredResponseCodes;
-      handler = new HttpBackOffUnsuccessfulResponseHandler(backoff);
-      handler.setSleeper(sleeper);
-      handler.setBackOffRequired(
-          new HttpBackOffUnsuccessfulResponseHandler.BackOffRequired() {
-            @Override
-            public boolean isRequired(HttpResponse response) {
-              int statusCode = response.getStatusCode();
-              return (statusCode / 100 == 5) ||  // 5xx: server error
-                  statusCode == 429;             // 429: Too many requests
-            }
-          });
-    }
-
-    @Override
-    public boolean handleResponse(HttpRequest request, HttpResponse response,
-        boolean supportsRetry) throws IOException {
-      boolean retry = handler.handleResponse(request, response, supportsRetry);
-      if (retry) {
-        LOG.debug("Request failed with code {} will retry: {}",
-            response.getStatusCode(), request.getUrl());
-
-      } else if (!ignoredResponseCodes.contains(response.getStatusCode())) {
-        LOG.warn("Request failed with code {}, will NOT retry: {}",
-            response.getStatusCode(), request.getUrl());
-      }
-
-      return retry;
-    }
-  }
-
-  @Deprecated
-  private final HttpRequestInitializer chained;
-
-  private final HttpResponseInterceptor responseInterceptor;  // response Interceptor to use
-
-  private final NanoClock nanoClock;  // used for testing
-
-  private final Sleeper sleeper;  // used for testing
-
-  private Set<Integer> ignoredResponseCodes = new HashSet<>(DEFAULT_IGNORED_RESPONSE_CODES);
-
-  public RetryHttpRequestInitializer() {
-    this(Collections.<Integer>emptyList());
-  }
-
-  /**
-   * @param chained a downstream HttpRequestInitializer, which will also be
-   *                applied to HttpRequest initialization.  May be null.
-   *
-   * @deprecated use {@link #RetryHttpRequestInitializer}.
-   */
-  @Deprecated
-  public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained) {
-    this(chained, Collections.<Integer>emptyList());
-  }
-
-  /**
-   * @param additionalIgnoredResponseCodes a list of HTTP status codes that should not be logged.
-   */
-  public RetryHttpRequestInitializer(Collection<Integer> additionalIgnoredResponseCodes) {
-    this(additionalIgnoredResponseCodes, null);
-  }
-
-
-  /**
-   * @param chained a downstream HttpRequestInitializer, which will also be
-   *                applied to HttpRequest initialization.  May be null.
-   * @param additionalIgnoredResponseCodes a list of HTTP status codes that should not be logged.
-   *
-   * @deprecated use {@link #RetryHttpRequestInitializer(Collection)}.
-   */
-  @Deprecated
-  public RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained,
-      Collection<Integer> additionalIgnoredResponseCodes) {
-    this(chained, additionalIgnoredResponseCodes, null);
-  }
-
-  /**
-   * @param additionalIgnoredResponseCodes a list of HTTP status codes that should not be logged.
-   * @param responseInterceptor HttpResponseInterceptor to be applied on all requests. May be null.
-   */
-  public RetryHttpRequestInitializer(
-      Collection<Integer> additionalIgnoredResponseCodes,
-      @Nullable HttpResponseInterceptor responseInterceptor) {
-    this(null, NanoClock.SYSTEM, Sleeper.DEFAULT, additionalIgnoredResponseCodes,
-        responseInterceptor);
-  }
-
-  /**
-   * @param chained a downstream HttpRequestInitializer, which will also be applied to HttpRequest
-   * initialization.  May be null.
-   * @param additionalIgnoredResponseCodes a list of HTTP status codes that should not be logged.
-   * @param responseInterceptor HttpResponseInterceptor to be applied on all requests. May be null.
-   *
-   * @deprecated use {@link #RetryHttpRequestInitializer(Collection, HttpResponseInterceptor)}.
-   */
-  @Deprecated
-  public RetryHttpRequestInitializer(
-      @Nullable HttpRequestInitializer chained,
-      Collection<Integer> additionalIgnoredResponseCodes,
-      @Nullable HttpResponseInterceptor responseInterceptor) {
-    this(chained, NanoClock.SYSTEM, Sleeper.DEFAULT, additionalIgnoredResponseCodes,
-        responseInterceptor);
-  }
-
-  /**
-   * Visible for testing.
-   *
-   * @param chained a downstream HttpRequestInitializer, which will also be
-   *                applied to HttpRequest initialization.  May be null.
-   * @param nanoClock used as a timing source for knowing how much time has elapsed.
-   * @param sleeper used to sleep between retries.
-   * @param additionalIgnoredResponseCodes a list of HTTP status codes that should not be logged.
-   */
-  RetryHttpRequestInitializer(@Nullable HttpRequestInitializer chained,
-      NanoClock nanoClock, Sleeper sleeper, Collection<Integer> additionalIgnoredResponseCodes,
-      HttpResponseInterceptor responseInterceptor) {
-    this.chained = chained;
-    this.nanoClock = nanoClock;
-    this.sleeper = sleeper;
-    this.ignoredResponseCodes.addAll(additionalIgnoredResponseCodes);
-    this.responseInterceptor = responseInterceptor;
-  }
-
-  @Override
-  public void initialize(HttpRequest request) throws IOException {
-    if (chained != null) {
-      chained.initialize(request);
-    }
-
-    // Set a timeout for hanging-gets.
-    // TODO: Do this exclusively for work requests.
-    request.setReadTimeout(HANGING_GET_TIMEOUT_SEC * 1000);
-
-    // Back off on retryable http errors.
-    request.setUnsuccessfulResponseHandler(
-        // A back-off multiplier of 2 raises the maximum request retrying time
-        // to approximately 5 minutes (keeping other back-off parameters to
-        // their default values).
-        new LoggingHttpBackoffUnsuccessfulResponseHandler(
-            new ExponentialBackOff.Builder().setNanoClock(nanoClock)
-                                            .setMultiplier(2).build(),
-            sleeper, ignoredResponseCodes));
-
-    // Retry immediately on IOExceptions.
-    LoggingHttpBackOffIOExceptionHandler loggingBackoffHandler =
-        new LoggingHttpBackOffIOExceptionHandler(BackOff.ZERO_BACKOFF);
-    request.setIOExceptionHandler(loggingBackoffHandler);
-
-    // Set response initializer
-    if (responseInterceptor != null) {
-      request.setResponseInterceptor(responseInterceptor);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java
deleted file mode 100644
index 501b430..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SerializableUtils.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.cloud.dataflow.sdk.util.CoderUtils.decodeFromByteArray;
-import static com.google.cloud.dataflow.sdk.util.CoderUtils.encodeToByteArray;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.common.base.Preconditions;
-
-import org.xerial.snappy.SnappyInputStream;
-import org.xerial.snappy.SnappyOutputStream;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.Serializable;
-import java.util.Arrays;
-
-/**
- * Utilities for working with Serializables.
- *
- * <p>Values are written with an {@link ObjectOutputStream} layered over a
- * {@link SnappyOutputStream}, and read back through the mirror-image stack of
- * {@link ObjectInputStream} over {@link SnappyInputStream}.
- */
-public class SerializableUtils {
-  /**
-   * Serializes the argument into an array of bytes, and returns it.
-   *
-   * @throws IllegalArgumentException if there are errors when serializing
-   */
-  public static byte[] serializeToByteArray(Serializable value) {
-    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-    try (ObjectOutputStream oos = new ObjectOutputStream(new SnappyOutputStream(buffer))) {
-      oos.writeObject(value);
-    } catch (IOException exn) {
-      throw new IllegalArgumentException(
-          "unable to serialize " + value,
-          exn);
-    }
-    // The buffer is only read once the try-with-resources block has closed the stream stack.
-    return buffer.toByteArray();
-  }
-
-  /**
-   * Deserializes an object from the given array of bytes, e.g., as
-   * serialized using {@link #serializeToByteArray}, and returns it.
-   *
-   * @throws IllegalArgumentException if there are errors when
-   * deserializing, using the provided description to identify what
-   * was being deserialized
-   */
-  public static Object deserializeFromByteArray(byte[] encodedValue,
-      String description) {
-    try {
-      return readObject(encodedValue);
-    } catch (IOException | ClassNotFoundException exn) {
-      throw new IllegalArgumentException(
-          "unable to deserialize " + description,
-          exn);
-    }
-  }
-
-  /**
-   * Reads a single object from bytes produced by {@link #serializeToByteArray}.
-   */
-  private static Object readObject(byte[] encodedValue)
-      throws IOException, ClassNotFoundException {
-    try (ObjectInputStream ois = new ObjectInputStream(
-        new SnappyInputStream(new ByteArrayInputStream(encodedValue)))) {
-      return ois.readObject();
-    }
-  }
-
-  /**
-   * Round-trips the value through Java serialization and returns the deserialized copy.
-   *
-   * <p>The value's string form is only computed if deserialization fails, so a {@code null}
-   * value is round-tripped (yielding {@code null}) instead of failing with a
-   * {@link NullPointerException} while building the error description.
-   *
-   * @throws IllegalArgumentException if the value cannot be serialized or deserialized
-   */
-  public static <T extends Serializable> T ensureSerializable(T value) {
-    byte[] encodedValue = serializeToByteArray(value);
-    try {
-      @SuppressWarnings("unchecked")
-      T copy = (T) readObject(encodedValue);
-      return copy;
-    } catch (IOException | ClassNotFoundException exn) {
-      throw new IllegalArgumentException("unable to deserialize " + value, exn);
-    }
-  }
-
-  /**
-   * Returns a copy of the value obtained by serializing and then deserializing it.
-   *
-   * <p>Behaves exactly like {@link #ensureSerializable(Serializable)}.
-   *
-   * @throws IllegalArgumentException if the value cannot be serialized or deserialized
-   */
-  public static <T extends Serializable> T clone(T value) {
-    return ensureSerializable(value);
-  }
-
-  /**
-   * Serializes a Coder and verifies that it can be correctly deserialized.
-   *
-   * <p>Throws a RuntimeException if serialized Coder cannot be deserialized, or
-   * if the deserialized instance is not equal to the original.
-   *
-   * @return the serialized Coder, as a {@link CloudObject}
-   */
-  public static CloudObject ensureSerializable(Coder<?> coder) {
-    // Make sure that Coders are java serializable as well since
-    // they are regularly captured within DoFn's.
-    Coder<?> copy = (Coder<?>) ensureSerializable((Serializable) coder);
-
-    CloudObject cloudObject = copy.asCloudObject();
-
-    Coder<?> decoded;
-    try {
-      decoded = Serializer.deserialize(cloudObject, Coder.class);
-    } catch (RuntimeException e) {
-      throw new RuntimeException(
-          String.format("Unable to deserialize Coder: %s. "
-              + "Check that a suitable constructor is defined.  "
-              + "See Coder for details.", coder), e
-      );
-    }
-    // Use the template overload so the message is only formatted when the check fails.
-    Preconditions.checkState(coder.equals(decoded),
-        "Coder not equal to original after serialization, "
-            + "indicating that the Coder may not implement serialization "
-            + "correctly.  Before: %s, after: %s, cloud encoding: %s",
-        coder, decoded, cloudObject);
-
-    return cloudObject;
-  }
-
-  /**
-   * Serializes an arbitrary T with the given {@code Coder<T>} and verifies
-   * that it can be correctly deserialized.
-   *
-   * @param errorContext prefix for the message of any exception thrown
-   * @return the value obtained by decoding the encoded bytes
-   * @throws IllegalArgumentException if the value cannot be encoded or decoded by the coder
-   */
-  public static <T> T ensureSerializableByCoder(
-      Coder<T> coder, T value, String errorContext) {
-    byte[] encodedValue;
-    try {
-      encodedValue = encodeToByteArray(coder, value);
-    } catch (CoderException exn) {
-      // TODO: Put in better element printing:
-      // truncate if too long.
-      throw new IllegalArgumentException(
-          errorContext + ": unable to encode value "
-          + value + " using " + coder,
-          exn);
-    }
-    try {
-      return decodeFromByteArray(coder, encodedValue);
-    } catch (CoderException exn) {
-      // TODO: Put in better encoded byte array printing:
-      // use printable chars with escapes instead of codes, and
-      // truncate if too long.
-      throw new IllegalArgumentException(
-          errorContext + ": unable to decode " + Arrays.toString(encodedValue)
-          + ", encoding of value " + value + ", using " + coder,
-          exn);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java
deleted file mode 100644
index 6a8a337..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Serializer.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.DeserializationFeature;
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * Utility for converting objects between Java and Cloud representations.
- */
-public final class Serializer {
-  // Holder class: the mappers are created when this nested class is first used,
-  // i.e. on the first call into Serializer.
-  private static class SingletonHelper {
-    static final ObjectMapper OBJECT_MAPPER = createObjectMapper();
-    static final ObjectMapper TREE_MAPPER = createTreeMapper();
-
-    /**
-     * Builds the mapper that turns Google API client maps into Jackson trees.
-     */
-    private static ObjectMapper createTreeMapper() {
-      return new ObjectMapper();
-    }
-
-    /**
-     * Builds the mapper that turns Jackson trees into objects.
-     */
-    private static ObjectMapper createObjectMapper() {
-      ObjectMapper mapper = new ObjectMapper();
-
-      // Ignore properties that are not used by the object.
-      mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
-
-      // For parameters of type Object, use the @type property to determine the
-      // class to instantiate.
-      //
-      // TODO: It would be ideal to do this for all non-final classes.  The
-      // problem with using DefaultTyping.NON_FINAL is that it insists on having
-      // type information in the JSON for classes with useful default
-      // implementations, such as List.  Ideally, we'd combine these defaults
-      // with available type information if that information's present.
-      mapper.enableDefaultTypingAsProperty(
-          ObjectMapper.DefaultTyping.JAVA_LANG_OBJECT,
-          PropertyNames.OBJECT_TYPE_NAME);
-
-      mapper.registerModule(new CoderUtils.Jackson2Module());
-      return mapper;
-    }
-  }
-
-  /**
-   * Deserializes an object from a Dataflow structured encoding (represented in
-   * Java as a map).
-   *
-   * <p>The standard Dataflow SDK object serialization protocol is based on JSON.
-   * Data is typically encoded as a JSON object whose fields represent the
-   * object's data.
-   *
-   * <p>The actual deserialization is performed by Jackson, which can deserialize
-   * public fields, use JavaBean setters, or use injection annotations to
-   * indicate how to construct the object.  The {@link ObjectMapper} used is
-   * configured to use the "@type" field as the name of the class to instantiate
-   * (supporting polymorphic types), and may be further configured by
-   * annotations or via {@link ObjectMapper#registerModule}.
-   *
-   * @see <a href="http://wiki.fasterxml.com/JacksonFAQ#Data_Binding.2C_general">
-   * Jackson Data-Binding</a>
-   * @see <a href="https://github.com/FasterXML/jackson-annotations/wiki/Jackson-Annotations">
-   * Jackson-Annotations</a>
-   * @param serialized the object in untyped decoded form (i.e. a nested {@link Map})
-   * @param clazz the expected object class
-   */
-  public static <T> T deserialize(Map<String, Object> serialized, Class<T> clazz) {
-    // First replace well-known cloud-typed entries with their Java values.
-    Object translated = deserializeCloudKnownTypes(serialized);
-    try {
-      return SingletonHelper.OBJECT_MAPPER.treeToValue(
-          SingletonHelper.TREE_MAPPER.valueToTree(translated),
-          clazz);
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException(
-          "Unable to deserialize class " + clazz, e);
-    }
-  }
-
-  /**
-   * Recursively walks the supplied value, looking for well-known cloud type
-   * information (keyed as {@link PropertyNames#OBJECT_TYPE_NAME}, matching a
-   * URI value from the {@link CloudKnownType} enum).  Upon finding this type
-   * information, it converts it into the correspondingly typed Java value.
-   * Maps and lists are copied with their contents translated; any other value
-   * is returned as-is.
-   */
-  @SuppressWarnings("unchecked")
-  private static Object deserializeCloudKnownTypes(Object src) {
-    if (src instanceof Map) {
-      return translateMap((Map<String, Object>) src);
-    }
-    if (src instanceof List) {
-      return translateList((List<Object>) src);
-    }
-    // Neither a Map nor a List; no translation needed.
-    return src;
-  }
-
-  /**
-   * Translates a map: either into a value of a well-known cloud type, or into
-   * a new map whose values have been translated recursively.
-   */
-  private static Object translateMap(Map<String, Object> srcMap) {
-    @Nullable Object scalar = srcMap.get(PropertyNames.SCALAR_FIELD_NAME);
-    @Nullable CloudKnownType knownType =
-        CloudKnownType.forUri((String) srcMap.get(PropertyNames.OBJECT_TYPE_NAME));
-    if (knownType != null && scalar != null) {
-      // It's a value of a well-known cloud type; let the known type handler
-      // handle the translation.
-      return knownType.parse(scalar, knownType.defaultClass());
-    }
-
-    // Otherwise, it's just an ordinary map.
-    Map<String, Object> translated = new HashMap<>(srcMap.size());
-    for (Map.Entry<String, Object> entry : srcMap.entrySet()) {
-      translated.put(entry.getKey(), deserializeCloudKnownTypes(entry.getValue()));
-    }
-    return translated;
-  }
-
-  /**
-   * Returns a new list holding the recursively translated elements of the given list.
-   */
-  private static Object translateList(List<Object> srcList) {
-    List<Object> translated = new ArrayList<>(srcList.size());
-    for (Object element : srcList) {
-      translated.add(deserializeCloudKnownTypes(element));
-    }
-    return translated;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ShardingWritableByteChannel.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ShardingWritableByteChannel.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ShardingWritableByteChannel.java
deleted file mode 100644
index 54794ef..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ShardingWritableByteChannel.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.channels.WritableByteChannel;
-import java.util.ArrayList;
-
-/**
- * Implements a WritableByteChannel that may contain multiple output shards.
- *
- * <p>This provides {@link #writeToShard}, which takes a shard number for
- * writing to a particular shard.
- *
- * <p>The channel is considered open if all downstream channels are open, and
- * closes all downstream channels when closed.
- */
-public class ShardingWritableByteChannel implements WritableByteChannel {
-
-  /**
-   * Special shard number that causes a write to all shards.
-   */
-  public static final int ALL_SHARDS = -2;
-
-
-  private final ArrayList<WritableByteChannel> writers = new ArrayList<>();
-
-  /**
-   * Returns the number of output shards.
-   */
-  public int getNumShards() {
-    return writers.size();
-  }
-
-  /**
-   * Adds another shard output channel.
-   */
-  public void addChannel(WritableByteChannel writer) {
-    writers.add(writer);
-  }
-
-  /**
-   * Returns the WritableByteChannel associated with the given shard number.
-   */
-  public WritableByteChannel getChannel(int shardNum) {
-    return writers.get(shardNum);
-  }
-
-  /**
-   * Writes the buffer to the given shard.
-   *
-   * <p>This does not change the current output shard.
-   *
-   * <p>For {@link #ALL_SHARDS}, every shard is offered the same bytes: each
-   * downstream channel writes from its own view of {@code src}, and {@code src}
-   * itself is marked fully consumed afterwards (provided there is at least one
-   * shard). A single {@code write} call is issued per shard, so a downstream
-   * channel that accepts only part of the buffer will miss the remainder.
-   *
-   * @param shardNum a non-negative shard index, or {@link #ALL_SHARDS}
-   * @param src the bytes to write
-   * @return The total number of bytes written.  If the shard number is
-   * {@link #ALL_SHARDS}, then the total is the sum of each individual shard
-   * write.
-   * @throws IllegalArgumentException if {@code shardNum} is negative and not
-   * {@link #ALL_SHARDS}
-   */
-  public int writeToShard(int shardNum, ByteBuffer src) throws IOException {
-    if (shardNum >= 0) {
-      return writers.get(shardNum).write(src);
-    }
-    if (shardNum != ALL_SHARDS) {
-      throw new IllegalArgumentException("Illegal shard number: " + shardNum);
-    }
-
-    int size = 0;
-    for (WritableByteChannel writer : writers) {
-      // A write advances the buffer's position, so passing src itself would
-      // leave nothing for the second and later shards. A duplicate shares the
-      // content but has an independent position.
-      size += writer.write(src.duplicate());
-    }
-    if (!writers.isEmpty()) {
-      src.position(src.limit());
-    }
-    return size;
-  }
-
-  /**
-   * Writes a buffer to all shards.
-   *
-   * <p>Same as calling {@code writeToShard(ALL_SHARDS, buf)}.
-   */
-  @Override
-  public int write(ByteBuffer src) throws IOException {
-    return writeToShard(ALL_SHARDS, src);
-  }
-
-  /**
-   * Returns true only if every downstream channel is open (vacuously true
-   * when there are no shards).
-   */
-  @Override
-  public boolean isOpen() {
-    for (WritableByteChannel writer : writers) {
-      if (!writer.isOpen()) {
-        return false;
-      }
-    }
-
-    return true;
-  }
-
-  /**
-   * Closes every downstream channel.
-   *
-   * <p>An {@link IOException} from one channel does not prevent the remaining
-   * channels from being closed; the first such exception is rethrown at the
-   * end with any later ones attached as suppressed exceptions.
-   */
-  @Override
-  public void close() throws IOException {
-    IOException failure = null;
-    for (WritableByteChannel writer : writers) {
-      try {
-        writer.close();
-      } catch (IOException e) {
-        if (failure == null) {
-          failure = e;
-        } else {
-          failure.addSuppressed(e);
-        }
-      }
-    }
-    if (failure != null) {
-      throw failure;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SideInputReader.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SideInputReader.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SideInputReader.java
deleted file mode 100644
index 37873f3..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SideInputReader.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-
-import javax.annotation.Nullable;
-
-/**
- * The interface to objects that provide side inputs. Particular implementations
- * may read a side input directly or use appropriate sorts of caching, etc.
- */
-public interface SideInputReader {
-  /**
-   * Returns the value of the given {@link PCollectionView} for the given {@link BoundedWindow}.
-   *
-   * <p>It is valid for a side input to be {@code null}. It is <i>not</i> valid for this to
-   * return {@code null} for any other reason.
-   *
-   * @param view the side input view to read
-   * @param window the window for which the view's value is requested
-   * @return the value of the view in that window, which may be {@code null} only if the
-   *     side input itself is {@code null}
-   */
-  @Nullable
-  <T> T get(PCollectionView<T> view, BoundedWindow window);
-
-  /**
-   * Returns true if the given {@link PCollectionView} is valid for this reader.
-   *
-   * @param view the side input view to look up
-   */
-  <T> boolean contains(PCollectionView<T> view);
-
-  /**
-   * Returns true if there are no side inputs in this reader.
-   */
-  boolean isEmpty();
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SimpleDoFnRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SimpleDoFnRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SimpleDoFnRunner.java
deleted file mode 100644
index 15a5e51..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/SimpleDoFnRunner.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.util.DoFnRunners.OutputManager;
-import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet.AddCounterMutator;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import java.util.List;
-
-/**
- * Runs a {@link DoFn} by constructing the appropriate contexts and passing them in.
- *
- * @param <InputT> the type of the DoFn's (main) input elements
- * @param <OutputT> the type of the DoFn's (main) output elements
- */
-public class SimpleDoFnRunner<InputT, OutputT> extends DoFnRunnerBase<InputT, OutputT> {
-
-  /**
-   * Passes every argument through unchanged to the {@link DoFnRunnerBase} constructor.
-   */
-  protected SimpleDoFnRunner(
-      PipelineOptions options,
-      DoFn<InputT, OutputT> fn,
-      SideInputReader sideInputReader,
-      OutputManager outputManager,
-      TupleTag<OutputT> mainOutputTag,
-      List<TupleTag<?>> sideOutputTags,
-      StepContext stepContext,
-      AddCounterMutator addCounterMutator,
-      WindowingStrategy<?, ?> windowingStrategy) {
-    super(
-        options,
-        fn,
-        sideInputReader,
-        outputManager,
-        mainOutputTag,
-        sideOutputTags,
-        stepContext,
-        addCounterMutator,
-        windowingStrategy);
-  }
-
-  /**
-   * Creates a process context for the element and hands it to the DoFn's
-   * {@code processElement}; any exception thrown there is rethrown wrapped by
-   * {@code wrapUserCodeException}.
-   */
-  @Override
-  protected void invokeProcessElement(WindowedValue<InputT> elem) {
-    // The context is created outside the try block, so only the user code call is wrapped.
-    final DoFn<InputT, OutputT>.ProcessContext context = createProcessContext(elem);
-    try {
-      // This can contain user code. Wrap it in case it throws an exception.
-      fn.processElement(context);
-    } catch (Exception ex) {
-      throw wrapUserCodeException(ex);
-    }
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Stager.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Stager.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Stager.java
deleted file mode 100644
index 04fd599..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Stager.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.services.dataflow.model.DataflowPackage;
-
-import java.util.List;
-
-/**
- * Interface for staging files needed for running a Dataflow pipeline.
- */
-public interface Stager {
-  /**
-   * Stages files and returns a list of packages.
-   */
-  List<DataflowPackage> stageFiles();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamUtils.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamUtils.java
deleted file mode 100644
index 268eb7f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/StreamUtils.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.ref.SoftReference;
-
-/**
- * Utility functions for stream operations.
- */
-public class StreamUtils {
-
-  private StreamUtils() {
-  }
-
-  private static final int BUF_SIZE = 8192;
-
-  // Per-thread copy buffer, held through a SoftReference and re-created when absent.
-  private static final ThreadLocal<SoftReference<byte[]>> threadLocalBuffer =
-      new ThreadLocal<>();
-
-  /**
-   * Efficiently converts a stream to bytes.
-   *
-   * <p>An {@code ExposedByteArrayInputStream} is drained through its own {@code readAll()};
-   * a {@link ByteArrayInputStream} is read in one call sized by {@code available()}; any
-   * other stream is copied chunk by chunk until end of stream.
-   *
-   * @param stream the stream to read
-   * @return the bytes read from the stream
-   * @throws IOException if reading from the stream fails
-   */
-  public static byte[] getBytes(InputStream stream) throws IOException {
-    if (stream instanceof ExposedByteArrayInputStream) {
-      // Fast path for the exposed version.
-      return ((ExposedByteArrayInputStream) stream).readAll();
-    }
-    if (stream instanceof ByteArrayInputStream) {
-      // Fast path for ByteArrayInputStream.
-      byte[] ret = new byte[stream.available()];
-      stream.read(ret);
-      return ret;
-    }
-
-    // Falls back to normal stream copying.
-    byte[] chunk = scratchBuffer();
-    ByteArrayOutputStream collected = new ByteArrayOutputStream();
-    int count;
-    while ((count = stream.read(chunk)) != -1) {
-      collected.write(chunk, 0, count);
-    }
-    return collected.toByteArray();
-  }
-
-  /**
-   * Returns this thread's copy buffer, allocating and caching a new one if none is available.
-   */
-  private static byte[] scratchBuffer() {
-    SoftReference<byte[]> cached = threadLocalBuffer.get();
-    byte[] buffer = (cached != null) ? cached.get() : null;
-    if (buffer == null) {
-      buffer = new byte[BUF_SIZE];
-      threadLocalBuffer.set(new SoftReference<byte[]>(buffer));
-    }
-    return buffer;
-  }
-
-}

[67/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

Directory reorganization

Move Java-specific archetypes from "maven-archetypes/" into "sdks/java/maven-archetypes/".


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/257a7a6b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/257a7a6b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/257a7a6b

Branch: refs/heads/master
Commit: 257a7a6be0cce4d08be749af159ec8a6adb7ceb9
Parents: d4233aa
Author: Davor Bonaci <da...@google.com>
Authored: Wed Mar 23 17:47:11 2016 -0700
Committer: Davor Bonaci <da...@google.com>
Committed: Wed Mar 23 18:33:33 2016 -0700

----------------------------------------------------------------------
 maven-archetypes/examples/pom.xml               |  56 ---
 .../META-INF/maven/archetype-metadata.xml       |  29 --
 .../main/resources/archetype-resources/pom.xml  | 204 ----------
 .../src/main/java/DebuggingWordCount.java       | 182 ---------
 .../src/main/java/MinimalWordCount.java         | 115 ------
 .../src/main/java/WindowedWordCount.java        | 262 ------------
 .../src/main/java/WordCount.java                | 204 ----------
 .../java/common/DataflowExampleOptions.java     |  29 --
 .../main/java/common/DataflowExampleUtils.java  | 398 -------------------
 .../common/ExampleBigQueryTableOptions.java     |  53 ---
 .../java/common/ExamplePubsubTopicOptions.java  |  49 ---
 .../main/java/common/PubsubFileInjector.java    | 153 -------
 .../src/test/java/DebuggingWordCountTest.java   |  44 --
 .../src/test/java/WordCountTest.java            |  85 ----
 .../projects/basic/archetype.properties         |   5 -
 .../src/test/resources/projects/basic/goal.txt  |   1 -
 maven-archetypes/pom.xml                        |  41 --
 maven-archetypes/starter/pom.xml                |  57 ---
 .../META-INF/maven/archetype-metadata.xml       |  21 -
 .../main/resources/archetype-resources/pom.xml  |  43 --
 .../src/main/java/StarterPipeline.java          |  67 ----
 .../projects/basic/archetype.properties         |   5 -
 .../src/test/resources/projects/basic/goal.txt  |   1 -
 .../resources/projects/basic/reference/pom.xml  |  43 --
 .../src/main/java/it/pkg/StarterPipeline.java   |  67 ----
 pom.xml                                         |   2 +-
 sdks/java/maven-archetypes/examples/pom.xml     |  56 +++
 .../META-INF/maven/archetype-metadata.xml       |  29 ++
 .../main/resources/archetype-resources/pom.xml  | 204 ++++++++++
 .../src/main/java/DebuggingWordCount.java       | 182 +++++++++
 .../src/main/java/MinimalWordCount.java         | 115 ++++++
 .../src/main/java/WindowedWordCount.java        | 262 ++++++++++++
 .../src/main/java/WordCount.java                | 204 ++++++++++
 .../java/common/DataflowExampleOptions.java     |  29 ++
 .../main/java/common/DataflowExampleUtils.java  | 398 +++++++++++++++++++
 .../common/ExampleBigQueryTableOptions.java     |  53 +++
 .../java/common/ExamplePubsubTopicOptions.java  |  49 +++
 .../main/java/common/PubsubFileInjector.java    | 153 +++++++
 .../src/test/java/DebuggingWordCountTest.java   |  44 ++
 .../src/test/java/WordCountTest.java            |  85 ++++
 .../projects/basic/archetype.properties         |   5 +
 .../src/test/resources/projects/basic/goal.txt  |   1 +
 sdks/java/maven-archetypes/pom.xml              |  41 ++
 sdks/java/maven-archetypes/starter/pom.xml      |  57 +++
 .../META-INF/maven/archetype-metadata.xml       |  21 +
 .../main/resources/archetype-resources/pom.xml  |  43 ++
 .../src/main/java/StarterPipeline.java          |  67 ++++
 .../projects/basic/archetype.properties         |   5 +
 .../src/test/resources/projects/basic/goal.txt  |   1 +
 .../resources/projects/basic/reference/pom.xml  |  43 ++
 .../src/main/java/it/pkg/StarterPipeline.java   |  67 ++++
 51 files changed, 2215 insertions(+), 2215 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/pom.xml
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/pom.xml b/maven-archetypes/examples/pom.xml
deleted file mode 100644
index 7e74b9d..0000000
--- a/maven-archetypes/examples/pom.xml
+++ /dev/null
@@ -1,56 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-    Licensed to the Apache Software Foundation (ASF) under one or more
-    contributor license agreements.  See the NOTICE file distributed with
-    this work for additional information regarding copyright ownership.
-    The ASF licenses this file to You under the Apache License, Version 2.0
-    (the "License"); you may not use this file except in compliance with
-    the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <groupId>org.apache.beam</groupId>
-    <artifactId>maven-archetypes-parent</artifactId>
-    <version>0.1.0-incubating-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-
-  <artifactId>maven-archetypes-examples</artifactId>
-  <name>Apache Beam :: Maven Archetypes :: Examples</name>
-  <description>A Maven Archetype to create a project containing all the
-    example pipelines from the Apache Beam Java SDK.</description>
-
-  <packaging>maven-archetype</packaging>
-
-  <build>
-    <extensions>
-      <extension>
-        <groupId>org.apache.maven.archetype</groupId>
-        <artifactId>archetype-packaging</artifactId>
-        <version>2.4</version>
-      </extension>
-    </extensions>
-
-    <pluginManagement>
-      <plugins>
-        <plugin>
-          <artifactId>maven-archetype-plugin</artifactId>
-          <version>2.4</version>
-        </plugin>
-      </plugins>
-    </pluginManagement>
-  </build>
-</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml b/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
deleted file mode 100644
index 7742af4..0000000
--- a/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ /dev/null
@@ -1,29 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<archetype-descriptor
-    xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
-    name="Google Cloud Dataflow Example Pipelines Archetype"
-    xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
-    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-
-  <requiredProperties>
-    <requiredProperty key="targetPlatform">
-      <defaultValue>1.7</defaultValue>
-    </requiredProperty>
-  </requiredProperties>
-
-  <fileSets>
-    <fileSet filtered="true" packaged="true" encoding="UTF-8">
-      <directory>src/main/java</directory>
-      <includes>
-        <include>**/*.java</include>
-      </includes>
-    </fileSet>
-
-    <fileSet filtered="true" packaged="true" encoding="UTF-8">
-      <directory>src/test/java</directory>
-      <includes>
-        <include>**/*.java</include>
-      </includes>
-    </fileSet>
-  </fileSets>
-</archetype-descriptor>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
deleted file mode 100644
index d19d0c6..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
+++ /dev/null
@@ -1,204 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-  ~ Copyright (C) 2015 Google Inc.
-  ~
-  ~ Licensed under the Apache License, Version 2.0 (the "License"); you may not
-  ~ use this file except in compliance with the License. You may obtain a copy of
-  ~ the License at
-  ~
-  ~ http://www.apache.org/licenses/LICENSE-2.0
-  ~
-  ~ Unless required by applicable law or agreed to in writing, software
-  ~ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-  ~ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-  ~ License for the specific language governing permissions and limitations under
-  ~ the License.
-  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~-->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-
-  <groupId>${groupId}</groupId>
-  <artifactId>${artifactId}</artifactId>
-  <version>${version}</version>
-
-  <packaging>jar</packaging>
-
-  <build>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-compiler-plugin</artifactId>
-        <version>3.3</version>
-        <configuration>
-          <source>${targetPlatform}</source>
-          <target>${targetPlatform}</target>
-        </configuration>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-shade-plugin</artifactId>
-        <version>2.3</version>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-            <configuration>
-              <finalName>${project.artifactId}-bundled-${project.version}</finalName>
-              <artifactSet>
-                <includes>
-                  <include>*:*</include>
-                </includes>
-              </artifactSet>
-              <filters>
-                <filter>
-                  <artifact>*:*</artifact>
-                  <excludes>
-                    <exclude>META-INF/*.SF</exclude>
-                    <exclude>META-INF/*.DSA</exclude>
-                    <exclude>META-INF/*.RSA</exclude>
-                  </excludes>
-                </filter>
-              </filters>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <version>2.18.1</version>
-        <configuration>
-          <parallel>all</parallel>
-          <threadCount>4</threadCount>
-          <redirectTestOutputToFile>true</redirectTestOutputToFile>
-        </configuration>
-        <dependencies>
-          <dependency>
-            <groupId>org.apache.maven.surefire</groupId>
-            <artifactId>surefire-junit47</artifactId>
-            <version>2.18.1</version>
-          </dependency>
-        </dependencies>
-      </plugin>
-    </plugins>
-  </build>
-
-  <dependencies>
-    <!-- Adds a dependency on the SDK. The version below is a range, not a single pinned release. -->
-    <dependency>
-      <groupId>org.apache.beam</groupId>
-      <artifactId>java-sdk-all</artifactId>
-      <version>[0-incubating, 2-incubating)</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.api-client</groupId>
-      <artifactId>google-api-client</artifactId>
-      <version>1.21.0</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <!-- Dependencies below this line are specific dependencies needed by the examples code. -->
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-bigquery</artifactId>
-      <version>v2-rev248-1.21.0</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.http-client</groupId>
-      <artifactId>google-http-client</artifactId>
-      <version>1.21.0</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.apis</groupId>
-      <artifactId>google-api-services-pubsub</artifactId>
-      <version>v1-rev7-1.21.0</version>
-      <exclusions>
-        <!-- Exclude an old version of guava that is being pulled
-             in by a transitive dependency of google-api-client -->
-        <exclusion>
-          <groupId>com.google.guava</groupId>
-          <artifactId>guava-jdk5</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>joda-time</groupId>
-      <artifactId>joda-time</artifactId>
-      <version>2.4</version>
-    </dependency>
-
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-      <version>18.0</version>
-    </dependency>
-
-     <dependency>
-      <groupId>javax.servlet</groupId>
-      <artifactId>javax.servlet-api</artifactId>
-      <version>3.1.0</version>
-    </dependency>
-
-    <!-- Add slf4j API frontend binding with JUL backend -->
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-api</artifactId>
-      <version>1.7.7</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-jdk14</artifactId>
-      <version>1.7.7</version>
-      <!-- When loaded at runtime this will wire up slf4j to the JUL backend -->
-      <scope>runtime</scope>
-    </dependency>
-
-    <!-- Hamcrest and JUnit are required dependencies of DataflowAssert,
-         which is used in the main code of the DebuggingWordCount example. -->
-    <dependency>
-      <groupId>org.hamcrest</groupId>
-      <artifactId>hamcrest-all</artifactId>
-      <version>1.3</version>
-    </dependency>
-
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <version>4.11</version>
-    </dependency>
-  </dependencies>
-</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
deleted file mode 100644
index 3cf2bc0..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package};
-
-import ${package}.WordCount.WordCountOptions;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.regex.Pattern;
-
-
-/**
- * An example that verifies word counts in Shakespeare and includes Dataflow best practices.
- *
- * <p>This class, {@link DebuggingWordCount}, is the third in a series of four successively more
- * detailed 'word count' examples. You may first want to take a look at {@link MinimalWordCount}
- * and {@link WordCount}. After you've looked at this example, then see the
- * {@link WindowedWordCount} pipeline, for introduction of additional concepts.
- *
- * <p>Basic concepts, also in the MinimalWordCount and WordCount examples:
- * Reading text files; counting a PCollection; executing a Pipeline both locally
- * and using the Dataflow service; defining DoFns.
- *
- * <p>New Concepts:
- * <pre>
- *   1. Logging to Cloud Logging
- *   2. Controlling Dataflow worker log levels
- *   3. Creating a custom aggregator
- *   4. Testing your Pipeline via DataflowAssert
- * </pre>
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- *
- * <p>To execute this pipeline using the Dataflow service and the additional logging discussed
- * below, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- *   --workerLogLevelOverrides={"com.google.cloud.dataflow.examples":"DEBUG"}
- * }
- * </pre>
- *
- * <p>Note that when you run via <code>mvn exec</code>, you may need to escape
- * the quotations as appropriate for your shell. For example, in <code>bash</code>:
- * <pre>
- * mvn compile exec:java ... \
- *   -Dexec.args="... \
- *     --workerLogLevelOverrides={\\\"com.google.cloud.dataflow.examples\\\":\\\"DEBUG\\\"}"
- * </pre>
- *
- * <p>Concept #2: Dataflow workers which execute user code are configured to log to Cloud
- * Logging by default at "INFO" log level and higher. One may override log levels for specific
- * logging namespaces by specifying:
- * <pre><code>
- *   --workerLogLevelOverrides={"Name1":"Level1","Name2":"Level2",...}
- * </code></pre>
- * For example, by specifying:
- * <pre><code>
- *   --workerLogLevelOverrides={"com.google.cloud.dataflow.examples":"DEBUG"}
- * </code></pre>
- * when executing this pipeline using the Dataflow service, Cloud Logging would contain only
- * "DEBUG" or higher level logs for the {@code com.google.cloud.dataflow.examples} package in
- * addition to the default "INFO" or higher level logs. In addition, the default Dataflow worker
- * logging configuration can be overridden by specifying
- * {@code --defaultWorkerLogLevel=<one of TRACE, DEBUG, INFO, WARN, ERROR>}. For example,
- * by specifying {@code --defaultWorkerLogLevel=DEBUG} when executing this pipeline with
- * the Dataflow service, Cloud Logging would contain all "DEBUG" or higher level logs. Note
- * that changing the default worker log level to TRACE or DEBUG will significantly increase
- * the amount of logs output.
- *
- * <p>The input file defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt} and can be
- * overridden with {@code --inputFile}.
- */
-public class DebuggingWordCount {
-  /** A DoFn that filters for a specific key based upon a regular expression. */
-  public static class FilterTextFn extends DoFn<KV<String, Long>, KV<String, Long>> {
-    /**
-     * Concept #1: The logger below uses the fully qualified class name of FilterTextFn
-     * as the logger. All log statements emitted by this logger will be referenced by this name
-     * and will be visible in the Cloud Logging UI. Learn more at https://cloud.google.com/logging
-     * about the Cloud Logging UI.
-     */
-    private static final Logger LOG = LoggerFactory.getLogger(FilterTextFn.class);
-
-    private final Pattern filter;
-    public FilterTextFn(String pattern) {
-      filter = Pattern.compile(pattern);
-    }
-
-    /**
-     * Concept #3: A custom aggregator can track values in your pipeline as it runs. Those
-     * values will be displayed in the Dataflow Monitoring UI when this pipeline is run using the
-     * Dataflow service. These aggregators below track the number of matched and unmatched words.
-     * Learn more at https://cloud.google.com/dataflow/pipelines/dataflow-monitoring-intf about
-     * the Dataflow Monitoring UI.
-     */
-    private final Aggregator<Long, Long> matchedWords =
-        createAggregator("matchedWords", new Sum.SumLongFn());
-    private final Aggregator<Long, Long> unmatchedWords =
-        createAggregator("umatchedWords", new Sum.SumLongFn());
-
-    @Override
-    public void processElement(ProcessContext c) {
-      if (filter.matcher(c.element().getKey()).matches()) {
-        // Log at the "DEBUG" level each element that we match. When executing this pipeline
-        // using the Dataflow service, these log lines will appear in the Cloud Logging UI
-        // only if the log level is set to "DEBUG" or lower.
-        LOG.debug("Matched: " + c.element().getKey());
-        matchedWords.addValue(1L);
-        c.output(c.element());
-      } else {
-        // Log at the "TRACE" level each element that is not matched. Different log levels
-        // can be used to control the verbosity of logging providing an effective mechanism
-        // to filter less important information.
-        LOG.trace("Did not match: " + c.element().getKey());
-        unmatchedWords.addValue(1L);
-      }
-    }
-  }
-
-  public static void main(String[] args) {
-    WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
-      .as(WordCountOptions.class);
-    Pipeline p = Pipeline.create(options);
-
-    PCollection<KV<String, Long>> filteredWords =
-        p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
-         .apply(new WordCount.CountWords())
-         .apply(ParDo.of(new FilterTextFn("Flourish|stomach")));
-
-    /**
-     * Concept #4: DataflowAssert is a set of convenient PTransforms in the style of
-     * Hamcrest's collection matchers that can be used when writing Pipeline level tests
-     * to validate the contents of PCollections. DataflowAssert is best used in unit tests
-     * with small data sets but is demonstrated here as a teaching tool.
-     *
-     * <p>Below we verify that the set of filtered words matches our expected counts. Note
-     * that DataflowAssert does not provide any output and that successful completion of the
-     * Pipeline implies that the expectations were met. Learn more at
-     * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline on how to test
-     * your Pipeline and see {@link DebuggingWordCountTest} for an example unit test.
-     */
-    List<KV<String, Long>> expectedResults = Arrays.asList(
-        KV.of("Flourish", 3L),
-        KV.of("stomach", 1L));
-    DataflowAssert.that(filteredWords).containsInAnyOrder(expectedResults);
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java
deleted file mode 100644
index 035db01..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/MinimalWordCount.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package};
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.values.KV;
-
-
-/**
- * An example that counts words in Shakespeare.
- *
- * <p>This class, {@link MinimalWordCount}, is the first in a series of four successively more
- * detailed 'word count' examples. Here, for simplicity, we don't show any error-checking or
- * argument processing, and focus on construction of the pipeline, which chains together the
- * application of core transforms.
- *
- * <p>Next, see the {@link WordCount} pipeline, then the {@link DebuggingWordCount}, and finally
- * the {@link WindowedWordCount} pipeline, for more detailed examples that introduce additional
- * concepts.
- *
- * <p>Concepts:
- * <pre>
- *   1. Reading data from text files
- *   2. Specifying 'inline' transforms
- *   3. Counting a PCollection
- *   4. Writing data to Cloud Storage as text files
- * </pre>
- *
- * <p>To execute this pipeline, first edit the code to set your project ID, the staging
- * location, and the output location. The specified GCS bucket(s) must already exist.
- *
- * <p>Then, run the pipeline as described in the README. It will be deployed and run using the
- * Dataflow service. No args are required to run the pipeline. You can see the results in your
- * output bucket in the GCS browser.
- */
-public class MinimalWordCount {
-
-  public static void main(String[] args) {
-    // Create a DataflowPipelineOptions object. This object lets us set various execution
-    // options for our pipeline, such as the associated Cloud Platform project and the location
-    // in Google Cloud Storage to stage files.
-    DataflowPipelineOptions options = PipelineOptionsFactory.create()
-      .as(DataflowPipelineOptions.class);
-    options.setRunner(BlockingDataflowPipelineRunner.class);
-    // CHANGE 1/3: Your project ID is required in order to run your pipeline on Google Cloud.
-    options.setProject("SET_YOUR_PROJECT_ID_HERE");
-    // CHANGE 2/3: Your Google Cloud Storage path is required for staging local files.
-    options.setStagingLocation("gs://SET_YOUR_BUCKET_NAME_HERE/AND_STAGING_DIRECTORY");
-
-    // Create the Pipeline object with the options we defined above.
-    Pipeline p = Pipeline.create(options);
-
-    // Apply the pipeline's transforms.
-
-    // Concept #1: Apply a root transform to the pipeline; in this case, TextIO.Read to read a set
-    // of input text files. TextIO.Read returns a PCollection where each element is one line from
-    // the input text (a set of Shakespeare's texts).
-    p.apply(TextIO.Read.from("gs://dataflow-samples/shakespeare/*"))
-     // Concept #2: Apply a ParDo transform to our PCollection of text lines. This ParDo invokes a
-     // DoFn (defined in-line) on each element that tokenizes the text line into individual words.
-     // The ParDo returns a PCollection<String>, where each element is an individual word in
-     // Shakespeare's collected texts.
-     .apply(ParDo.named("ExtractWords").of(new DoFn<String, String>() {
-                       @Override
-                       public void processElement(ProcessContext c) {
-                         for (String word : c.element().split("[^a-zA-Z']+")) {
-                           if (!word.isEmpty()) {
-                             c.output(word);
-                           }
-                         }
-                       }
-                     }))
-     // Concept #3: Apply the Count transform to our PCollection of individual words. The Count
-     // transform returns a new PCollection of key/value pairs, where each key represents a unique
-     // word in the text. The associated value is the occurrence count for that word.
-     .apply(Count.<String>perElement())
-     // Apply another ParDo transform that formats our PCollection of word counts into a printable
-     // string, suitable for writing to an output file.
-     .apply(ParDo.named("FormatResults").of(new DoFn<KV<String, Long>, String>() {
-                       @Override
-                       public void processElement(ProcessContext c) {
-                         c.output(c.element().getKey() + ": " + c.element().getValue());
-                       }
-                     }))
-     // Concept #4: Apply a write transform, TextIO.Write, at the end of the pipeline.
-     // TextIO.Write writes the contents of a PCollection (in this case, our PCollection of
-     // formatted strings) to a series of text files in Google Cloud Storage.
-     // CHANGE 3/3: A Google Cloud Storage path to write the results to is required.
-     .apply(TextIO.Write.to("gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX"));
-
-    // Run the pipeline.
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java
deleted file mode 100644
index 29921e2..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WindowedWordCount.java
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package};
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import ${package}.common.DataflowExampleUtils;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-
-/**
- * An example that counts words in text, and can run over either unbounded or bounded input
- * collections.
- *
- * <p>This class, {@link WindowedWordCount}, is the last in a series of four successively more
- * detailed 'word count' examples. First take a look at {@link MinimalWordCount},
- * {@link WordCount}, and {@link DebuggingWordCount}.
- *
- * <p>Basic concepts, also in the MinimalWordCount, WordCount, and DebuggingWordCount examples:
- * Reading text files; counting a PCollection; writing to GCS; executing a Pipeline both locally
- * and using the Dataflow service; defining DoFns; creating a custom aggregator;
- * user-defined PTransforms; defining PipelineOptions.
- *
- * <p>New Concepts:
- * <pre>
- *   1. Unbounded and bounded pipeline input modes
- *   2. Adding timestamps to data
- *   3. PubSub topics as sources
- *   4. Windowing
- *   5. Re-using PTransforms over windowed PCollections
- *   6. Writing to BigQuery
- * </pre>
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- * }
- * </pre>
- *
- * <p>Optionally specify the input file path via:
- * {@code --inputFile=gs://INPUT_PATH},
- * which defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt}.
- *
- * <p>Specify an output BigQuery dataset and optionally, a table for the output. If you don't
- * specify the table, one will be created for you using the job name. If you don't specify the
- * dataset, a dataset called {@code dataflow-examples} must already exist in your project.
- * {@code --bigQueryDataset=YOUR-DATASET --bigQueryTable=YOUR-NEW-TABLE-NAME}.
- *
- * <p>Decide whether you want your pipeline to run with 'bounded' (such as files in GCS) or
- * 'unbounded' input (such as a PubSub topic). To run with unbounded input, set
- * {@code --unbounded=true}. Then, optionally specify the Google Cloud PubSub topic to read from
- * via {@code --pubsubTopic=projects/PROJECT_ID/topics/YOUR_TOPIC_NAME}. If the topic does not
- * exist, the pipeline will create one for you. It will delete this topic when it terminates.
- * The pipeline will automatically launch an auxiliary batch pipeline to populate the given PubSub
- * topic with the contents of the {@code --inputFile}, in order to make the example easy to run.
- * If you want to use an independently-populated PubSub topic, indicate this by setting
- * {@code --inputFile=""}. In that case, the auxiliary pipeline will not be started.
- *
- * <p>By default, the pipeline will do fixed windowing, on 1-minute windows.  You can
- * change this interval by setting the {@code --windowSize} parameter, e.g. {@code --windowSize=10}
- * for 10-minute windows.
- */
-public class WindowedWordCount {
-    private static final Logger LOG = LoggerFactory.getLogger(WindowedWordCount.class);
-    static final int WINDOW_SIZE = 1;  // Default window duration in minutes
-
-  /**
-   * Concept #2: A DoFn that sets the data element timestamp. This is a silly method, just for
-   * this example, for the bounded data case.
-   *
-   * <p>Imagine that many ghosts of Shakespeare are all typing madly at the same time to recreate
-   * his masterworks. Each line of the corpus will get a random associated timestamp somewhere in a
-   * 2-hour period.
-   */
-  static class AddTimestampFn extends DoFn<String, String> {
-    private static final long RAND_RANGE = 7200000; // 2 hours in ms
-
-    @Override
-    public void processElement(ProcessContext c) {
-      // Generate a timestamp that falls somewhere in the past two hours.
-      long randomTimestamp = System.currentTimeMillis()
-        - (int) (Math.random() * RAND_RANGE);
-      /**
-       * Concept #2: Set the data element with that timestamp.
-       */
-      c.outputWithTimestamp(c.element(), new Instant(randomTimestamp));
-    }
-  }
-
-  /** A DoFn that converts a Word and Count into a BigQuery table row. */
-  static class FormatAsTableRowFn extends DoFn<KV<String, Long>, TableRow> {
-    @Override
-    public void processElement(ProcessContext c) {
-      TableRow row = new TableRow()
-          .set("word", c.element().getKey())
-          .set("count", c.element().getValue())
-          // include a field for the window timestamp
-         .set("window_timestamp", c.timestamp().toString());
-      c.output(row);
-    }
-  }
-
-  /**
-   * Helper method that defines the BigQuery schema used for the output.
-   */
-  private static TableSchema getSchema() {
-    List<TableFieldSchema> fields = new ArrayList<>();
-    fields.add(new TableFieldSchema().setName("word").setType("STRING"));
-    fields.add(new TableFieldSchema().setName("count").setType("INTEGER"));
-    fields.add(new TableFieldSchema().setName("window_timestamp").setType("TIMESTAMP"));
-    TableSchema schema = new TableSchema().setFields(fields);
-    return schema;
-  }
-
-  /**
-   * Concept #6: We'll stream the results to a BigQuery table. The BigQuery output sink is one
-   * that supports both bounded and unbounded data. This is a helper method that creates a
-   * TableReference from input options, to tell the pipeline where to write its BigQuery results.
-   */
-  private static TableReference getTableReference(Options options) {
-    TableReference tableRef = new TableReference();
-    tableRef.setProjectId(options.getProject());
-    tableRef.setDatasetId(options.getBigQueryDataset());
-    tableRef.setTableId(options.getBigQueryTable());
-    return tableRef;
-  }
-
-  /**
-   * Options supported by {@link WindowedWordCount}.
-   *
-   * <p>Inherits standard example configuration options, which allow specification of the BigQuery
-   * table and the PubSub topic, as well as the {@link WordCount.WordCountOptions} support for
-   * specification of the input file.
-   */
-  public static interface Options
-        extends WordCount.WordCountOptions, DataflowExampleUtils.DataflowExampleUtilsOptions {
-    @Description("Fixed window duration, in minutes")
-    @Default.Integer(WINDOW_SIZE)
-    Integer getWindowSize();
-    void setWindowSize(Integer value);
-
-    @Description("Whether to run the pipeline with unbounded input")
-    boolean isUnbounded();
-    void setUnbounded(boolean value);
-  }
-
-  public static void main(String[] args) throws IOException {
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    options.setBigQuerySchema(getSchema());
-    // DataflowExampleUtils creates the necessary input sources to simplify execution of this
-    // Pipeline.
-    DataflowExampleUtils exampleDataflowUtils = new DataflowExampleUtils(options,
-      options.isUnbounded());
-
-    Pipeline pipeline = Pipeline.create(options);
-
-    /**
-     * Concept #1: the Dataflow SDK lets us run the same pipeline with either a bounded or
-     * unbounded input source.
-     */
-    PCollection<String> input;
-    if (options.isUnbounded()) {
-      LOG.info("Reading from PubSub.");
-      /**
-       * Concept #3: Read from the PubSub topic. A topic will be created if it wasn't
-       * specified as an argument. The data elements' timestamps will come from the pubsub
-       * injection.
-       */
-      input = pipeline
-          .apply(PubsubIO.Read.topic(options.getPubsubTopic()));
-    } else {
-      /** Else, this is a bounded pipeline. Read from the GCS file. */
-      input = pipeline
-          .apply(TextIO.Read.from(options.getInputFile()))
-          // Concept #2: Add an element timestamp, using an artificial time just to show windowing.
-          // See AddTimestampFn for more detail on this.
-          .apply(ParDo.of(new AddTimestampFn()));
-    }
-
-    /**
-     * Concept #4: Window into fixed windows. The fixed window size for this example defaults to 1
-     * minute (you can change this with a command-line option). See the documentation for more
-     * information on how fixed windows work, and for information on the other types of windowing
-     * available (e.g., sliding windows).
-     */
-    PCollection<String> windowedWords = input
-      .apply(Window.<String>into(
-        FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))));
-
-    /**
-     * Concept #5: Re-use our existing CountWords transform that does not have knowledge of
-     * windows over a PCollection containing windowed values.
-     */
-    PCollection<KV<String, Long>> wordCounts = windowedWords.apply(new WordCount.CountWords());
-
-    /**
-     * Concept #6: Format the results for a BigQuery table, then write to BigQuery.
-     * The BigQuery output source supports both bounded and unbounded data.
-     */
-    wordCounts.apply(ParDo.of(new FormatAsTableRowFn()))
-        .apply(BigQueryIO.Write.to(getTableReference(options)).withSchema(getSchema()));
-
-    PipelineResult result = pipeline.run();
-
-    /**
-     * To mock unbounded input from PubSub, we'll now start an auxiliary 'injector' pipeline that
-     * runs for a limited time, and publishes to the input PubSub topic.
-     *
-     * With an unbounded input source, you will need to explicitly shut down this pipeline when you
-     * are done with it, so that you do not continue to be charged for the instances. You can do
-     * this via a ctrl-C from the command line, or from the developer's console UI for Dataflow
-     * pipelines. The PubSub topic will also be deleted at this time.
-     */
-    exampleDataflowUtils.mockUnboundedSource(options.getInputFile(), result);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java
deleted file mode 100644
index 150b60d..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package};
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-
-/**
- * An example that counts words in Shakespeare and includes Dataflow best practices.
- *
- * <p>This class, {@link WordCount}, is the second in a series of four successively more detailed
- * 'word count' examples. You may first want to take a look at {@link MinimalWordCount}.
- * After you've looked at this example, then see the {@link DebuggingWordCount}
- * pipeline, for introduction of additional concepts.
- *
- * <p>For a detailed walkthrough of this example, see
- *   <a href="https://cloud.google.com/dataflow/java-sdk/wordcount-example">
- *   https://cloud.google.com/dataflow/java-sdk/wordcount-example
- *   </a>
- *
- * <p>Basic concepts, also in the MinimalWordCount example:
- * Reading text files; counting a PCollection; writing to GCS.
- *
- * <p>New Concepts:
- * <pre>
- *   1. Executing a Pipeline both locally and using the Dataflow service
- *   2. Using ParDo with static DoFns defined out-of-line
- *   3. Building a composite transform
- *   4. Defining your own pipeline options
- * </pre>
- *
- * <p>Concept #1: you can execute this pipeline either locally or using the Dataflow service.
- * These are now command-line options and not hard-coded as they were in the MinimalWordCount
- * example.
- * To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and a local output file or output prefix on GCS:
- * <pre>{@code
- *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
- * }</pre>
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- * }
- * </pre>
- * and an output prefix on GCS:
- * <pre>{@code
- *   --output=gs://YOUR_OUTPUT_PREFIX
- * }</pre>
- *
- * <p>The input file defaults to {@code gs://dataflow-samples/shakespeare/kinglear.txt} and can be
- * overridden with {@code --inputFile}.
- */
-public class WordCount {
-
-  /**
-   * Concept #2: You can make your pipeline code less verbose by defining your DoFns statically
-   * out-of-line. This DoFn tokenizes lines of text into individual words; we pass it to a ParDo in
-   * the pipeline.
-   */
-  static class ExtractWordsFn extends DoFn<String, String> {
-    private final Aggregator<Long, Long> emptyLines =
-        createAggregator("emptyLines", new Sum.SumLongFn());
-
-    @Override
-    public void processElement(ProcessContext c) {
-      if (c.element().trim().isEmpty()) {
-        emptyLines.addValue(1L);
-      }
-
-      // Split the line into words.
-      String[] words = c.element().split("[^a-zA-Z']+");
-
-      // Output each word encountered into the output PCollection.
-      for (String word : words) {
-        if (!word.isEmpty()) {
-          c.output(word);
-        }
-      }
-    }
-  }
-
-  /** A DoFn that converts a Word and Count into a printable string. */
-  public static class FormatAsTextFn extends DoFn<KV<String, Long>, String> {
-    @Override
-    public void processElement(ProcessContext c) {
-      c.output(c.element().getKey() + ": " + c.element().getValue());
-    }
-  }
-
-  /**
-   * A PTransform that converts a PCollection containing lines of text into a PCollection of
-   * formatted word counts.
-   *
-   * <p>Concept #3: This is a custom composite transform that bundles two transforms (ParDo and
-   * Count) as a reusable PTransform subclass. Using composite transforms allows for easy reuse,
-   * modular testing, and an improved monitoring experience.
-   */
-  public static class CountWords extends PTransform<PCollection<String>,
-      PCollection<KV<String, Long>>> {
-    @Override
-    public PCollection<KV<String, Long>> apply(PCollection<String> lines) {
-
-      // Convert lines of text into individual words.
-      PCollection<String> words = lines.apply(
-          ParDo.of(new ExtractWordsFn()));
-
-      // Count the number of times each word occurs.
-      PCollection<KV<String, Long>> wordCounts =
-          words.apply(Count.<String>perElement());
-
-      return wordCounts;
-    }
-  }
-
-  /**
-   * Options supported by {@link WordCount}.
-   *
-   * <p>Concept #4: Defining your own configuration options. Here, you can add your own arguments
-   * to be processed by the command-line parser, and specify default values for them. You can then
-   * access the options values in your pipeline code.
-   *
-   * <p>Inherits standard configuration options.
-   */
-  public static interface WordCountOptions extends PipelineOptions {
-    @Description("Path of the file to read from")
-    @Default.String("gs://dataflow-samples/shakespeare/kinglear.txt")
-    String getInputFile();
-    void setInputFile(String value);
-
-    @Description("Path of the file to write to")
-    @Default.InstanceFactory(OutputFactory.class)
-    String getOutput();
-    void setOutput(String value);
-
-    /**
-     * Returns "gs://${YOUR_STAGING_DIRECTORY}/counts.txt" as the default destination.
-     */
-    public static class OutputFactory implements DefaultValueFactory<String> {
-      @Override
-      public String create(PipelineOptions options) {
-        DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
-        if (dataflowOptions.getStagingLocation() != null) {
-          return GcsPath.fromUri(dataflowOptions.getStagingLocation())
-              .resolve("counts.txt").toString();
-        } else {
-          throw new IllegalArgumentException("Must specify --output or --stagingLocation");
-        }
-      }
-    }
-
-  }
-
-  public static void main(String[] args) {
-    WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
-      .as(WordCountOptions.class);
-    Pipeline p = Pipeline.create(options);
-
-    // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the
-    // static FormatAsTextFn() to the ParDo transform.
-    p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
-     .apply(new CountWords())
-     .apply(ParDo.of(new FormatAsTextFn()))
-     .apply(TextIO.Write.named("WriteCounts").to(options.getOutput()));
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleOptions.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleOptions.java
deleted file mode 100644
index e182f4c..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleOptions.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package}.common;
-
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-
-/**
- * Options that can be used to configure the Dataflow examples.
- */
-public interface DataflowExampleOptions extends DataflowPipelineOptions {
-  @Description("Whether to keep jobs running on the Dataflow service after local process exit")
-  @Default.Boolean(false)
-  boolean getKeepJobsRunning();
-  void setKeepJobsRunning(boolean keepJobsRunning);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleUtils.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleUtils.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleUtils.java
deleted file mode 100644
index 9861769..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/DataflowExampleUtils.java
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package}.common;
-
-import com.google.api.client.googleapis.json.GoogleJsonResponseException;
-import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
-import com.google.api.services.bigquery.Bigquery;
-import com.google.api.services.bigquery.Bigquery.Datasets;
-import com.google.api.services.bigquery.Bigquery.Tables;
-import com.google.api.services.bigquery.model.Dataset;
-import com.google.api.services.bigquery.model.DatasetReference;
-import com.google.api.services.bigquery.model.Table;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.api.services.dataflow.Dataflow;
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.pubsub.model.Topic;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.BigQueryOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineJob;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
-import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-
-import javax.servlet.http.HttpServletResponse;
-
-/**
- * The utility class that sets up and tears down external resources, starts the Google Cloud Pub/Sub
- * injector, and cancels the streaming and the injector pipelines once the program terminates.
- *
- * <p>It is used to run Dataflow examples, such as TrafficMaxLaneFlow and TrafficRoutes.
- */
-public class DataflowExampleUtils {
-
-  private final DataflowPipelineOptions options;
-  private Bigquery bigQueryClient = null;
-  private Pubsub pubsubClient = null;
-  private Dataflow dataflowClient = null;
-  private Set<DataflowPipelineJob> jobsToCancel = Sets.newHashSet();
-  private List<String> pendingMessages = Lists.newArrayList();
-
-  /**
-   * Define an interface that supports the PubSub and BigQuery example options.
-   */
-  public static interface DataflowExampleUtilsOptions
-        extends DataflowExampleOptions, ExamplePubsubTopicOptions, ExampleBigQueryTableOptions {
-  }
-
-  public DataflowExampleUtils(DataflowPipelineOptions options) {
-    this.options = options;
-  }
-
-  /**
-   * Sets up the external resources and configures the runner options.
-   */
-  public DataflowExampleUtils(DataflowPipelineOptions options, boolean isUnbounded)
-      throws IOException {
-    this.options = options;
-    setupResourcesAndRunner(isUnbounded);
-  }
-
-  /**
-   * Sets up external resources that are required by the example,
-   * such as Pub/Sub topics and BigQuery tables.
-   *
-   * @throws IOException if there is a problem setting up the resources
-   */
-  public void setup() throws IOException {
-    setupPubsubTopic();
-    setupBigQueryTable();
-  }
-
-  /**
-   * Set up external resources, and configure the runner appropriately.
-   */
-  public void setupResourcesAndRunner(boolean isUnbounded) throws IOException {
-    if (isUnbounded) {
-      options.setStreaming(true);
-    }
-    setup();
-    setupRunner();
-  }
-
-  /**
-   * Sets up the Google Cloud Pub/Sub topic.
-   *
-   * <p>If the topic doesn't exist, a new topic with the given name will be created.
-   *
-   * @throws IOException if there is a problem setting up the Pub/Sub topic
-   */
-  public void setupPubsubTopic() throws IOException {
-    ExamplePubsubTopicOptions pubsubTopicOptions = options.as(ExamplePubsubTopicOptions.class);
-    if (!pubsubTopicOptions.getPubsubTopic().isEmpty()) {
-      pendingMessages.add("*******************Set Up Pubsub Topic*********************");
-      setupPubsubTopic(pubsubTopicOptions.getPubsubTopic());
-      pendingMessages.add("The Pub/Sub topic has been set up for this example: "
-          + pubsubTopicOptions.getPubsubTopic());
-    }
-  }
-
-  /**
-   * Sets up the BigQuery table with the given schema.
-   *
-   * <p>If the table already exists, the schema has to match the given one. Otherwise, the example
-   * will throw a RuntimeException. If the table doesn't exist, a new table with the given schema
-   * will be created.
-   *
-   * @throws IOException if there is a problem setting up the BigQuery table
-   */
-  public void setupBigQueryTable() throws IOException {
-    ExampleBigQueryTableOptions bigQueryTableOptions =
-        options.as(ExampleBigQueryTableOptions.class);
-    if (bigQueryTableOptions.getBigQueryDataset() != null
-        && bigQueryTableOptions.getBigQueryTable() != null
-        && bigQueryTableOptions.getBigQuerySchema() != null) {
-      pendingMessages.add("******************Set Up Big Query Table*******************");
-      setupBigQueryTable(bigQueryTableOptions.getProject(),
-                         bigQueryTableOptions.getBigQueryDataset(),
-                         bigQueryTableOptions.getBigQueryTable(),
-                         bigQueryTableOptions.getBigQuerySchema());
-      pendingMessages.add("The BigQuery table has been set up for this example: "
-          + bigQueryTableOptions.getProject()
-          + ":" + bigQueryTableOptions.getBigQueryDataset()
-          + "." + bigQueryTableOptions.getBigQueryTable());
-    }
-  }
-
-  /**
-   * Tears down external resources that can be deleted upon the example's completion.
-   */
-  private void tearDown() {
-    pendingMessages.add("*************************Tear Down*************************");
-    ExamplePubsubTopicOptions pubsubTopicOptions = options.as(ExamplePubsubTopicOptions.class);
-    if (!pubsubTopicOptions.getPubsubTopic().isEmpty()) {
-      try {
-        deletePubsubTopic(pubsubTopicOptions.getPubsubTopic());
-        pendingMessages.add("The Pub/Sub topic has been deleted: "
-            + pubsubTopicOptions.getPubsubTopic());
-      } catch (IOException e) {
-        pendingMessages.add("Failed to delete the Pub/Sub topic : "
-            + pubsubTopicOptions.getPubsubTopic());
-      }
-    }
-
-    ExampleBigQueryTableOptions bigQueryTableOptions =
-        options.as(ExampleBigQueryTableOptions.class);
-    if (bigQueryTableOptions.getBigQueryDataset() != null
-        && bigQueryTableOptions.getBigQueryTable() != null
-        && bigQueryTableOptions.getBigQuerySchema() != null) {
-      pendingMessages.add("The BigQuery table might contain the example's output, "
-          + "and it is not deleted automatically: "
-          + bigQueryTableOptions.getProject()
-          + ":" + bigQueryTableOptions.getBigQueryDataset()
-          + "." + bigQueryTableOptions.getBigQueryTable());
-      pendingMessages.add("Please go to the Developers Console to delete it manually."
-          + " Otherwise, you may be charged for its usage.");
-    }
-  }
-
-  private void setupBigQueryTable(String projectId, String datasetId, String tableId,
-      TableSchema schema) throws IOException {
-    if (bigQueryClient == null) {
-      bigQueryClient = Transport.newBigQueryClient(options.as(BigQueryOptions.class)).build();
-    }
-
-    Datasets datasetService = bigQueryClient.datasets();
-    if (executeNullIfNotFound(datasetService.get(projectId, datasetId)) == null) {
-      Dataset newDataset = new Dataset().setDatasetReference(
-          new DatasetReference().setProjectId(projectId).setDatasetId(datasetId));
-      datasetService.insert(projectId, newDataset).execute();
-    }
-
-    Tables tableService = bigQueryClient.tables();
-    Table table = executeNullIfNotFound(tableService.get(projectId, datasetId, tableId));
-    if (table == null) {
-      Table newTable = new Table().setSchema(schema).setTableReference(
-          new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId));
-      tableService.insert(projectId, datasetId, newTable).execute();
-    } else if (!table.getSchema().equals(schema)) {
-      throw new RuntimeException(
-          "Table exists and schemas do not match, expecting: " + schema.toPrettyString()
-          + ", actual: " + table.getSchema().toPrettyString());
-    }
-  }
-
-  private void setupPubsubTopic(String topic) throws IOException {
-    if (pubsubClient == null) {
-      pubsubClient = Transport.newPubsubClient(options).build();
-    }
-    if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) == null) {
-      pubsubClient.projects().topics().create(topic, new Topic().setName(topic)).execute();
-    }
-  }
-
-  /**
-   * Deletes the Google Cloud Pub/Sub topic.
-   *
-   * @throws IOException if there is a problem deleting the Pub/Sub topic
-   */
-  private void deletePubsubTopic(String topic) throws IOException {
-    if (pubsubClient == null) {
-      pubsubClient = Transport.newPubsubClient(options).build();
-    }
-    if (executeNullIfNotFound(pubsubClient.projects().topics().get(topic)) != null) {
-      pubsubClient.projects().topics().delete(topic).execute();
-    }
-  }
-
-  /**
-   * If this is an unbounded (streaming) pipeline, and both inputFile and pubsub topic are defined,
-   * start an 'injector' pipeline that publishes the contents of the file to the given topic, first
-   * creating the topic if necessary.
-   */
-  public void startInjectorIfNeeded(String inputFile) {
-    ExamplePubsubTopicOptions pubsubTopicOptions = options.as(ExamplePubsubTopicOptions.class);
-    if (pubsubTopicOptions.isStreaming()
-        && inputFile != null && !inputFile.isEmpty()
-        && pubsubTopicOptions.getPubsubTopic() != null
-        && !pubsubTopicOptions.getPubsubTopic().isEmpty()) {
-      runInjectorPipeline(inputFile, pubsubTopicOptions.getPubsubTopic());
-    }
-  }
-
-  /**
-   * Do some runner setup: check that the DirectPipelineRunner is not used in conjunction with
-   * streaming, and if streaming is specified, force the DataflowPipelineRunner. Does nothing
-   * when streaming is not enabled.
-   */
-  public void setupRunner() {
-    if (options.isStreaming()) {
-      if (options.getRunner() == DirectPipelineRunner.class) {
-        throw new IllegalArgumentException(
-          "Processing of unbounded input sources is not supported with the DirectPipelineRunner.");
-      }
-      // In order to cancel the pipelines automatically,
-      // {@literal DataflowPipelineRunner} is forced to be used.
-      options.setRunner(DataflowPipelineRunner.class);
-    }
-  }
-
-  /**
-   * Runs the batch injector for the streaming pipeline.
-   *
-   * <p>The injector pipeline will read from the given text file, and inject data
-   * into the Google Cloud Pub/Sub topic.
-   */
-  public void runInjectorPipeline(String inputFile, String topic) {
-    DataflowPipelineOptions copiedOptions = options.cloneAs(DataflowPipelineOptions.class);
-    copiedOptions.setStreaming(false);
-    copiedOptions.setNumWorkers(
-        options.as(ExamplePubsubTopicOptions.class).getInjectorNumWorkers());
-    copiedOptions.setJobName(options.getJobName() + "-injector");
-    Pipeline injectorPipeline = Pipeline.create(copiedOptions);
-    injectorPipeline.apply(TextIO.Read.from(inputFile))
-                    .apply(IntraBundleParallelization
-                        .of(PubsubFileInjector.publish(topic))
-                        .withMaxParallelism(20));
-    DataflowPipelineJob injectorJob = (DataflowPipelineJob) injectorPipeline.run();
-    jobsToCancel.add(injectorJob);
-  }
-
-  /**
-   * Runs the provided injector pipeline for the streaming pipeline.
-   */
-  public void runInjectorPipeline(Pipeline injectorPipeline) {
-    DataflowPipelineJob injectorJob = (DataflowPipelineJob) injectorPipeline.run();
-    jobsToCancel.add(injectorJob);
-  }
-
-  /**
-   * Start the auxiliary injector pipeline, then wait for this pipeline to finish.
-   */
-  public void mockUnboundedSource(String inputFile, PipelineResult result) {
-    startInjectorIfNeeded(inputFile);
-    waitToFinish(result);
-  }
-
-  /**
-   * If {@literal DataflowPipelineRunner} or {@literal BlockingDataflowPipelineRunner} is used,
-   * waits for the pipeline to finish and cancels it (and the injector) before the program exits.
-   */
-  public void waitToFinish(PipelineResult result) {
-    if (result instanceof DataflowPipelineJob) {
-      final DataflowPipelineJob job = (DataflowPipelineJob) result;
-      jobsToCancel.add(job);
-      if (!options.as(DataflowExampleOptions.class).getKeepJobsRunning()) {
-        addShutdownHook(jobsToCancel);
-      }
-      try {
-        job.waitToFinish(-1, TimeUnit.SECONDS, new MonitoringUtil.PrintHandler(System.out));
-      } catch (Exception e) {
-        throw new RuntimeException("Failed to wait for job to finish: " + job.getJobId());
-      }
-    } else {
-      // Do nothing if the given PipelineResult doesn't support waitToFinish(),
-      // such as EvaluationResults returned by DirectPipelineRunner.
-    }
-  }
-
-  private void addShutdownHook(final Collection<DataflowPipelineJob> jobs) {
-    if (dataflowClient == null) {
-      dataflowClient = options.getDataflowClient();
-    }
-
-    Runtime.getRuntime().addShutdownHook(new Thread() {
-      @Override
-      public void run() {
-        tearDown();
-        printPendingMessages();
-        for (DataflowPipelineJob job : jobs) {
-          System.out.println("Canceling example pipeline: " + job.getJobId());
-          try {
-            job.cancel();
-          } catch (IOException e) {
-            System.out.println("Failed to cancel the job,"
-                + " please go to the Developers Console to cancel it manually");
-            System.out.println(
-                MonitoringUtil.getJobMonitoringPageURL(job.getProjectId(), job.getJobId()));
-          }
-        }
-
-        for (DataflowPipelineJob job : jobs) {
-          boolean cancellationVerified = false;
-          for (int retryAttempts = 6; retryAttempts > 0; retryAttempts--) {
-            if (job.getState().isTerminal()) {
-              cancellationVerified = true;
-              System.out.println("Canceled example pipeline: " + job.getJobId());
-              break;
-            } else {
-              System.out.println(
-                  "The example pipeline is still running. Verifying the cancellation.");
-            }
-            try {
-              Thread.sleep(10000);
-            } catch (InterruptedException e) {
-              // Ignore
-            }
-          }
-          if (!cancellationVerified) {
-            System.out.println("Failed to verify the cancellation for job: " + job.getJobId());
-            System.out.println("Please go to the Developers Console to verify manually:");
-            System.out.println(
-                MonitoringUtil.getJobMonitoringPageURL(job.getProjectId(), job.getJobId()));
-          }
-        }
-      }
-    });
-  }
-
-  private void printPendingMessages() {
-    System.out.println();
-    System.out.println("***********************************************************");
-    System.out.println("***********************************************************");
-    for (String message : pendingMessages) {
-      System.out.println(message);
-    }
-    System.out.println("***********************************************************");
-    System.out.println("***********************************************************");
-  }
-
-  private static <T> T executeNullIfNotFound(
-      AbstractGoogleClientRequest<T> request) throws IOException {
-    try {
-      return request.execute();
-    } catch (GoogleJsonResponseException e) {
-      if (e.getStatusCode() == HttpServletResponse.SC_NOT_FOUND) {
-        return null;
-      } else {
-        throw e;
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java
deleted file mode 100644
index bef5bfd..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExampleBigQueryTableOptions.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package}.common;
-
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-/**
- * Options that can be used to configure BigQuery tables in Dataflow examples.
- * The project defaults to the project being used to run the example.
- */
-public interface ExampleBigQueryTableOptions extends DataflowPipelineOptions {
-  @Description("BigQuery dataset name")
-  @Default.String("dataflow_examples")
-  String getBigQueryDataset();
-  void setBigQueryDataset(String dataset);
-
-  @Description("BigQuery table name")
-  @Default.InstanceFactory(BigQueryTableFactory.class)
-  String getBigQueryTable();
-  void setBigQueryTable(String table);
-
-  @Description("BigQuery table schema")
-  TableSchema getBigQuerySchema();
-  void setBigQuerySchema(TableSchema schema);
-
-  /**
-   * Returns the job name as the default BigQuery table name.
-   */
-  static class BigQueryTableFactory implements DefaultValueFactory<String> {
-    @Override
-    public String create(PipelineOptions options) {
-      return options.as(DataflowPipelineOptions.class).getJobName()
-          .replace('-', '_');
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java
deleted file mode 100644
index 525de69..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package}.common;
-
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-/**
- * Options that can be used to configure Pub/Sub topic in Dataflow examples.
- */
-public interface ExamplePubsubTopicOptions extends DataflowPipelineOptions {
-  @Description("Pub/Sub topic")
-  @Default.InstanceFactory(PubsubTopicFactory.class)
-  String getPubsubTopic();
-  void setPubsubTopic(String topic);
-
-  @Description("Number of workers to use when executing the injector pipeline")
-  @Default.Integer(1)
-  int getInjectorNumWorkers();
-  void setInjectorNumWorkers(int numWorkers);
-
-  /**
-   * Returns a default Pub/Sub topic based on the project and the job names.
-   */
-  static class PubsubTopicFactory implements DefaultValueFactory<String> {
-    @Override
-    public String create(PipelineOptions options) {
-      DataflowPipelineOptions dataflowPipelineOptions =
-          options.as(DataflowPipelineOptions.class);
-      return "projects/" + dataflowPipelineOptions.getProject()
-          + "/topics/" + dataflowPipelineOptions.getJobName();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/PubsubFileInjector.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/PubsubFileInjector.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/PubsubFileInjector.java
deleted file mode 100644
index f6f80ae..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/PubsubFileInjector.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package}.common;
-
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.pubsub.model.PublishRequest;
-import com.google.api.services.pubsub.model.PubsubMessage;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.common.collect.ImmutableMap;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-/**
- * A batch Dataflow pipeline for injecting a set of GCS files into
- * a PubSub topic line by line. Empty lines are skipped.
- *
- * <p>This is useful for testing streaming
- * pipelines. Note that since batch pipelines might retry chunks, this
- * does _not_ guarantee exactly-once injection of file data. Some lines may
- * be published multiple times.
- * </p>
- */
-public class PubsubFileInjector {
-
-  /**
-   * An incomplete {@code PubsubFileInjector} transform with unbound output topic.
-   */
-  public static class Unbound {
-    private final String timestampLabelKey;
-
-    Unbound() {
-      this.timestampLabelKey = null;
-    }
-
-    Unbound(String timestampLabelKey) {
-      this.timestampLabelKey = timestampLabelKey;
-    }
-
-    Unbound withTimestampLabelKey(String timestampLabelKey) {
-      return new Unbound(timestampLabelKey);
-    }
-
-    public Bound publish(String outputTopic) {
-      return new Bound(outputTopic, timestampLabelKey);
-    }
-  }
-
-  /** A DoFn that publishes non-empty lines to Google Cloud PubSub. */
-  public static class Bound extends DoFn<String, Void> {
-    private final String outputTopic;
-    private final String timestampLabelKey;
-    public transient Pubsub pubsub;
-
-    public Bound(String outputTopic, String timestampLabelKey) {
-      this.outputTopic = outputTopic;
-      this.timestampLabelKey = timestampLabelKey;
-    }
-
-    @Override
-    public void startBundle(Context context) {
-      this.pubsub =
-          Transport.newPubsubClient(context.getPipelineOptions().as(DataflowPipelineOptions.class))
-              .build();
-    }
-
-    @Override
-    public void processElement(ProcessContext c) throws IOException {
-      if (c.element().isEmpty()) {
-        return;
-      }
-      PubsubMessage pubsubMessage = new PubsubMessage();
-      pubsubMessage.encodeData(c.element().getBytes());
-      if (timestampLabelKey != null) {
-        pubsubMessage.setAttributes(
-            ImmutableMap.of(timestampLabelKey, Long.toString(c.timestamp().getMillis())));
-      }
-      PublishRequest publishRequest = new PublishRequest();
-      publishRequest.setMessages(Arrays.asList(pubsubMessage));
-      this.pubsub.projects().topics().publish(outputTopic, publishRequest).execute();
-    }
-  }
-
-  /**
-   * Creates a {@code PubsubFileInjector} transform with the given timestamp label key.
-   */
-  public static Unbound withTimestampLabelKey(String timestampLabelKey) {
-    return new Unbound(timestampLabelKey);
-  }
-
-  /**
-   * Creates a {@code PubsubFileInjector} transform that publishes to the given output topic.
-   */
-  public static Bound publish(String outputTopic) {
-    return new Unbound().publish(outputTopic);
-  }
-
-  /**
-   * Command line parameter options.
-   */
-  private interface PubsubFileInjectorOptions extends PipelineOptions {
-    @Description("GCS location of files.")
-    @Validation.Required
-    String getInput();
-    void setInput(String value);
-
-    @Description("Topic to publish on.")
-    @Validation.Required
-    String getOutputTopic();
-    void setOutputTopic(String value);
-  }
-
-  /**
-   * Sets up and starts streaming pipeline.
-   */
-  public static void main(String[] args) {
-    PubsubFileInjectorOptions options = PipelineOptionsFactory.fromArgs(args)
-        .withValidation()
-        .as(PubsubFileInjectorOptions.class);
-
-    Pipeline pipeline = Pipeline.create(options);
-
-    pipeline
-        .apply(TextIO.Read.from(options.getInput()))
-        .apply(IntraBundleParallelization.of(PubsubFileInjector.publish(options.getOutputTopic()))
-            .withMaxParallelism(20));
-
-    pipeline.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java
deleted file mode 100644
index 7a9aa4c..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package};
-
-import com.google.common.io.Files;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.File;
-import java.nio.charset.StandardCharsets;
-
-/**
- * Tests for {@link DebuggingWordCount}.
- */
-@RunWith(JUnit4.class)
-public class DebuggingWordCountTest {
-  @Rule public TemporaryFolder tmpFolder = new TemporaryFolder();
-
-  @Test
-  public void testDebuggingWordCount() throws Exception {
-    File file = tmpFolder.newFile();
-    Files.write("stomach secret Flourish message Flourish here Flourish", file,
-        StandardCharsets.UTF_8);
-    DebuggingWordCount.main(new String[]{"--inputFile=" + file.getAbsolutePath()});
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java b/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java
deleted file mode 100644
index 45555ce..0000000
--- a/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package ${package};
-
-import ${package}.WordCount.CountWords;
-import ${package}.WordCount.ExtractWordsFn;
-import ${package}.WordCount.FormatAsTextFn;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.hamcrest.CoreMatchers;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Tests of WordCount.
- */
-@RunWith(JUnit4.class)
-public class WordCountTest {
-
-  /** Example test that tests a specific DoFn. */
-  @Test
-  public void testExtractWordsFn() {
-    DoFnTester<String, String> extractWordsFn =
-        DoFnTester.of(new ExtractWordsFn());
-
-    Assert.assertThat(extractWordsFn.processBatch(" some  input  words "),
-                      CoreMatchers.hasItems("some", "input", "words"));
-    Assert.assertThat(extractWordsFn.processBatch(" "),
-                      CoreMatchers.<String>hasItems());
-    Assert.assertThat(extractWordsFn.processBatch(" some ", " input", " words"),
-                      CoreMatchers.hasItems("some", "input", "words"));
-  }
-
-  static final String[] WORDS_ARRAY = new String[] {
-    "hi there", "hi", "hi sue bob",
-    "hi sue", "", "bob hi"};
-
-  static final List<String> WORDS = Arrays.asList(WORDS_ARRAY);
-
-  static final String[] COUNTS_ARRAY = new String[] {
-      "hi: 5", "there: 1", "sue: 2", "bob: 2"};
-
-  /** Example test that tests a PTransform by using an in-memory input and inspecting the output. */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testCountWords() throws Exception {
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()));
-
-    PCollection<String> output = input.apply(new CountWords())
-      .apply(ParDo.of(new FormatAsTextFn()));
-
-    DataflowAssert.that(output).containsInAnyOrder(COUNTS_ARRAY);
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties b/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties
deleted file mode 100644
index c59e77a..0000000
--- a/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties
+++ /dev/null
@@ -1,5 +0,0 @@
-package=it.pkg
-version=0.1-SNAPSHOT
-groupId=archetype.it
-artifactId=basic
-targetPlatform=1.7

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt
----------------------------------------------------------------------
diff --git a/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt b/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt
deleted file mode 100644
index 0b59873..0000000
--- a/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt
+++ /dev/null
@@ -1 +0,0 @@
-verify

[45/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java
deleted file mode 100644
index ab7df6f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BigQueryIO.java
+++ /dev/null
@@ -1,1499 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.api.client.json.JsonFactory;
-import com.google.api.services.bigquery.Bigquery;
-import com.google.api.services.bigquery.model.QueryRequest;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.coders.TableRowJsonCoder;
-import com.google.cloud.dataflow.sdk.coders.VarIntCoder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.WriteDisposition;
-import com.google.cloud.dataflow.sdk.options.BigQueryOptions;
-import com.google.cloud.dataflow.sdk.options.GcpOptions;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.BigQueryTableInserter;
-import com.google.cloud.dataflow.sdk.util.BigQueryTableRowIterator;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.Reshuffle;
-import com.google.cloud.dataflow.sdk.util.SystemDoFnInternal;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.hadoop.util.ApiErrorExtractor;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Preconditions;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.UUID;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ThreadLocalRandom;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.annotation.Nullable;
-
-/**
- * {@link PTransform}s for reading and writing
- * <a href="https://developers.google.com/bigquery/">BigQuery</a> tables.
- *
- * <h3>Table References</h3>
- * <p>A fully-qualified BigQuery table name consists of three components:
- * <ul>
- *   <li>{@code projectId}: the Cloud project id (defaults to
- *       {@link GcpOptions#getProject()}).
- *   <li>{@code datasetId}: the BigQuery dataset id, unique within a project.
- *   <li>{@code tableId}: a table id, unique within a dataset.
- * </ul>
- *
- * <p>BigQuery table references are stored as a {@link TableReference}, which comes
- * from the <a href="https://cloud.google.com/bigquery/client-libraries">
- * BigQuery Java Client API</a>.
- * Tables can be referred to as Strings, with or without the {@code projectId}.
- * A helper function is provided ({@link BigQueryIO#parseTableSpec(String)})
- * that parses the following string forms into a {@link TableReference}:
- *
- * <ul>
- *   <li>[{@code project_id}]:[{@code dataset_id}].[{@code table_id}]
- *   <li>[{@code dataset_id}].[{@code table_id}]
- * </ul>
- *
- * <h3>Reading</h3>
- * <p>To read from a BigQuery table, apply a {@link BigQueryIO.Read} transformation.
- * This produces a {@link PCollection} of {@link TableRow TableRows} as output:
- * <pre>{@code
- * PCollection<TableRow> shakespeare = pipeline.apply(
- *     BigQueryIO.Read.named("Read")
- *                    .from("clouddataflow-readonly:samples.weather_stations"));
- * }</pre>
- *
- * <p>See {@link TableRow} for more information on the {@link TableRow} object.
- *
- * <p>Users may provide a query to read from rather than reading all of a BigQuery table. If
- * specified, the result obtained by executing the specified query will be used as the data of the
- * input transform.
- *
- * <pre>{@code
- * PCollection<TableRow> shakespeare = pipeline.apply(
- *     BigQueryIO.Read.named("Read")
- *                    .fromQuery("SELECT year, mean_temp FROM samples.weather_stations"));
- * }</pre>
- *
- * <p>When creating a BigQuery input transform, users should provide either a query or a table.
- * Pipeline construction will fail with a validation error if neither or both are specified.
- *
- * <h3>Writing</h3>
- * <p>To write to a BigQuery table, apply a {@link BigQueryIO.Write} transformation.
- * This consumes a {@link PCollection} of {@link TableRow TableRows} as input.
- * <pre>{@code
- * PCollection<TableRow> quotes = ...
- *
- * List<TableFieldSchema> fields = new ArrayList<>();
- * fields.add(new TableFieldSchema().setName("source").setType("STRING"));
- * fields.add(new TableFieldSchema().setName("quote").setType("STRING"));
- * TableSchema schema = new TableSchema().setFields(fields);
- *
- * quotes.apply(BigQueryIO.Write
- *     .named("Write")
- *     .to("my-project:output.output_table")
- *     .withSchema(schema)
- *     .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
- * }</pre>
- *
- * <p>See {@link BigQueryIO.Write} for details on how to specify if a write should
- * append to an existing table, replace the table, or verify that the table is
- * empty. Note that the dataset being written to must already exist. Write
- * dispositions are not supported in streaming mode.
- *
- * <h3>Sharding BigQuery output tables</h3>
- * <p>A common use case is to dynamically generate BigQuery table names based on
- * the current window. To support this,
- * {@link BigQueryIO.Write#to(SerializableFunction)}
- * accepts a function mapping the current window to a tablespec. For example,
- * here's code that outputs daily tables to BigQuery:
- * <pre>{@code
- * PCollection<TableRow> quotes = ...
- * quotes.apply(Window.<TableRow>into(CalendarWindows.days(1)))
- *       .apply(BigQueryIO.Write
- *         .named("Write")
- *         .withSchema(schema)
- *         .to(new SerializableFunction<BoundedWindow, String>() {
- *           public String apply(BoundedWindow window) {
- *             // The cast below is safe because CalendarWindows.days(1) produces IntervalWindows.
- *             String dayString = DateTimeFormat.forPattern("yyyy_MM_dd")
- *                  .withZone(DateTimeZone.UTC)
- *                  .print(((IntervalWindow) window).start());
- *             return "my-project:output.output_table_" + dayString;
- *           }
- *         }));
- * }</pre>
- *
- * <p>Per-window tables are not yet supported in batch mode.
- *
- * <h3>Permissions</h3>
- * <p>Permission requirements depend on the {@link PipelineRunner} that is used to execute the
- * Dataflow job. Please refer to the documentation of corresponding {@link PipelineRunner}s for
- * more details.
- *
- * <p>Please see <a href="https://cloud.google.com/bigquery/access-control">BigQuery Access Control
- * </a> for security and permission related information specific to BigQuery.
- */
-public class BigQueryIO {
-  private static final Logger LOG = LoggerFactory.getLogger(BigQueryIO.class);
-
-  /**
-   * Singleton instance of the JSON factory used to read and write JSON
-   * formatted rows.
-   */
-  private static final JsonFactory JSON_FACTORY = Transport.getJsonFactory();
-
-  /**
-   * Project IDs must contain 6-63 lowercase letters, digits, or dashes.
-   * IDs must start with a letter and may not end with a dash.
-   * This regex isn't exact - this allows for patterns that would be rejected by
-   * the service, but this is sufficient for basic parsing of table references.
-   */
-  private static final String PROJECT_ID_REGEXP = "[a-z][-a-z0-9:.]{4,61}[a-z0-9]";
-
-  /**
-   * Regular expression that matches Dataset IDs.
-   */
-  private static final String DATASET_REGEXP = "[-\\w.]{1,1024}";
-
-  /**
-   * Regular expression that matches Table IDs.
-   */
-  private static final String TABLE_REGEXP = "[-\\w$@]{1,1024}";
-
-  /**
-   * Matches table specifications in the form {@code "[project_id]:[dataset_id].[table_id]"} or
-   * {@code "[dataset_id].[table_id]"}.
-   */
-  private static final String DATASET_TABLE_REGEXP =
-      String.format("((?<PROJECT>%s):)?(?<DATASET>%s)\\.(?<TABLE>%s)", PROJECT_ID_REGEXP,
-          DATASET_REGEXP, TABLE_REGEXP);
-
-  private static final Pattern TABLE_SPEC = Pattern.compile(DATASET_TABLE_REGEXP);
-
-  // TODO: make this private and remove improper access from BigQueryIOTranslator.
-  public static final String SET_PROJECT_FROM_OPTIONS_WARNING =
-      "No project specified for BigQuery table \"%1$s.%2$s\". Assuming it is in \"%3$s\". If the"
-      + " table is in a different project please specify it as a part of the BigQuery table"
-      + " definition.";
-
-  private static final String RESOURCE_NOT_FOUND_ERROR =
-      "BigQuery %1$s not found for table \"%2$s\" . Please create the %1$s before pipeline"
-          + " execution. If the %1$s is created by an earlier stage of the pipeline, this"
-          + " validation can be disabled using #withoutValidation.";
-
-  private static final String UNABLE_TO_CONFIRM_PRESENCE_OF_RESOURCE_ERROR =
-      "Unable to confirm BigQuery %1$s presence for table \"%2$s\". If the %1$s is created by"
-          + " an earlier stage of the pipeline, this validation can be disabled using"
-          + " #withoutValidation.";
-
-  /**
-   * Parse a table specification in the form
-   * {@code "[project_id]:[dataset_id].[table_id]"} or {@code "[dataset_id].[table_id]"}.
-   *
-   * <p>If the project id is omitted, the default project id is used.
-   */
-  public static TableReference parseTableSpec(String tableSpec) {
-    Matcher match = TABLE_SPEC.matcher(tableSpec);
-    if (!match.matches()) {
-      throw new IllegalArgumentException(
-          "Table reference is not in [project_id]:[dataset_id].[table_id] "
-          + "format: " + tableSpec);
-    }
-
-    TableReference ref = new TableReference();
-    ref.setProjectId(match.group("PROJECT"));
-
-    return ref.setDatasetId(match.group("DATASET")).setTableId(match.group("TABLE"));
-  }
-
-  /**
-   * Returns a canonical string representation of the {@link TableReference}.
-   */
-  public static String toTableSpec(TableReference ref) {
-    StringBuilder sb = new StringBuilder();
-    if (ref.getProjectId() != null) {
-      sb.append(ref.getProjectId());
-      sb.append(":");
-    }
-
-    sb.append(ref.getDatasetId()).append('.').append(ref.getTableId());
-    return sb.toString();
-  }
-
-  /**
-   * A {@link PTransform} that reads from a BigQuery table and returns a
-   * {@link PCollection} of {@link TableRow TableRows} containing each of the rows of the table.
-   *
-   * <p>Each {@link TableRow} contains values indexed by column name. Here is a
-   * sample processing function that processes a "line" column from rows:
-   * <pre>{@code
-   * static class ExtractWordsFn extends DoFn<TableRow, String> {
-   *   public void processElement(ProcessContext c) {
-   *     // Get the "line" field of the TableRow object, split it into words, and emit them.
-   *     TableRow row = c.element();
-   *     String[] words = row.get("line").toString().split("[^a-zA-Z']+");
-   *     for (String word : words) {
-   *       if (!word.isEmpty()) {
-   *         c.output(word);
-   *       }
-   *     }
-   *   }
-   * }}</pre>
-   */
-  public static class Read {
-    /**
-     * Returns a {@link Read.Bound} with the given name. The BigQuery table or query to be read
-     * from has not yet been configured.
-     */
-    public static Bound named(String name) {
-      return new Bound().named(name);
-    }
-
-    /**
-     * Reads a BigQuery table specified as {@code "[project_id]:[dataset_id].[table_id]"} or
-     * {@code "[dataset_id].[table_id]"} for tables within the current project.
-     */
-    public static Bound from(String tableSpec) {
-      return new Bound().from(tableSpec);
-    }
-
-    /**
-     * Reads results received after executing the given query.
-     */
-    public static Bound fromQuery(String query) {
-      return new Bound().fromQuery(query);
-    }
-
-    /**
-     * Reads a BigQuery table specified as a {@link TableReference} object.
-     */
-    public static Bound from(TableReference table) {
-      return new Bound().from(table);
-    }
-
-    /**
-     * Disables BigQuery table validation, which is enabled by default.
-     */
-    public static Bound withoutValidation() {
-      return new Bound().withoutValidation();
-    }
-
-    /**
-     * A {@link PTransform} that reads from a BigQuery table and returns a bounded
-     * {@link PCollection} of {@link TableRow TableRows}.
-     *
-     * <p>Exactly one of a table or a query must be configured before the transform is validated;
-     * see {@link #validate(PInput)}.
-     */
-    public static class Bound extends PTransform<PInput, PCollection<TableRow>> {
-      // Table to read from, or null when reading the results of a query. The referenced object
-      // may have its project id filled in by validate().
-      TableReference table;
-      // Query whose results are read, or null when reading from a table.
-      final String query;
-      // Whether the table/dataset presence checks and the query dry run are performed.
-      final boolean validate;
-      // Result flattening preference; null until a query is set or flattening is disabled.
-      @Nullable
-      Boolean flattenResults;
-
-      private static final String QUERY_VALIDATION_FAILURE_ERROR =
-          "Validation of query \"%1$s\" failed. If the query depends on an earlier stage of the"
-          + " pipeline, This validation can be disabled using #withoutValidation.";
-
-      private Bound() {
-        this(null, null, null, true, null);
-      }
-
-      private Bound(String name, String query, TableReference reference, boolean validate,
-          Boolean flattenResults) {
-        super(name);
-        this.table = reference;
-        this.query = query;
-        this.validate = validate;
-        this.flattenResults = flattenResults;
-      }
-
-      /**
-       * Returns a copy of this transform using the name associated with this transformation.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound named(String name) {
-        return new Bound(name, query, table, validate, flattenResults);
-      }
-
-      /**
-       * Returns a copy of this transform that reads from the specified table. Refer to
-       * {@link #parseTableSpec(String)} for the specification format.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound from(String tableSpec) {
-        return from(parseTableSpec(tableSpec));
-      }
-
-      /**
-       * Returns a copy of this transform that reads from the specified table.
-       *
-       * <p>Does not modify this object. The given {@link TableReference} is stored as-is (it is
-       * not copied), so {@link #validate(PInput)} may later set its project id.
-       */
-      public Bound from(TableReference table) {
-        return new Bound(name, query, table, validate, flattenResults);
-      }
-
-      /**
-       * Returns a copy of this transform that reads the results of the specified query.
-       *
-       * <p>Does not modify this object.
-       *
-       * <p>By default, the query results will be flattened -- see
-       * "flattenResults" in the <a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs">
-       * Jobs documentation</a> for more information.  To disable flattening, use
-       * {@link BigQueryIO.Read.Bound#withoutResultFlattening}.
-       */
-      public Bound fromQuery(String query) {
-        // Keep an explicit preference if one was already set; otherwise default to flattening.
-        return new Bound(name, query, table, validate,
-            MoreObjects.firstNonNull(flattenResults, Boolean.TRUE));
-      }
-
-      /**
-       * Disable table validation.
-       */
-      public Bound withoutValidation() {
-        return new Bound(name, query, table, false, flattenResults);
-      }
-
-      /**
-       * Disable <a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs">
-       * flattening of query results</a>.
-       *
-       * <p>Only valid when a query is used ({@link #fromQuery}). Setting this option when reading
-       * from a table will cause an error during validation.
-       */
-      public Bound withoutResultFlattening() {
-        return new Bound(name, query, table, validate, false);
-      }
-
-      /**
-       * Validates the current {@link PTransform}.
-       *
-       * <p>Checks that exactly one of a table or a query is configured and that a result
-       * flattening preference is present only for queries. If the table has no project id, the
-       * project from {@link BigQueryOptions} is written into the stored {@link TableReference}.
-       * Unless validation has been disabled, the dataset and table are then checked for presence,
-       * or the query is dry-run.
-       *
-       * @throws IllegalStateException if the table/query/flattening configuration is inconsistent
-       */
-      @Override
-      public void validate(PInput input) {
-        if (table == null && query == null) {
-          throw new IllegalStateException(
-              "Invalid BigQuery read operation, either table reference or query has to be set");
-        } else if (table != null && query != null) {
-          throw new IllegalStateException("Invalid BigQuery read operation. Specifies both a"
-              + " query and a table, only one of these should be provided");
-        } else if (table != null && flattenResults != null) {
-          throw new IllegalStateException("Invalid BigQuery read operation. Specifies a"
-              + " table with a result flattening preference, which is not configurable");
-        } else if (query != null && flattenResults == null) {
-          throw new IllegalStateException("Invalid BigQuery read operation. Specifies a"
-              + " query without a result flattening preference");
-        }
-
-        BigQueryOptions bqOptions = input.getPipeline().getOptions().as(BigQueryOptions.class);
-        if (table != null && table.getProjectId() == null) {
-          // If user does not specify a project we assume the table to be located in the project
-          // that owns the Dataflow job.
-          LOG.warn(String.format(SET_PROJECT_FROM_OPTIONS_WARNING, table.getDatasetId(),
-              table.getTableId(), bqOptions.getProject()));
-          // Note: this updates the TableReference object held by this transform in place.
-          table.setProjectId(bqOptions.getProject());
-        }
-
-        if (validate) {
-          // Check for source table/query presence for early failure notification.
-          // Note that a presence check can fail if the table or dataset are created by earlier
-          // stages of the pipeline or if a query depends on earlier stages of a pipeline. For these
-          // cases the withoutValidation method can be used to disable the check.
-          if (table != null) {
-            verifyDatasetPresence(bqOptions, table);
-            verifyTablePresence(bqOptions, table);
-          }
-          if (query != null) {
-            dryRunQuery(bqOptions, query);
-          }
-        }
-      }
-
-      /**
-       * Submits {@code query} as a dry run against the project from {@code options}.
-       *
-       * @throws IllegalArgumentException if the dry run fails for any reason; the original
-       *     exception is attached as the cause
-       */
-      private static void dryRunQuery(BigQueryOptions options, String query) {
-        Bigquery client = Transport.newBigQueryClient(options).build();
-        QueryRequest request = new QueryRequest();
-        request.setQuery(query);
-        request.setDryRun(true);
-
-        try {
-          BigQueryTableRowIterator.executeWithBackOff(
-              client.jobs().query(options.getProject(), request), QUERY_VALIDATION_FAILURE_ERROR,
-              query);
-        } catch (Exception e) {
-          if (e instanceof InterruptedException) {
-            // The broad catch would otherwise swallow the interrupt; re-assert it so code
-            // further up the stack can still observe that the thread was interrupted.
-            Thread.currentThread().interrupt();
-          }
-          throw new IllegalArgumentException(
-              String.format(QUERY_VALIDATION_FAILURE_ERROR, query), e);
-        }
-      }
-
-      @Override
-      public PCollection<TableRow> apply(PInput input) {
-        return PCollection.<TableRow>createPrimitiveOutputInternal(
-            input.getPipeline(),
-            WindowingStrategy.globalDefault(),
-            IsBounded.BOUNDED)
-            // Force the output's Coder to be what the read is using, and
-            // unchangeable later, to ensure that we read the input in the
-            // format specified by the Read transform.
-            .setCoder(TableRowJsonCoder.of());
-      }
-
-      @Override
-      protected Coder<TableRow> getDefaultOutputCoder() {
-        return TableRowJsonCoder.of();
-      }
-
-      // Registers the evaluator the DirectPipelineRunner uses for this transform.
-      static {
-        DirectPipelineRunner.registerDefaultTransformEvaluator(
-            Bound.class, new DirectPipelineRunner.TransformEvaluator<Bound>() {
-              @Override
-              public void evaluate(
-                  Bound transform, DirectPipelineRunner.EvaluationContext context) {
-                evaluateReadHelper(transform, context);
-              }
-            });
-      }
-
-      /**
-       * Returns the table to read, or {@code null} if reading from a query instead.
-       */
-      public TableReference getTable() {
-        return table;
-      }
-
-      /**
-       * Returns the query to be read, or {@code null} if reading from a table instead.
-       */
-      public String getQuery() {
-        return query;
-      }
-
-      /**
-       * Returns true if table validation is enabled.
-       */
-      public boolean getValidate() {
-        return validate;
-      }
-
-      /**
-       * Returns true/false if result flattening is enabled/disabled, or null if not applicable.
-       */
-      public Boolean getFlattenResults() {
-        return flattenResults;
-      }
-    }
-
-    /** Disallow construction of utility class. */
-    private Read() {}
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@link PTransform} that writes a {@link PCollection} containing {@link TableRow TableRows}
-   * to a BigQuery table.
-   *
-   * <p>In BigQuery, each table has an enclosing dataset. The dataset being written must already
-   * exist.
-   *
-   * <p>By default, tables will be created if they do not exist, which corresponds to a
-   * {@link CreateDisposition#CREATE_IF_NEEDED} disposition that matches the default of BigQuery's
-   * Jobs API. A schema must be provided (via {@link BigQueryIO.Write#withSchema(TableSchema)}),
-   * or else the transform may fail at runtime with an {@link IllegalArgumentException}.
-   *
-   * <p>By default, writes require an empty table, which corresponds to
-   * a {@link WriteDisposition#WRITE_EMPTY} disposition that matches the
-   * default of BigQuery's Jobs API.
-   *
-   * <p>Here is a sample transform that produces TableRow values containing
-   * "word" and "count" columns:
-   * <pre>{@code
-   * static class FormatCountsFn extends DoFn<KV<String, Long>, TableRow> {
-   *   public void processElement(ProcessContext c) {
-   *     TableRow row = new TableRow()
-   *         .set("word", c.element().getKey())
-   *         .set("count", c.element().getValue().intValue());
-   *     c.output(row);
-   *   }
-   * }}</pre>
-   */
-  public static class Write {
-    /**
-     * An enumeration type for the BigQuery create disposition strings.
-     *
-     * @see <a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.createDisposition">
-     * <code>configuration.query.createDisposition</code> in the BigQuery Jobs API</a>
-     */
-    public enum CreateDisposition {
-      /**
-       * Specifies that tables should not be created.
-       *
-       * <p>If the output table does not exist, the write fails.
-       */
-      CREATE_NEVER,
-
-      /**
-       * Specifies that tables should be created if needed. This is the default
-       * behavior.
-       *
-       * <p>Requires that a table schema is provided via {@link BigQueryIO.Write#withSchema}.
-       * This precondition is checked before starting a job. The schema is
-       * not required to match an existing table's schema.
-       *
-       * <p>When this transformation is executed, if the output table does not
-       * exist, the table is created from the provided schema. Note that even if
-       * the table exists, it may be recreated if necessary when paired with a
-       * {@link WriteDisposition#WRITE_TRUNCATE}.
-       */
-      CREATE_IF_NEEDED
-    }
-
-    /**
-     * An enumeration type for the BigQuery write disposition strings.
-     *
-     * @see <a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query.writeDisposition">
-     * <code>configuration.query.writeDisposition</code> in the BigQuery Jobs API</a>
-     */
-    public enum WriteDisposition {
-      /**
-       * Specifies that write should replace a table.
-       *
-       * <p>The replacement may occur in multiple steps - for instance by first
-       * removing the existing table, then creating a replacement, then filling
-       * it in. This is not an atomic operation, and external programs may
-       * see the table in any of these intermediate steps.
-       */
-      WRITE_TRUNCATE,
-
-      /**
-       * Specifies that rows may be appended to an existing table.
-       */
-      WRITE_APPEND,
-
-      /**
-       * Specifies that the output table must be empty. This is the default
-       * behavior.
-       *
-       * <p>If the output table is not empty, the write fails at runtime.
-       *
-       * <p>This check may occur long before data is written, and does not
-       * guarantee exclusive access to the table. If two programs are run
-       * concurrently, each specifying the same output table and
-       * a {@link WriteDisposition} of {@link WriteDisposition#WRITE_EMPTY}, it is possible
-       * for both to succeed.
-       */
-      WRITE_EMPTY
-    }
-
-    /**
-     * Creates a write transformation with the given transform name. The BigQuery table to be
-     * written has not yet been configured.
-     */
-    public static Bound named(String name) {
-      return new Bound().named(name);
-    }
-
-    /**
-     * Creates a write transformation for the given table specification.
-     *
-     * <p>Refer to {@link #parseTableSpec(String)} for the specification format.
-     */
-    public static Bound to(String tableSpec) {
-      return new Bound().to(tableSpec);
-    }
-
-    /** Creates a write transformation for the given table. */
-    public static Bound to(TableReference table) {
-      return new Bound().to(table);
-    }
-
-    /**
-     * Creates a write transformation from a function that maps windows to table specifications.
-     * Each time a new window is encountered, this function will be called and the resulting table
-     * will be created. Records within that window will be written to the associated table.
-     *
-     * <p>See {@link #parseTableSpec(String)} for the format that {@code tableSpecFunction} should
-     * return.
-     *
-     * <p>{@code tableSpecFunction} should be deterministic. When given the same window, it should
-     * always return the same table specification.
-     */
-    public static Bound to(SerializableFunction<BoundedWindow, String> tableSpecFunction) {
-      return new Bound().to(tableSpecFunction);
-    }
-
-    /**
-     * Creates a write transformation from a function that maps windows to {@link TableReference}
-     * objects.
-     *
-     * <p>{@code tableRefFunction} should be deterministic. When given the same window, it should
-     * always return the same table reference.
-     */
-    public static Bound toTableReference(
-        SerializableFunction<BoundedWindow, TableReference> tableRefFunction) {
-      return new Bound().toTableReference(tableRefFunction);
-    }
-
-    /**
-     * Creates a write transformation with the specified schema to use in table creation.
-     *
-     * <p>The schema is <i>required</i> only if writing to a table that does not already
-     * exist, and {@link CreateDisposition} is set to
-     * {@link CreateDisposition#CREATE_IF_NEEDED}.
-     */
-    public static Bound withSchema(TableSchema schema) {
-      return new Bound().withSchema(schema);
-    }
-
-    /** Creates a write transformation with the specified options for creating the table. */
-    public static Bound withCreateDisposition(CreateDisposition disposition) {
-      return new Bound().withCreateDisposition(disposition);
-    }
-
-    /** Creates a write transformation with the specified options for writing to the table. */
-    public static Bound withWriteDisposition(WriteDisposition disposition) {
-      return new Bound().withWriteDisposition(disposition);
-    }
-
-    /**
-     * Creates a write transformation with BigQuery table validation disabled.
-     */
-    public static Bound withoutValidation() {
-      return new Bound().withoutValidation();
-    }
-
-    /**
-     * A {@link PTransform} that can write either a bounded or unbounded
-     * {@link PCollection} of {@link TableRow TableRows} to a BigQuery table.
-     */
-    public static class Bound extends PTransform<PCollection<TableRow>, PDone> {
-      // Fixed destination table, or null when a per-window table function is used.
-      final TableReference table;
-
-      // Function choosing the destination table for each window, or null when a fixed table
-      // is used.
-      final SerializableFunction<BoundedWindow, TableReference> tableRefFunction;
-
-      // Table schema. The schema is required only if the table does not exist.
-      final TableSchema schema;
-
-      // Options for creating the table. Valid values are CREATE_IF_NEEDED and
-      // CREATE_NEVER.
-      final CreateDisposition createDisposition;
-
-      // Options for writing to the table. Valid values are WRITE_TRUNCATE,
-      // WRITE_APPEND and WRITE_EMPTY.
-      final WriteDisposition writeDisposition;
-
-      // An option to indicate if table validation is desired. Default is true.
-      final boolean validate;
-
-      /**
-       * Adapts a function producing table specification strings into one producing
-       * {@link TableReference} objects by parsing each returned specification.
-       */
-      private static class TranslateTableSpecFunction implements
-          SerializableFunction<BoundedWindow, TableReference> {
-        private SerializableFunction<BoundedWindow, String> tableSpecFunction;
-
-        TranslateTableSpecFunction(SerializableFunction<BoundedWindow, String> tableSpecFunction) {
-          this.tableSpecFunction = tableSpecFunction;
-        }
-
-        @Override
-        public TableReference apply(BoundedWindow value) {
-          return parseTableSpec(tableSpecFunction.apply(value));
-        }
-      }
-
-      /**
-       * @deprecated Should be private. Instead, use one of the factory methods in
-       * {@link BigQueryIO.Write}, such as {@link BigQueryIO.Write#to(String)}, to create an
-       * instance of this class.
-       */
-      @Deprecated
-      public Bound() {
-        this(null, null, null, null, CreateDisposition.CREATE_IF_NEEDED,
-            WriteDisposition.WRITE_EMPTY, true);
-      }
-
-      private Bound(String name, TableReference ref,
-          SerializableFunction<BoundedWindow, TableReference> tableRefFunction, TableSchema schema,
-          CreateDisposition createDisposition, WriteDisposition writeDisposition,
-          boolean validate) {
-        super(name);
-        this.table = ref;
-        this.tableRefFunction = tableRefFunction;
-        this.schema = schema;
-        this.createDisposition = createDisposition;
-        this.writeDisposition = writeDisposition;
-        this.validate = validate;
-      }
-
-      /**
-       * Returns a copy of this write transformation, but with the specified transform name.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound named(String name) {
-        return new Bound(name, table, tableRefFunction, schema, createDisposition,
-            writeDisposition, validate);
-      }
-
-      /**
-       * Returns a copy of this write transformation, but writing to the specified table. Refer to
-       * {@link #parseTableSpec(String)} for the specification format.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound to(String tableSpec) {
-        return to(parseTableSpec(tableSpec));
-      }
-
-      /**
-       * Returns a copy of this write transformation, but writing to the specified table.
-       *
-       * <p>Does not modify this object. The given {@link TableReference} is stored as-is (it is
-       * not copied), so {@link #apply} may later set its project id.
-       */
-      public Bound to(TableReference table) {
-        return new Bound(name, table, tableRefFunction, schema, createDisposition,
-            writeDisposition, validate);
-      }
-
-      /**
-       * Returns a copy of this write transformation, but using the specified function to determine
-       * which table to write to for each window.
-       *
-       * <p>Does not modify this object.
-       *
-       * <p>{@code tableSpecFunction} should be deterministic. When given the same window, it
-       * should always return the same table specification.
-       */
-      public Bound to(
-          SerializableFunction<BoundedWindow, String> tableSpecFunction) {
-        return toTableReference(new TranslateTableSpecFunction(tableSpecFunction));
-      }
-
-      /**
-       * Returns a copy of this write transformation, but using the specified function to determine
-       * which table to write to for each window.
-       *
-       * <p>Does not modify this object.
-       *
-       * <p>{@code tableRefFunction} should be deterministic. When given the same window, it should
-       * always return the same table reference.
-       */
-      public Bound toTableReference(
-          SerializableFunction<BoundedWindow, TableReference> tableRefFunction) {
-        return new Bound(name, table, tableRefFunction, schema, createDisposition,
-            writeDisposition, validate);
-      }
-
-      /**
-       * Returns a copy of this write transformation, but using the specified schema for rows
-       * to be written.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound withSchema(TableSchema schema) {
-        return new Bound(name, table, tableRefFunction, schema, createDisposition,
-            writeDisposition, validate);
-      }
-
-      /**
-       * Returns a copy of this write transformation, but using the specified create disposition.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound withCreateDisposition(CreateDisposition createDisposition) {
-        return new Bound(name, table, tableRefFunction, schema, createDisposition,
-            writeDisposition, validate);
-      }
-
-      /**
-       * Returns a copy of this write transformation, but using the specified write disposition.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound withWriteDisposition(WriteDisposition writeDisposition) {
-        return new Bound(name, table, tableRefFunction, schema, createDisposition,
-            writeDisposition, validate);
-      }
-
-      /**
-       * Returns a copy of this write transformation, but without BigQuery table validation.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound withoutValidation() {
-        return new Bound(name, table, tableRefFunction, schema, createDisposition,
-            writeDisposition, false);
-      }
-
-      /**
-       * Checks that the given table holds no rows. A table that does not exist is treated as
-       * empty.
-       *
-       * @throws IllegalArgumentException if the table is reported as non-empty
-       * @throws RuntimeException if emptiness cannot be determined for a reason other than the
-       *     table not being found
-       */
-      private static void verifyTableEmpty(
-          BigQueryOptions options,
-          TableReference table) {
-        try {
-          Bigquery client = Transport.newBigQueryClient(options).build();
-          BigQueryTableInserter inserter = new BigQueryTableInserter(client);
-          if (!inserter.isEmpty(table)) {
-            throw new IllegalArgumentException(
-                "BigQuery table is not empty: " + BigQueryIO.toTableSpec(table));
-          }
-        } catch (IOException e) {
-          ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
-          // If the table does not exist, it is considered empty and there is nothing to do.
-          // Any other failure means emptiness could not be confirmed.
-          if (!errorExtractor.itemNotFound(e)) {
-            throw new RuntimeException(
-                "unable to confirm BigQuery table emptiness for table "
-                    + BigQueryIO.toTableSpec(table), e);
-          }
-        }
-      }
-
-      /**
-       * Checks the configuration of this write and applies it to {@code input}.
-       *
-       * <p>Exactly one of a table reference or a table function must be set, and a schema is
-       * required when the create disposition is {@link CreateDisposition#CREATE_IF_NEEDED}. If a
-       * fixed table has no project id, the project from {@link BigQueryOptions} is written into
-       * the stored {@link TableReference}. When validation is enabled and a fixed table is used,
-       * the dataset (and, depending on the dispositions, the table's presence and emptiness) is
-       * checked up front.
-       *
-       * <p>For streaming pipelines, or when a table function is used, the input is handed to
-       * {@code StreamWithDeDup}; otherwise only a {@link PDone} is returned from this method.
-       *
-       * @throws IllegalStateException if neither or both of a table and a table function are set
-       * @throws IllegalArgumentException if a required schema is missing, or if an unsupported
-       *     disposition is combined with streaming or a table function
-       */
-      @Override
-      public PDone apply(PCollection<TableRow> input) {
-        BigQueryOptions options = input.getPipeline().getOptions().as(BigQueryOptions.class);
-
-        if (table == null && tableRefFunction == null) {
-          throw new IllegalStateException(
-              "must set the table reference of a BigQueryIO.Write transform");
-        }
-        if (table != null && tableRefFunction != null) {
-          throw new IllegalStateException(
-              "Cannot set both a table reference and a table function for a BigQueryIO.Write "
-                + "transform");
-        }
-
-        if (createDisposition == CreateDisposition.CREATE_IF_NEEDED && schema == null) {
-          throw new IllegalArgumentException("CreateDisposition is CREATE_IF_NEEDED, "
-              + "however no schema was provided.");
-        }
-
-        if (table != null && table.getProjectId() == null) {
-          // If user does not specify a project we assume the table to be located in the project
-          // that owns the Dataflow job.
-          String projectIdFromOptions = options.getProject();
-          LOG.warn(String.format(BigQueryIO.SET_PROJECT_FROM_OPTIONS_WARNING, table.getDatasetId(),
-              table.getTableId(), projectIdFromOptions));
-          // Note: this updates the TableReference object held by this transform in place.
-          table.setProjectId(projectIdFromOptions);
-        }
-
-        // Check for destination table presence and emptiness for early failure notification.
-        // Note that a presence check can fail if the table or dataset are created by earlier stages
-        // of the pipeline. For these cases the withoutValidation method can be used to disable
-        // the check.
-        // Unfortunately we can't validate anything early in case tableRefFunction is specified.
-        if (table != null && validate) {
-          verifyDatasetPresence(options, table);
-          if (getCreateDisposition() == BigQueryIO.Write.CreateDisposition.CREATE_NEVER) {
-            verifyTablePresence(options, table);
-          }
-          if (getWriteDisposition() == BigQueryIO.Write.WriteDisposition.WRITE_EMPTY) {
-            verifyTableEmpty(options, table);
-          }
-        }
-
-        // In streaming, BigQuery write is taken care of by StreamWithDeDup transform.
-        // We also currently do this if a tablespec function is specified.
-        if (options.isStreaming() || tableRefFunction != null) {
-          if (createDisposition == CreateDisposition.CREATE_NEVER) {
-            throw new IllegalArgumentException("CreateDisposition.CREATE_NEVER is not "
-                + "supported for unbounded PCollections or when using tablespec functions.");
-          }
-
-          if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) {
-            throw new IllegalArgumentException("WriteDisposition.WRITE_TRUNCATE is not "
-                + "supported for unbounded PCollections or when using tablespec functions.");
-          }
-
-          return input.apply(new StreamWithDeDup(table, tableRefFunction, schema));
-        }
-
-        return PDone.in(input.getPipeline());
-      }
-
-      @Override
-      protected Coder<Void> getDefaultOutputCoder() {
-        return VoidCoder.of();
-      }
-
-      // Registers the evaluator the DirectPipelineRunner uses for this transform.
-      static {
-        DirectPipelineRunner.registerDefaultTransformEvaluator(
-            Bound.class, new DirectPipelineRunner.TransformEvaluator<Bound>() {
-              @Override
-              public void evaluate(
-                  Bound transform, DirectPipelineRunner.EvaluationContext context) {
-                evaluateWriteHelper(transform, context);
-              }
-            });
-      }
-
-      /** Returns the create disposition. */
-      public CreateDisposition getCreateDisposition() {
-        return createDisposition;
-      }
-
-      /** Returns the write disposition. */
-      public WriteDisposition getWriteDisposition() {
-        return writeDisposition;
-      }
-
-      /** Returns the table schema. */
-      public TableSchema getSchema() {
-        return schema;
-      }
-
-      /**
-       * Returns the table reference, or {@code null} if none was configured (for example when a
-       * per-window table function is used instead).
-       */
-      public TableReference getTable() {
-        return table;
-      }
-
-      /** Returns {@code true} if table validation is enabled. */
-      public boolean getValidate() {
-        return validate;
-      }
-    }
-
-    /** Disallow construction of utility class. */
-    private Write() {}
-  }
-
-  /**
-   * Checks that the dataset named by {@code table} can be fetched from BigQuery.
-   *
-   * @throws IllegalArgumentException if BigQuery reports the dataset as not found
-   * @throws RuntimeException if the lookup fails for any other reason
-   */
-  private static void verifyDatasetPresence(BigQueryOptions options, TableReference table) {
-    try {
-      Bigquery client = Transport.newBigQueryClient(options).build();
-      BigQueryTableRowIterator.executeWithBackOff(
-          client.datasets().get(table.getProjectId(), table.getDatasetId()),
-          RESOURCE_NOT_FOUND_ERROR, "dataset", BigQueryIO.toTableSpec(table));
-    } catch (Exception e) {
-      if (e instanceof InterruptedException) {
-        // The broad catch would otherwise swallow the interrupt; re-assert it so code further
-        // up the stack can still observe that the thread was interrupted.
-        Thread.currentThread().interrupt();
-      }
-      ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
-      if ((e instanceof IOException) && errorExtractor.itemNotFound((IOException) e)) {
-        throw new IllegalArgumentException(
-            String.format(RESOURCE_NOT_FOUND_ERROR, "dataset", BigQueryIO.toTableSpec(table)),
-            e);
-      } else {
-        throw new RuntimeException(
-            String.format(UNABLE_TO_CONFIRM_PRESENCE_OF_RESOURCE_ERROR, "dataset",
-                BigQueryIO.toTableSpec(table)),
-            e);
-      }
-    }
-  }
-
-  /**
-   * Checks that the table named by {@code table} can be fetched from BigQuery.
-   *
-   * @throws IllegalArgumentException if BigQuery reports the table as not found
-   * @throws RuntimeException if the lookup fails for any other reason
-   */
-  private static void verifyTablePresence(BigQueryOptions options, TableReference table) {
-    try {
-      Bigquery client = Transport.newBigQueryClient(options).build();
-      BigQueryTableRowIterator.executeWithBackOff(
-          client.tables().get(table.getProjectId(), table.getDatasetId(), table.getTableId()),
-          RESOURCE_NOT_FOUND_ERROR, "table", BigQueryIO.toTableSpec(table));
-    } catch (Exception e) {
-      if (e instanceof InterruptedException) {
-        // The broad catch would otherwise swallow the interrupt; re-assert it so code further
-        // up the stack can still observe that the thread was interrupted.
-        Thread.currentThread().interrupt();
-      }
-      ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
-      if ((e instanceof IOException) && errorExtractor.itemNotFound((IOException) e)) {
-        throw new IllegalArgumentException(
-            String.format(RESOURCE_NOT_FOUND_ERROR, "table", BigQueryIO.toTableSpec(table)), e);
-      } else {
-        throw new RuntimeException(
-            String.format(UNABLE_TO_CONFIRM_PRESENCE_OF_RESOURCE_ERROR, "table",
-                BigQueryIO.toTableSpec(table)),
-            e);
-      }
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Implementation of DoFn to perform streaming BigQuery write.
-   */
-  @SystemDoFnInternal
-  private static class StreamingWriteFn
-      extends DoFn<KV<ShardedKey<String>, TableRowInfo>, Void> {
-    /** TableSchema in JSON. Use String to make the class Serializable. */
-    private final String jsonTableSchema;
-
-    /** JsonTableRows to accumulate BigQuery rows in order to batch writes. */
-    private transient Map<String, List<TableRow>> tableRows;
-
-    /** The list of unique ids for each BigQuery table row. */
-    private transient Map<String, List<String>> uniqueIdsForTableRows;
-
-    /** The list of tables created so far, so we don't try the creation
-        each time. */
-    private static Set<String> createdTables =
-        Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
-
-    /** Tracks bytes written, exposed as "ByteCount" Counter. */
-    private Aggregator<Long, Long> byteCountAggregator =
-        createAggregator("ByteCount", new Sum.SumLongFn());
-
-    /** Constructor. */
-    StreamingWriteFn(TableSchema schema) {
-      try {
-        jsonTableSchema = JSON_FACTORY.toString(schema);
-      } catch (IOException e) {
-        throw new RuntimeException("Cannot initialize BigQuery streaming writer.", e);
-      }
-    }
-
-    /** Prepares a target BigQuery table. */
-    @Override
-    public void startBundle(Context context) {
-      tableRows = new HashMap<>();
-      uniqueIdsForTableRows = new HashMap<>();
-    }
-
-    /** Accumulates the input into JsonTableRows and uniqueIdsForTableRows. */
-    @Override
-    public void processElement(ProcessContext context) {
-      String tableSpec = context.element().getKey().getKey();
-      List<TableRow> rows = getOrCreateMapListValue(tableRows, tableSpec);
-      List<String> uniqueIds = getOrCreateMapListValue(uniqueIdsForTableRows, tableSpec);
-
-      rows.add(context.element().getValue().tableRow);
-      uniqueIds.add(context.element().getValue().uniqueId);
-    }
-
-    /** Writes the accumulated rows into BigQuery with streaming API. */
-    @Override
-    public void finishBundle(Context context) throws Exception {
-      BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class);
-      Bigquery client = Transport.newBigQueryClient(options).build();
-
-      for (String tableSpec : tableRows.keySet()) {
-        TableReference tableReference = getOrCreateTable(options, tableSpec);
-        flushRows(client, tableReference, tableRows.get(tableSpec),
-            uniqueIdsForTableRows.get(tableSpec));
-      }
-      tableRows.clear();
-      uniqueIdsForTableRows.clear();
-    }
-
-    /**
-     * Resolves {@code tableSpec} to a {@link TableReference}, creating the table first unless
-     * the spec is already recorded in {@code createdTables}.
-     *
-     * @throws IOException propagated from parsing the stored schema or from table creation
-     */
-    public TableReference getOrCreateTable(BigQueryOptions options, String tableSpec)
-        throws IOException {
-      TableReference tableReference = parseTableSpec(tableSpec);
-      if (createdTables.contains(tableSpec)) {
-        return tableReference;
-      }
-      synchronized (createdTables) {
-        // Another thread may have succeeded in creating the table in the meanwhile, so look
-        // again. The second look isn't needed for correctness; it keeps every thread from
-        // attempting a create and overwhelming our BigQuery quota.
-        if (createdTables.contains(tableSpec)) {
-          return tableReference;
-        }
-        TableSchema tableSchema = JSON_FACTORY.fromString(jsonTableSchema, TableSchema.class);
-        Bigquery client = Transport.newBigQueryClient(options).build();
-        new BigQueryTableInserter(client).getOrCreateTable(tableReference,
-            WriteDisposition.WRITE_APPEND, CreateDisposition.CREATE_IF_NEEDED, tableSchema);
-        createdTables.add(tableSpec);
-      }
-      return tableReference;
-    }
-
-    /**
-     * Sends one table's buffered rows and unique ids through the streaming insert API. Does
-     * nothing when there are no rows; an {@link IOException} is rethrown unchecked.
-     */
-    private void flushRows(Bigquery client, TableReference tableReference,
-        List<TableRow> tableRows, List<String> uniqueIds) {
-      if (tableRows.isEmpty()) {
-        return;
-      }
-      BigQueryTableInserter inserter = new BigQueryTableInserter(client);
-      try {
-        inserter.insertAll(tableReference, tableRows, uniqueIds, byteCountAggregator);
-      } catch (IOException e) {
-        throw new RuntimeException(e);
-      }
-    }
-  }
-
-  /** Pairs a key with a shard number. */
-  private static class ShardedKey<K> {
-    /** Returns a {@code ShardedKey} holding the given key and shard number. */
-    public static <K> ShardedKey<K> of(K key, int shardNumber) {
-      return new ShardedKey<>(key, shardNumber);
-    }
-
-    private final K key;
-    private final int shardNumber;
-
-    private ShardedKey(K key, int shardNumber) {
-      this.shardNumber = shardNumber;
-      this.key = key;
-    }
-
-    public int getShardNumber() {
-      return this.shardNumber;
-    }
-
-    public K getKey() {
-      return this.key;
-    }
-  }
-
-  /**
-   * A {@link Coder} for {@link ShardedKey}, using a wrapped key {@link Coder}.
-   */
-  private static class ShardedKeyCoder<KeyT>
-      extends StandardCoder<ShardedKey<KeyT>> {
-    public static <KeyT> ShardedKeyCoder<KeyT> of(Coder<KeyT> keyCoder) {
-      return new ShardedKeyCoder<>(keyCoder);
-    }
-
-    @JsonCreator
-    public static <KeyT> ShardedKeyCoder<KeyT> of(
-         @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-        List<Coder<KeyT>> components) {
-      Preconditions.checkArgument(components.size() == 1,
-          "Expecting 1 component, got " + components.size());
-      return of(components.get(0));
-    }
-
-    protected ShardedKeyCoder(Coder<KeyT> keyCoder) {
-      this.keyCoder = keyCoder;
-      this.shardNumberCoder = VarIntCoder.of();
-    }
-
-    @Override
-    public List<? extends Coder<?>> getCoderArguments() {
-      return Arrays.asList(keyCoder);
-    }
-
-    @Override
-    public void encode(ShardedKey<KeyT> key, OutputStream outStream, Context context)
-        throws IOException {
-      keyCoder.encode(key.getKey(), outStream, context.nested());
-      shardNumberCoder.encode(key.getShardNumber(), outStream, context);
-    }
-
-    @Override
-    public ShardedKey<KeyT> decode(InputStream inStream, Context context)
-        throws IOException {
-      return new ShardedKey<KeyT>(
-          keyCoder.decode(inStream, context.nested()),
-          shardNumberCoder.decode(inStream, context));
-    }
-
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      keyCoder.verifyDeterministic();
-    }
-
-    Coder<KeyT> keyCoder;
-    VarIntCoder shardNumberCoder;
-  }
-
-  /** An {@link AtomicCoder} that writes a {@link TableRowInfo} as its row followed by its id. */
-  private static class TableRowInfoCoder extends AtomicCoder<TableRowInfo> {
-    private static final TableRowInfoCoder INSTANCE = new TableRowInfoCoder();
-
-    TableRowJsonCoder tableRowCoder = TableRowJsonCoder.of();
-    StringUtf8Coder idCoder = StringUtf8Coder.of();
-
-    /** Returns the shared coder instance. */
-    @JsonCreator
-    public static TableRowInfoCoder of() {
-      return INSTANCE;
-    }
-
-    /** Writes the table row and then the unique id, both in the nested context. */
-    @Override
-    public void encode(TableRowInfo value, OutputStream outStream, Context context)
-      throws IOException {
-      if (value == null) {
-        throw new CoderException("cannot encode a null value");
-      }
-      Context nested = context.nested();
-      tableRowCoder.encode(value.tableRow, outStream, nested);
-      idCoder.encode(value.uniqueId, outStream, nested);
-    }
-
-    /** Reads the table row and then the unique id, mirroring {@link #encode}. */
-    @Override
-    public TableRowInfo decode(InputStream inStream, Context context)
-      throws IOException {
-      Context nested = context.nested();
-      TableRow row = tableRowCoder.decode(inStream, nested);
-      String id = idCoder.decode(inStream, nested);
-      return new TableRowInfo(row, id);
-    }
-
-    /** Always throws: this coder is declared non-deterministic. */
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      throw new NonDeterministicException(this, "TableRows are not deterministic.");
-    }
-  }
-
-  /** A table row together with the unique id assigned to it. */
-  private static class TableRowInfo {
-    final TableRow tableRow;
-    final String uniqueId;
-
-    TableRowInfo(TableRow tableRow, String uniqueId) {
-      this.uniqueId = uniqueId;
-      this.tableRow = tableRow;
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Fn that tags each table row with a unique id and destination table.
-   * To avoid calling UUID.randomUUID() for each element, which can be costly,
-   * a randomUUID is generated only once per bucket of data. The actual unique
-   * id is created by concatenating this randomUUID with a sequential number.
-   */
-  private static class TagWithUniqueIdsAndTable
-      extends DoFn<TableRow, KV<ShardedKey<String>, TableRowInfo>>
-      implements DoFn.RequiresWindowAccess {
-    /** TableSpec to write to. */
-    private final String tableSpec;
-
-    /** User function mapping windows to {@link TableReference} in JSON. */
-    private final SerializableFunction<BoundedWindow, TableReference> tableRefFunction;
-
-    private transient String randomUUID;
-    private transient long sequenceNo = 0L;
-
-    TagWithUniqueIdsAndTable(BigQueryOptions options, TableReference table,
-        SerializableFunction<BoundedWindow, TableReference> tableRefFunction) {
-      Preconditions.checkArgument(table == null ^ tableRefFunction == null,
-          "Exactly one of table or tableRefFunction should be set");
-      if (table != null) {
-        if (table.getProjectId() == null) {
-          table.setProjectId(options.as(BigQueryOptions.class).getProject());
-        }
-        this.tableSpec = toTableSpec(table);
-      } else {
-        tableSpec = null;
-      }
-      this.tableRefFunction = tableRefFunction;
-    }
-
-
-    @Override
-    public void startBundle(Context context) {
-      randomUUID = UUID.randomUUID().toString();
-    }
-
-    /** Tag the input with a unique id. */
-    @Override
-    public void processElement(ProcessContext context) throws IOException {
-      String uniqueId = randomUUID + sequenceNo++;
-      ThreadLocalRandom randomGenerator = ThreadLocalRandom.current();
-      String tableSpec = tableSpecFromWindow(
-          context.getPipelineOptions().as(BigQueryOptions.class), context.window());
-      // We output on keys 0-50 to ensure that there's enough batching for
-      // BigQuery.
-      context.output(KV.of(ShardedKey.of(tableSpec, randomGenerator.nextInt(0, 50)),
-          new TableRowInfo(context.element(), uniqueId)));
-    }
-
-    private String tableSpecFromWindow(BigQueryOptions options, BoundedWindow window) {
-      if (tableSpec != null) {
-        return tableSpec;
-      } else {
-        TableReference table = tableRefFunction.apply(window);
-        if (table.getProjectId() == null) {
-          table.setProjectId(options.getProject());
-        }
-        return toTableSpec(table);
-      }
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * PTransform that performs a streaming BigQuery write. To increase consistency,
-   * it leverages BigQuery's best effort de-dup mechanism.
-   */
-  private static class StreamWithDeDup extends PTransform<PCollection<TableRow>, PDone> {
-    private final transient TableReference tableReference;
-    private final SerializableFunction<BoundedWindow, TableReference> tableRefFunction;
-    private final transient TableSchema tableSchema;
-
-    /** Stores the destination (a table or a per-window function) and the table schema. */
-    StreamWithDeDup(TableReference tableReference,
-        SerializableFunction<BoundedWindow, TableReference> tableRefFunction,
-        TableSchema tableSchema) {
-      this.tableSchema = tableSchema;
-      this.tableRefFunction = tableRefFunction;
-      this.tableReference = tableReference;
-    }
-
-    @Override
-    protected Coder<Void> getDefaultOutputCoder() {
-      return VoidCoder.of();
-    }
-
-    @Override
-    public PDone apply(PCollection<TableRow> input) {
-      // Streaming rows straight to BigQuery would be the naive approach, but it can
-      // occasionally duplicate data, e.g., when a VM running this code is restarted and
-      // the code is re-run.
-      //
-      // That risk is mitigated here by relying on BigQuery's built-in best effort de-dup
-      // mechanism: each input TableRow is tagged with a generated unique id, which is
-      // passed to BigQuery and used to ignore duplicates.
-      BigQueryOptions options = input.getPipeline().getOptions().as(BigQueryOptions.class);
-      PCollection<KV<ShardedKey<String>, TableRowInfo>> tagged = input.apply(
-          ParDo.of(new TagWithUniqueIdsAndTable(options, tableReference, tableRefFunction)));
-      tagged.setCoder(
-          KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()));
-
-      // The same TableRow must not be processed again with a regenerated, different unique
-      // id. This is achieved by "checkpointing", a side effect of having StreamingWriteFn
-      // immediately follow a GBK, which Reshuffle performs.
-      tagged
-          .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
-          .apply(ParDo.of(new StreamingWriteFn(tableSchema)));
-
-      // Returning PDone here breaks the implicit assumption about job execution order:
-      // a user PTransform that takes the returned PDone as its input is not necessarily
-      // executed after the BigQueryIO.Write.
-      return PDone.in(input.getPipeline());
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /** Private constructor: disallows construction of this utility class. */
-  private BigQueryIO() {}
-
-  /**
-   * Direct mode read evaluator.
-   *
-   * <p>Reads every row of the query result or table into an in-memory list and sets that
-   * list as the transform's output PCollection. A table without a project id gets the
-   * project from the pipeline options.
-   */
-  private static void evaluateReadHelper(
-      Read.Bound transform, DirectPipelineRunner.EvaluationContext context) {
-    BigQueryOptions options = context.getPipelineOptions();
-    Bigquery client = Transport.newBigQueryClient(options).build();
-    boolean tableLacksProject =
-        transform.table != null && transform.table.getProjectId() == null;
-    if (tableLacksProject) {
-      transform.table.setProjectId(options.getProject());
-    }
-
-    BigQueryTableRowIterator iterator;
-    if (transform.query == null) {
-      LOG.info("Reading from BigQuery table {}", toTableSpec(transform.table));
-      iterator = BigQueryTableRowIterator.fromTable(transform.table, client);
-    } else {
-      LOG.info("Reading from BigQuery query {}", transform.query);
-      iterator = BigQueryTableRowIterator.fromQuery(
-          transform.query, options.getProject(), client, transform.getFlattenResults());
-    }
-
-    // The extra resource variable exists only so that the iterator is closed on exit.
-    try (BigQueryTableRowIterator ignored = iterator) {
-      List<TableRow> rows = new ArrayList<>();
-      iterator.open();
-      while (iterator.advance()) {
-        rows.add(iterator.getCurrent());
-      }
-      LOG.info("Number of records read from BigQuery: {}", rows.size());
-      context.setPCollection(context.getOutput(transform), rows);
-    } catch (IOException | InterruptedException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  /**
-   * Returns the list stored under {@code key}, first inserting a new empty list when the
-   * map holds no (non-null) list for that key.
-   */
-  private static <K, V> List<V> getOrCreateMapListValue(Map<K, List<V>> map, K key) {
-    List<V> existing = map.get(key);
-    if (existing != null) {
-      return existing;
-    }
-    List<V> created = new ArrayList<>();
-    map.put(key, created);
-    return created;
-  }
-
-  /**
-   * Direct mode write evaluator.
-   *
-   * <p>Groups the input rows by destination table (one per window when a table function
-   * is set) and then writes each table's rows with a single insert call. The table will
-   * be created if necessary.
-   */
-  private static void evaluateWriteHelper(
-      Write.Bound transform, DirectPipelineRunner.EvaluationContext context) {
-    BigQueryOptions options = context.getPipelineOptions();
-    Bigquery client = Transport.newBigQueryClient(options).build();
-    BigQueryTableInserter inserter = new BigQueryTableInserter(client);
-
-    try {
-      Map<TableReference, List<TableRow>> rowsByTable = new HashMap<>();
-      for (WindowedValue<TableRow> element :
-          context.getPCollectionWindowedValues(context.getInput(transform))) {
-        for (BoundedWindow window : element.getWindows()) {
-          TableReference destination = transform.tableRefFunction == null
-              ? transform.table
-              : transform.tableRefFunction.apply(window);
-          if (destination.getProjectId() == null) {
-            destination.setProjectId(options.getProject());
-          }
-          getOrCreateMapListValue(rowsByTable, destination).add(element.getValue());
-        }
-      }
-
-      for (Map.Entry<TableReference, List<TableRow>> pending : rowsByTable.entrySet()) {
-        TableReference destination = pending.getKey();
-        LOG.info("Writing to BigQuery table {}", toTableSpec(destination));
-        // BigQueryTableInserter#getOrCreateTable validates the CreateDisposition and the
-        // WriteDisposition. For each TableReference, it can only be called before rows
-        // are written.
-        inserter.getOrCreateTable(destination,
-            transform.writeDisposition, transform.createDisposition, transform.schema);
-        inserter.insertAll(destination, pending.getValue());
-      }
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java
deleted file mode 100644
index f4a9c7d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BlockBasedSource.java
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-import java.io.IOException;
-import java.util.NoSuchElementException;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@code BlockBasedSource} is a {@link FileBasedSource} where a file consists of blocks of
- * records.
- *
- * <p>{@code BlockBasedSource} should be derived from when a file format does not support efficient
- * seeking to a record in the file, but can support efficient seeking to a block. Alternatively,
- * records in the file cannot be offset-addressed, but blocks can (it is not possible to say
- * that record {@code i} starts at offset {@code m}, but it is possible to say that block {@code j}
- * starts at offset {@code n}).
- *
- * <p>The records that will be read from a {@code BlockBasedSource} that corresponds to a subrange
- * of a file {@code [startOffset, endOffset)} are those records such that the record is contained in
- * a block that starts at offset {@code i}, where {@code i >= startOffset} and
- * {@code i < endOffset}. In other words, a record will be read from the source if its first byte is
- * contained in a block that begins within the range described by the source.
- *
- * <p>This entails that it is possible to determine the start offsets of all blocks in a file.
- *
- * <p>Progress reporting for reading from a {@code BlockBasedSource} is inaccurate. A {@link
- * BlockBasedReader} reports its current offset as {@code (offset of current block) + (current block
- * size) * (fraction of block consumed)}. However, only the offset of the current block is required
- * to be accurately reported by subclass implementations. As such, in the worst case, the current
- * offset is only updated at block boundaries.
- *
- * <p>{@code BlockBasedSource} supports dynamic splitting. However, because records in a {@code
- * BlockBasedSource} are not required to have offsets and progress reporting is inaccurate, {@code
- * BlockBasedReader} only supports splitting at block boundaries.
- * In other words, {@link BlockBasedReader#atSplitPoint} returns true iff the current record is the
- * first record in a block. See {@link FileBasedSource.FileBasedReader} for discussion about split
- * points.
- *
- * @param <T> The type of records to be read from the source.
- */
-@Experimental(Experimental.Kind.SOURCE_SINK)
-public abstract class BlockBasedSource<T> extends FileBasedSource<T> {
-  /**
-   * Constructs a {@code BlockBasedSource} from a file name or file pattern. Subclasses must
-   * use this constructor when the source describes a file pattern. See
-   * {@link FileBasedSource} for details.
-   */
-  public BlockBasedSource(String fileOrPatternSpec, long minBundleSize) {
-    super(fileOrPatternSpec, minBundleSize);
-  }
-
-  /**
-   * Constructs a {@code BlockBasedSource} for one file. Subclasses must use this constructor
-   * in their implementation of {@link BlockBasedSource#createForSubrangeOfFile}. See the
-   * documentation of {@link FileBasedSource}.
-   */
-  public BlockBasedSource(String fileName, long minBundleSize, long startOffset, long endOffset) {
-    super(fileName, minBundleSize, startOffset, endOffset);
-  }
-
-  /**
-   * Returns a {@code BlockBasedSource} covering the given range of a single file.
-   */
-  @Override
-  protected abstract BlockBasedSource<T> createForSubrangeOfFile(
-      String fileName, long start, long end);
-
-  /**
-   * Returns a new {@code BlockBasedReader} for this source.
-   */
-  @Override
-  protected abstract BlockBasedReader<T> createSingleFileReader(PipelineOptions options);
-
-  /**
-   * A readable block of records.
-   */
-  @Experimental(Experimental.Kind.SOURCE_SINK)
-  protected abstract static class Block<T> {
-    /**
-     * Returns the record the block is currently positioned on.
-     */
-    public abstract T getCurrentRecord();
-
-    /**
-     * Advances to the next record of the block; returns true iff there was one.
-     */
-    public abstract boolean readNextRecord() throws IOException;
-
-    /**
-     * Returns, when it can be determined, how much of the block has already been consumed
-     * as a value in {@code [0, 1]}, not counting the current record. Values returned by
-     * successive calls must be monotonically increasing.
-     *
-     * <p>Implementations that cannot compute the fraction (for example because the number
-     * of records in the block is unknown) may return zero.
-     */
-    public abstract double getFractionOfBlockConsumed();
-  }
-
-  /**
-   * A {@code Reader} over the records of a {@link BlockBasedSource}. For a source that is a
-   * subrange of a file, this reader reads exactly the blocks whose first byte lies in
-   * {@code [start, end)}.
-   */
-  @Experimental(Experimental.Kind.SOURCE_SINK)
-  protected abstract static class BlockBasedReader<T> extends FileBasedReader<T> {
-    private boolean atSplitPoint;
-
-    protected BlockBasedReader(BlockBasedSource<T> source) {
-      super(source);
-    }
-
-    /**
-     * Reads the next block from the input.
-     */
-    public abstract boolean readNextBlock() throws IOException;
-
-    /**
-     * Returns the current block, i.e. the block read by the most recent successful call to
-     * {@link BlockBasedReader#readNextBlock}. May be null initially, or when no block has
-     * been read successfully.
-     */
-    @Nullable
-    public abstract Block<T> getCurrentBlock();
-
-    /**
-     * Returns, when it can be determined, the size in bytes that the current block occupies
-     * in the underlying file. May return {@code 0} when that size is unknown.
-     *
-     * <p>For two successive blocks A and B the returned sizes must satisfy
-     * {@code offset(A) + size(A) <= offset(B)}. Otherwise the progress reported by the
-     * {@code BlockBasedReader} is non-monotonic, which harms the quality (but not the
-     * correctness) of dynamic work rebalancing.
-     *
-     * <p>Together with {@link Block#getFractionOfBlockConsumed} this value is used to
-     * estimate progress within a block ({@code getCurrentBlock().getFractionOfBlockConsumed()
-     * * getCurrentBlockSize()}). That product may be {@code 0}, at the cost of inaccurate
-     * progress estimation.
-     */
-    public abstract long getCurrentBlockSize();
-
-    /**
-     * Returns the largest offset from which a read would still include the current block.
-     */
-    public abstract long getCurrentBlockOffset();
-
-    /**
-     * Returns the current record of the current block.
-     *
-     * @throws NoSuchElementException if there is no current block
-     */
-    @Override
-    public final T getCurrent() throws NoSuchElementException {
-      Block<T> block = getCurrentBlock();
-      if (block != null) {
-        return block.getCurrentRecord();
-      }
-      throw new NoSuchElementException(
-          "No block has been successfully read from " + getCurrentSource());
-    }
-
-    /**
-     * Tells whether the reader is at a split point, which for a {@code BlockBasedReader}
-     * means that the current record is the first record of a block. Split points are thus
-     * the block boundaries.
-     */
-    @Override
-    protected boolean isAtSplitPoint() {
-      return atSplitPoint;
-    }
-
-    /**
-     * Advances within the {@link #getCurrentBlock() current block} when it has another
-     * record, and otherwise moves on to following blocks via {@link #readNextBlock()}.
-     *
-     * <p>The first record read from a block is treated as a split point.
-     */
-    @Override
-    protected final boolean readNextRecord() throws IOException {
-      atSplitPoint = false;
-
-      while (true) {
-        Block<T> block = getCurrentBlock();
-        if (block != null && block.readNextRecord()) {
-          return true;
-        }
-        if (!readNextBlock()) {
-          return false;
-        }
-        // Whatever record is read next is the first of a new block: a split point.
-        atSplitPoint = true;
-      }
-    }
-
-    /**
-     * Estimates the consumed fraction of the source's range as the position of the current
-     * block's start plus the consumed share of that block, capped at {@code 1.0}. Returns
-     * null when the end offset is {@code Long.MAX_VALUE}.
-     */
-    @Override
-    public Double getFractionConsumed() {
-      if (getCurrentSource().getEndOffset() == Long.MAX_VALUE) {
-        return null;
-      }
-      Block<T> block = getCurrentBlock();
-      if (block == null) {
-        // There is no current block (i.e., the read has not yet begun).
-        return 0.0;
-      }
-      long blockStart = getCurrentBlockOffset();
-      long rangeStart = getCurrentSource().getStartOffset();
-      long rangeEnd = getCurrentSource().getEndOffset();
-      long rangeLength = rangeEnd - rangeStart;
-      double fractionAtBlockStart = ((double) (blockStart - rangeStart)) / rangeLength;
-      double fractionAtBlockEnd =
-          ((double) (blockStart + getCurrentBlockSize() - rangeStart)) / rangeLength;
-      double consumedWithinBlock =
-          block.getFractionOfBlockConsumed() * (fractionAtBlockEnd - fractionAtBlockStart);
-      return Math.min(1.0, fractionAtBlockStart + consumedWithinBlock);
-    }
-
-    /** Reports the offset of the current block as the reader's current offset. */
-    @Override
-    protected long getCurrentOffset() {
-      return getCurrentBlockOffset();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedReadFromUnboundedSource.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedReadFromUnboundedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedReadFromUnboundedSource.java
deleted file mode 100644
index 52c730c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/BoundedReadFromUnboundedSource.java
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.cloud.dataflow.sdk.util.StringUtils.approximateSimpleName;
-
-import com.google.api.client.util.BackOff;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.util.IntervalBoundedExponentialBackOff;
-import com.google.cloud.dataflow.sdk.util.ValueWithRecordId;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PInput;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-
-/**
- * {@link PTransform} that reads a bounded amount of data from an {@link UnboundedSource},
- * specified as one or both of a maximum number of elements or a maximum period of time to read.
- *
- * <p>Created by {@link Read}.
- */
-class BoundedReadFromUnboundedSource<T> extends PTransform<PInput, PCollection<T>> {
-  private final UnboundedSource<T, ?> source;
-  private final long maxNumRecords;
-  private final Duration maxReadTime;
-
-  /**
-   * Returns a new {@link BoundedReadFromUnboundedSource} that reads a bounded amount
-   * of data from the given {@link UnboundedSource}.  The bound is specified as a number
-   * of records to read.
-   *
-   * <p>This may take a long time to execute if the splits of this source are slow to read
-   * records.
-   */
-  public BoundedReadFromUnboundedSource<T> withMaxNumRecords(long maxNumRecords) {
-    return new BoundedReadFromUnboundedSource<T>(source, maxNumRecords, maxReadTime);
-  }
-
-  /**
-   * Returns a new {@link BoundedReadFromUnboundedSource} that reads a bounded amount
-   * of data from the given {@link UnboundedSource}.  The bound is specified as an amount
-   * of time to read for.  Each split of the source will read for this much time.
-   */
-  public BoundedReadFromUnboundedSource<T> withMaxReadTime(Duration maxReadTime) {
-    return new BoundedReadFromUnboundedSource<T>(source, maxNumRecords, maxReadTime);
-  }
-
-  /**
-   * Creates the transform.
-   *
-   * @param source the unbounded source to read from
-   * @param maxNumRecords the maximum number of records to read
-   * @param maxReadTime the maximum time to read for, or null for no time bound
-   */
-  BoundedReadFromUnboundedSource(
-      UnboundedSource<T, ?> source, long maxNumRecords, Duration maxReadTime) {
-    this.source = source;
-    this.maxNumRecords = maxNumRecords;
-    this.maxReadTime = maxReadTime;
-  }
-
-  /**
-   * Reads through a bounded adapter around the source, removes duplicates by record id when
-   * the source requires deduping, and finally strips the record ids.
-   */
-  @Override
-  public PCollection<T> apply(PInput input) {
-    PCollection<ValueWithRecordId<T>> read = Pipeline.applyTransform(input,
-        Read.from(new UnboundedToBoundedSourceAdapter<>(source, maxNumRecords, maxReadTime)));
-    if (source.requiresDeduping()) {
-      read = read.apply(RemoveDuplicates.withRepresentativeValueFn(
-          new SerializableFunction<ValueWithRecordId<T>, byte[]>() {
-            @Override
-            public byte[] apply(ValueWithRecordId<T> element) {
-              return element.getId();
-            }
-          }));
-    }
-    return read.apply(ValueWithRecordId.<T>stripIds());
-  }
-
-  @Override
-  protected Coder<T> getDefaultOutputCoder() {
-    return source.getDefaultOutputCoder();
-  }
-
-  @Override
-  public String getKindString() {
-    return "Read(" + approximateSimpleName(source.getClass()) + ")";
-  }
-
-  /** A {@link BoundedSource} that reads a limited amount from an {@link UnboundedSource}. */
-  private static class UnboundedToBoundedSourceAdapter<T>
-      extends BoundedSource<ValueWithRecordId<T>> {
-    private final UnboundedSource<T, ?> source;
-    private final long maxNumRecords;
-    private final Duration maxReadTime;
-
-    private UnboundedToBoundedSourceAdapter(
-        UnboundedSource<T, ?> source, long maxNumRecords, Duration maxReadTime) {
-      this.source = source;
-      this.maxNumRecords = maxNumRecords;
-      this.maxReadTime = maxReadTime;
-    }
-
-    /**
-     * Divide the given number of records into {@code numSplits} approximately
-     * equal parts that sum to {@code numRecords}. Returns an empty array when
-     * {@code numSplits} is zero, since there is nothing to divide among.
-     */
-    private static long[] splitNumRecords(long numRecords, int numSplits) {
-      long[] splitNumRecords = new long[numSplits];
-      if (numSplits == 0) {
-        // Avoids a division by zero when the source produced no splits.
-        return splitNumRecords;
-      }
-      long base = numRecords / numSplits;
-      long remainder = numRecords % numSplits;
-      for (int i = 0; i < numSplits; i++) {
-        // The first `remainder` splits take one extra record each.
-        splitNumRecords[i] = (i < remainder) ? base + 1 : base;
-      }
-      return splitNumRecords;
-    }
-
-    /**
-     * Pick a number of initial splits based on the number of records expected to be processed.
-     */
-    private static int numInitialSplits(long numRecords) {
-      final int maxSplits = 100;
-      final long recordsPerSplit = 10000;
-      return (int) Math.min(maxSplits, numRecords / recordsPerSplit + 1);
-    }
-
-    /**
-     * Asks the wrapped source for its initial splits and wraps each of them in an adapter
-     * that receives a share of {@code maxNumRecords} and the full {@code maxReadTime}.
-     */
-    @Override
-    public List<? extends BoundedSource<ValueWithRecordId<T>>> splitIntoBundles(
-        long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
-      List<UnboundedToBoundedSourceAdapter<T>> result = new ArrayList<>();
-      int numInitialSplits = numInitialSplits(maxNumRecords);
-      List<? extends UnboundedSource<T, ?>> splits =
-          source.generateInitialSplits(numInitialSplits, options);
-      int numSplits = splits.size();
-      long[] numRecords = splitNumRecords(maxNumRecords, numSplits);
-      for (int i = 0; i < numSplits; i++) {
-        result.add(
-            new UnboundedToBoundedSourceAdapter<T>(splits.get(i), numRecords[i], maxReadTime));
-      }
-      return result;
-    }
-
-    @Override
-    public long getEstimatedSizeBytes(PipelineOptions options) {
-      // No way to estimate bytes, so returning 0.
-      return 0L;
-    }
-
-    @Override
-    public boolean producesSortedKeys(PipelineOptions options) {
-      return false;
-    }
-
-    @Override
-    public Coder<ValueWithRecordId<T>> getDefaultOutputCoder() {
-      return ValueWithRecordId.ValueWithRecordIdCoder.of(source.getDefaultOutputCoder());
-    }
-
-    @Override
-    public void validate() {
-      source.validate();
-    }
-
-    @Override
-    public BoundedReader<ValueWithRecordId<T>> createReader(PipelineOptions options) {
-      return new Reader(source.createReader(options, null));
-    }
-
-    /**
-     * Reader that stops once {@code maxNumRecords} read attempts have been made or, when a
-     * {@code maxReadTime} is set, once that much time has passed since construction.
-     */
-    private class Reader extends BoundedReader<ValueWithRecordId<T>> {
-      /** Time after which reading stops; null when no {@code maxReadTime} was given. */
-      private final Instant endTime;
-      private final UnboundedSource.UnboundedReader<T> reader;
-      private long recordsRead = 0L;
-
-      private Reader(UnboundedSource.UnboundedReader<T> reader) {
-        this.endTime = (maxReadTime == null) ? null : Instant.now().plus(maxReadTime);
-        this.reader = reader;
-      }
-
-      @Override
-      public boolean start() throws IOException {
-        if (maxNumRecords <= 0 || (maxReadTime != null && maxReadTime.getMillis() == 0)) {
-          return false;
-        }
-
-        recordsRead++;
-        if (reader.start()) {
-          return true;
-        } else {
-          return advanceWithBackoff();
-        }
-      }
-
-      @Override
-      public boolean advance() throws IOException {
-        if (recordsRead >= maxNumRecords) {
-          finalizeCheckpoint();
-          return false;
-        }
-        recordsRead++;
-        return advanceWithBackoff();
-      }
-
-      /**
-       * Polls the underlying reader, sleeping between attempts as dictated by the backoff,
-       * until a record is available, the end time has passed, or the backoff gives up.
-       * In the latter two cases the checkpoint is finalized and false is returned.
-       *
-       * <p>An interrupt cuts the current sleep short but does not stop the polling; the
-       * thread's interrupt status is restored before this method returns so that callers
-       * can still observe it.
-       */
-      private boolean advanceWithBackoff() throws IOException {
-        // Try reading from the source with exponential backoff
-        BackOff backoff = new IntervalBoundedExponentialBackOff(10000, 10);
-        boolean interrupted = false;
-        try {
-          long nextSleep = backoff.nextBackOffMillis();
-          while (nextSleep != BackOff.STOP) {
-            if (endTime != null && Instant.now().isAfter(endTime)) {
-              finalizeCheckpoint();
-              return false;
-            }
-            if (reader.advance()) {
-              return true;
-            }
-            try {
-              Thread.sleep(nextSleep);
-            } catch (InterruptedException e) {
-              // Thread.sleep cleared the interrupt status; remember it instead of losing it.
-              interrupted = true;
-            }
-            nextSleep = backoff.nextBackOffMillis();
-          }
-          finalizeCheckpoint();
-          return false;
-        } finally {
-          if (interrupted) {
-            Thread.currentThread().interrupt();
-          }
-        }
-      }
-
-      private void finalizeCheckpoint() throws IOException {
-        reader.getCheckpointMark().finalizeCheckpoint();
-      }
-
-      @Override
-      public ValueWithRecordId<T> getCurrent() throws NoSuchElementException {
-        return new ValueWithRecordId<>(reader.getCurrent(), reader.getCurrentRecordId());
-      }
-
-      @Override
-      public Instant getCurrentTimestamp() throws NoSuchElementException {
-        return reader.getCurrentTimestamp();
-      }
-
-      @Override
-      public void close() throws IOException {
-        reader.close();
-      }
-
-      @Override
-      public BoundedSource<ValueWithRecordId<T>> getCurrentSource() {
-        return UnboundedToBoundedSourceAdapter.this;
-      }
-    }
-  }
-}

[36/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java
deleted file mode 100644
index 527f712..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ProxyInvocationHandler.java
+++ /dev/null
@@ -1,441 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory.JsonIgnorePredicate;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory.Registration;
-import com.google.cloud.dataflow.sdk.util.InstanceBuilder;
-import com.google.cloud.dataflow.sdk.util.common.ReflectHelpers;
-import com.google.common.base.Defaults;
-import com.google.common.base.Function;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ClassToInstanceMap;
-import com.google.common.collect.FluentIterable;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
-import com.google.common.collect.MutableClassToInstanceMap;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.core.JsonGenerator;
-import com.fasterxml.jackson.core.JsonParser;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.DeserializationContext;
-import com.fasterxml.jackson.databind.JavaType;
-import com.fasterxml.jackson.databind.JsonDeserializer;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.JsonSerializer;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.SerializerProvider;
-import com.fasterxml.jackson.databind.node.ObjectNode;
-
-import java.beans.PropertyDescriptor;
-import java.io.IOException;
-import java.lang.annotation.Annotation;
-import java.lang.reflect.InvocationHandler;
-import java.lang.reflect.Method;
-import java.lang.reflect.Proxy;
-import java.lang.reflect.Type;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
-import javax.annotation.concurrent.ThreadSafe;
-
-/**
- * Represents an {@link InvocationHandler} for a {@link Proxy}. The invocation handler uses bean
- * introspection of the proxy class to store and retrieve values based off of the property name.
- *
- * <p>Unset properties use the {@code @Default} metadata on the getter to return values. If there
- * is no {@code @Default} annotation on the getter, then a <a
- * href="https://docs.oracle.com/javase/tutorial/java/nutsandbolts/datatypes.html">default</a> as
- * per the Java Language Specification for the expected return type is returned.
- *
- * <p>In addition to the getter/setter pairs, this proxy invocation handler supports
- * {@link Object#equals(Object)}, {@link Object#hashCode()}, {@link Object#toString()} and
- * {@link PipelineOptions#as(Class)}.
- */
-@ThreadSafe
-class ProxyInvocationHandler implements InvocationHandler {
-  private static final ObjectMapper MAPPER = new ObjectMapper();
-  /**
-   * No two instances of this class are considered equivalent hence we generate a random hash code
-   * between 0 and {@link Integer#MAX_VALUE}.
-   */
-  private final int hashCode = (int) (Math.random() * Integer.MAX_VALUE);
-  private final Set<Class<? extends PipelineOptions>> knownInterfaces;
-  private final ClassToInstanceMap<PipelineOptions> interfaceToProxyCache;
-  private final Map<String, Object> options;
-  private final Map<String, JsonNode> jsonOptions;
-  private final Map<String, String> gettersToPropertyNames;
-  private final Map<String, String> settersToPropertyNames;
-
-  ProxyInvocationHandler(Map<String, Object> options) {
-    this(options, Maps.<String, JsonNode>newHashMap());
-  }
-
-  private ProxyInvocationHandler(Map<String, Object> options, Map<String, JsonNode> jsonOptions) {
-    this.options = options;
-    this.jsonOptions = jsonOptions;
-    this.knownInterfaces = new HashSet<>(PipelineOptionsFactory.getRegisteredOptions());
-    gettersToPropertyNames = Maps.newHashMap();
-    settersToPropertyNames = Maps.newHashMap();
-    interfaceToProxyCache = MutableClassToInstanceMap.create();
-  }
-
-  @Override
-  public Object invoke(Object proxy, Method method, Object[] args) {
-    if (args == null && "toString".equals(method.getName())) {
-      return toString();
-    } else if (args != null && args.length == 1 && "equals".equals(method.getName())) {
-      return equals(args[0]);
-    } else if (args == null && "hashCode".equals(method.getName())) {
-      return hashCode();
-    } else if (args != null && "as".equals(method.getName()) && args[0] instanceof Class) {
-      @SuppressWarnings("unchecked")
-      Class<? extends PipelineOptions> clazz = (Class<? extends PipelineOptions>) args[0];
-      return as(clazz);
-    } else if (args != null && "cloneAs".equals(method.getName()) && args[0] instanceof Class) {
-      @SuppressWarnings("unchecked")
-      Class<? extends PipelineOptions> clazz = (Class<? extends PipelineOptions>) args[0];
-      return cloneAs(proxy, clazz);
-    }
-    String methodName = method.getName();
-    synchronized (this) {
-      if (gettersToPropertyNames.keySet().contains(methodName)) {
-        String propertyName = gettersToPropertyNames.get(methodName);
-        if (!options.containsKey(propertyName)) {
-          // Lazy bind the default to the method.
-          Object value = jsonOptions.containsKey(propertyName)
-              ? getValueFromJson(propertyName, method)
-              : getDefault((PipelineOptions) proxy, method);
-          options.put(propertyName, value);
-        }
-        return options.get(propertyName);
-      } else if (settersToPropertyNames.containsKey(methodName)) {
-        options.put(settersToPropertyNames.get(methodName), args[0]);
-        return Void.TYPE;
-      }
-    }
-    throw new RuntimeException("Unknown method [" + method + "] invoked with args ["
-        + Arrays.toString(args) + "].");
-  }
-
-  /**
-   * Backing implementation for {@link PipelineOptions#as(Class)}.
-   *
-   * @param iface The interface that the returned object needs to implement.
-   * @return An object that implements the interface <T>.
-   */
-  synchronized <T extends PipelineOptions> T as(Class<T> iface) {
-    Preconditions.checkNotNull(iface);
-    Preconditions.checkArgument(iface.isInterface());
-    if (!interfaceToProxyCache.containsKey(iface)) {
-      Registration<T> registration =
-          PipelineOptionsFactory.validateWellFormed(iface, knownInterfaces);
-      List<PropertyDescriptor> propertyDescriptors = registration.getPropertyDescriptors();
-      Class<T> proxyClass = registration.getProxyClass();
-      gettersToPropertyNames.putAll(generateGettersToPropertyNames(propertyDescriptors));
-      settersToPropertyNames.putAll(generateSettersToPropertyNames(propertyDescriptors));
-      knownInterfaces.add(iface);
-      interfaceToProxyCache.putInstance(iface,
-          InstanceBuilder.ofType(proxyClass)
-              .fromClass(proxyClass)
-              .withArg(InvocationHandler.class, this)
-              .build());
-    }
-    return interfaceToProxyCache.getInstance(iface);
-  }
-
-  /**
-   * Backing implementation for {@link PipelineOptions#cloneAs(Class)}.
-   *
-   * @return A copy of the PipelineOptions.
-   */
-  synchronized <T extends PipelineOptions> T cloneAs(Object proxy, Class<T> iface) {
-    PipelineOptions clonedOptions;
-    try {
-      clonedOptions = MAPPER.readValue(MAPPER.writeValueAsBytes(proxy), PipelineOptions.class);
-    } catch (IOException e) {
-      throw new IllegalStateException("Failed to serialize the pipeline options to JSON.", e);
-    }
-    for (Class<? extends PipelineOptions> knownIface : knownInterfaces) {
-      clonedOptions.as(knownIface);
-    }
-    return clonedOptions.as(iface);
-  }
-
-  /**
-   * Returns true if the other object is a ProxyInvocationHandler or is a Proxy object and has the
-   * same ProxyInvocationHandler as this.
-   *
-   * @param obj The object to compare against this.
-   * @return true iff the other object is a ProxyInvocationHandler or is a Proxy object and has the
-   *         same ProxyInvocationHandler as this.
-   */
-  @Override
-  public boolean equals(Object obj) {
-    return obj != null && ((obj instanceof ProxyInvocationHandler && this == obj)
-        || (Proxy.isProxyClass(obj.getClass()) && this == Proxy.getInvocationHandler(obj)));
-  }
-
-  /**
-   * Each instance of this ProxyInvocationHandler is unique and has a random hash code.
-   *
-   * @return A hash code that was generated randomly.
-   */
-  @Override
-  public int hashCode() {
-    return hashCode;
-  }
-
-  /**
-   * This will output all the currently set values. This is a relatively costly function
-   * as it will call {@code toString()} on each object that has been set and format
-   * the results in a readable format.
-   *
-   * @return A pretty printed string representation of this.
-   */
-  @Override
-  public synchronized String toString() {
-    SortedMap<String, Object> sortedOptions = new TreeMap<>();
-    // Add the options that we received from deserialization
-    sortedOptions.putAll(jsonOptions);
-    // Override with any programmatically set options.
-    sortedOptions.putAll(options);
-
-    StringBuilder b = new StringBuilder();
-    b.append("Current Settings:\n");
-    for (Map.Entry<String, Object> entry : sortedOptions.entrySet()) {
-      b.append("  " + entry.getKey() + ": " + entry.getValue() + "\n");
-    }
-    return b.toString();
-  }
-
-  /**
-   * Uses a Jackson {@link ObjectMapper} to attempt type conversion.
-   *
-   * @param method The method whose return type you would like to return.
-   * @param propertyName The name of the property that is being returned.
-   * @return An object matching the return type of the method passed in.
-   */
-  private Object getValueFromJson(String propertyName, Method method) {
-    try {
-      JavaType type = MAPPER.getTypeFactory().constructType(method.getGenericReturnType());
-      JsonNode jsonNode = jsonOptions.get(propertyName);
-      return MAPPER.readValue(jsonNode.toString(), type);
-    } catch (IOException e) {
-      throw new RuntimeException("Unable to parse representation", e);
-    }
-  }
-
-  /**
-   * Returns a default value for the method based upon {@code @Default} metadata on the getter
-   * to return values. If there is no {@code @Default} annotation on the getter, then a <a
-   * href="https://docs.oracle.com/javase/tutorial/java/nutsandbolts/datatypes.html">default</a> as
-   * per the Java Language Specification for the expected return type is returned.
-   *
-   * @param proxy The proxy object for which we are attempting to get the default.
-   * @param method The getter method that was invoked.
-   * @return The default value from an {@link Default} annotation if present, otherwise a default
-   *         value as per the Java Language Specification.
-   */
-  @SuppressWarnings({"unchecked", "rawtypes"})
-  private Object getDefault(PipelineOptions proxy, Method method) {
-    for (Annotation annotation : method.getAnnotations()) {
-      if (annotation instanceof Default.Class) {
-        return ((Default.Class) annotation).value();
-      } else if (annotation instanceof Default.String) {
-        return ((Default.String) annotation).value();
-      } else if (annotation instanceof Default.Boolean) {
-        return ((Default.Boolean) annotation).value();
-      } else if (annotation instanceof Default.Character) {
-        return ((Default.Character) annotation).value();
-      } else if (annotation instanceof Default.Byte) {
-        return ((Default.Byte) annotation).value();
-      } else if (annotation instanceof Default.Short) {
-        return ((Default.Short) annotation).value();
-      } else if (annotation instanceof Default.Integer) {
-        return ((Default.Integer) annotation).value();
-      } else if (annotation instanceof Default.Long) {
-        return ((Default.Long) annotation).value();
-      } else if (annotation instanceof Default.Float) {
-        return ((Default.Float) annotation).value();
-      } else if (annotation instanceof Default.Double) {
-        return ((Default.Double) annotation).value();
-      } else if (annotation instanceof Default.Enum) {
-        return Enum.valueOf((Class<Enum>) method.getReturnType(),
-            ((Default.Enum) annotation).value());
-      } else if (annotation instanceof Default.InstanceFactory) {
-        return InstanceBuilder.ofType(((Default.InstanceFactory) annotation).value())
-            .build()
-            .create(proxy);
-      }
-    }
-
-    /*
-     * We need to make sure that we return something appropriate for the return type. Thus we return
-     * a default value as defined by the JLS.
-     */
-    return Defaults.defaultValue(method.getReturnType());
-  }
-
-  /**
-   * Returns a map from the getters method name to the name of the property based upon the passed in
-   * {@link PropertyDescriptor}s property descriptors.
-   *
-   * @param propertyDescriptors A list of {@link PropertyDescriptor}s to use when generating the
-   *        map.
-   * @return A map of getter method name to property name.
-   */
-  private static Map<String, String> generateGettersToPropertyNames(
-      List<PropertyDescriptor> propertyDescriptors) {
-    ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
-    for (PropertyDescriptor descriptor : propertyDescriptors) {
-      if (descriptor.getReadMethod() != null) {
-        builder.put(descriptor.getReadMethod().getName(), descriptor.getName());
-      }
-    }
-    return builder.build();
-  }
-
-  /**
-   * Returns a map from the setters method name to the name of the property based upon the passed in
-   * {@link PropertyDescriptor}s property descriptors.
-   *
-   * @param propertyDescriptors A list of {@link PropertyDescriptor}s to use when generating the
-   *        map.
-   * @return A map of setter method name to property name.
-   */
-  private static Map<String, String> generateSettersToPropertyNames(
-      List<PropertyDescriptor> propertyDescriptors) {
-    ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
-    for (PropertyDescriptor descriptor : propertyDescriptors) {
-      if (descriptor.getWriteMethod() != null) {
-        builder.put(descriptor.getWriteMethod().getName(), descriptor.getName());
-      }
-    }
-    return builder.build();
-  }
-
-  static class Serializer extends JsonSerializer<PipelineOptions> {
-    @Override
-    public void serialize(PipelineOptions value, JsonGenerator jgen, SerializerProvider provider)
-        throws IOException, JsonProcessingException {
-      ProxyInvocationHandler handler = (ProxyInvocationHandler) Proxy.getInvocationHandler(value);
-      synchronized (handler) {
-        // We first filter out any properties that have been modified since
-        // the last serialization of this PipelineOptions and then verify that
-        // they are all serializable.
-        Map<String, Object> filteredOptions = Maps.newHashMap(handler.options);
-        removeIgnoredOptions(handler.knownInterfaces, filteredOptions);
-        ensureSerializable(handler.knownInterfaces, filteredOptions);
-
-        // Now we create the map of serializable options by taking the original
-        // set of serialized options (if any) and updating them with any properties
-        // instances that have been modified since the previous serialization.
-        Map<String, Object> serializableOptions =
-            Maps.<String, Object>newHashMap(handler.jsonOptions);
-        serializableOptions.putAll(filteredOptions);
-        jgen.writeStartObject();
-        jgen.writeFieldName("options");
-        jgen.writeObject(serializableOptions);
-        jgen.writeEndObject();
-      }
-    }
-
-    /**
-     * We remove all properties within the passed in options where their getter is annotated with
-     * {@link JsonIgnore @JsonIgnore} from the passed in options using the passed in interfaces.
-     */
-    private void removeIgnoredOptions(
-        Set<Class<? extends PipelineOptions>> interfaces, Map<String, Object> options) {
-      // Find all the method names that are annotated with JSON ignore.
-      Set<String> jsonIgnoreMethodNames = FluentIterable.from(
-          ReflectHelpers.getClosureOfMethodsOnInterfaces(interfaces))
-          .filter(JsonIgnorePredicate.INSTANCE).transform(new Function<Method, String>() {
-            @Override
-            public String apply(Method input) {
-              return input.getName();
-            }
-          }).toSet();
-
-      // Remove all options that have the same method name as the descriptor.
-      for (PropertyDescriptor descriptor
-          : PipelineOptionsFactory.getPropertyDescriptors(interfaces)) {
-        if (jsonIgnoreMethodNames.contains(descriptor.getReadMethod().getName())) {
-          options.remove(descriptor.getName());
-        }
-      }
-    }
-
-    /**
-     * We use an {@link ObjectMapper} to verify that the passed in options are serializable
-     * and deserializable.
-     */
-    private void ensureSerializable(Set<Class<? extends PipelineOptions>> interfaces,
-        Map<String, Object> options) throws IOException {
-      // Construct a map from property name to the return type of the getter.
-      Map<String, Type> propertyToReturnType = Maps.newHashMap();
-      for (PropertyDescriptor descriptor
-          : PipelineOptionsFactory.getPropertyDescriptors(interfaces)) {
-        if (descriptor.getReadMethod() != null) {
-          propertyToReturnType.put(descriptor.getName(),
-              descriptor.getReadMethod().getGenericReturnType());
-        }
-      }
-
-      // Attempt to serialize and deserialize each property.
-      for (Map.Entry<String, Object> entry : options.entrySet()) {
-        try {
-          String serializedValue = MAPPER.writeValueAsString(entry.getValue());
-          JavaType type = MAPPER.getTypeFactory()
-              .constructType(propertyToReturnType.get(entry.getKey()));
-          MAPPER.readValue(serializedValue, type);
-        } catch (Exception e) {
-          throw new IOException(String.format(
-              "Failed to serialize and deserialize property '%s' with value '%s'",
-              entry.getKey(), entry.getValue()), e);
-        }
-      }
-    }
-  }
-
-  static class Deserializer extends JsonDeserializer<PipelineOptions> {
-    @Override
-    public PipelineOptions deserialize(JsonParser jp, DeserializationContext ctxt)
-        throws IOException, JsonProcessingException {
-      ObjectNode objectNode = (ObjectNode) jp.readValueAsTree();
-      ObjectNode optionsNode = (ObjectNode) objectNode.get("options");
-
-      Map<String, JsonNode> fields = Maps.newHashMap();
-      for (Iterator<Map.Entry<String, JsonNode>> iterator = optionsNode.fields();
-          iterator.hasNext(); ) {
-        Map.Entry<String, JsonNode> field = iterator.next();
-        fields.put(field.getKey(), field.getValue());
-      }
-      PipelineOptions options =
-          new ProxyInvocationHandler(Maps.<String, Object>newHashMap(), fields)
-              .as(PipelineOptions.class);
-      return options;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java
deleted file mode 100644
index 9563c58..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/StreamingOptions.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-/**
- * Options used to configure streaming.
- */
-public interface StreamingOptions extends
-    ApplicationNameOptions, GcpOptions, PipelineOptions {
-  /**
-   * Set to true if running a streaming pipeline.
-   */
-  @Description("Set to true if running a streaming pipeline.")
-  boolean isStreaming();
-  void setStreaming(boolean value);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Validation.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Validation.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Validation.java
deleted file mode 100644
index 20034f8..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Validation.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import java.lang.annotation.Documented;
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
-
-/**
- * {@link Validation} represents a set of annotations that can be used to annotate getter
- * properties on {@link PipelineOptions} with information representing the validation criteria to
- * be used when validating with the {@link PipelineOptionsValidator}.
- */
-public @interface Validation {
-  /**
-   * This criterion specifies that the value must not be null. Note that this annotation
-   * should only be applied to methods that return nullable objects.
-   */
-  @Target(value = ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface Required {
-    /**
-     * The groups that the annotated attribute is a member of. A member can be in 0 or more groups.
-     * Members not in any groups are considered to be in a group consisting exclusively of
-     * themselves. At least one member of a group must be non-null if the options are to be valid.
-     */
-    String[] groups() default {};
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java
deleted file mode 100644
index cef995f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/package-info.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines {@link com.google.cloud.dataflow.sdk.options.PipelineOptions} for
- * configuring pipeline execution.
- *
- * <p>{@link com.google.cloud.dataflow.sdk.options.PipelineOptions} encapsulates the various
- * parameters that describe how a pipeline should be run. {@code PipelineOptions} are created
- * using a {@link com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory}.
- */
-package com.google.cloud.dataflow.sdk.options;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java
deleted file mode 100644
index 5567f03..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/package-info.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Provides a simple, powerful model for building both batch and
- * streaming parallel data processing
- * {@link com.google.cloud.dataflow.sdk.Pipeline}s.
- *
- * <p>To use the Google Cloud Dataflow SDK, you build a
- * {@link com.google.cloud.dataflow.sdk.Pipeline}, which manages a graph of
- * {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s
- * and the {@link com.google.cloud.dataflow.sdk.values.PCollection}s that
- * the PTransforms consume and produce.
- *
- * <p>Each Pipeline has a
- * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner} to specify
- * where and how it should run after pipeline construction is complete.
- *
- */
-package com.google.cloud.dataflow.sdk;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorPipelineExtractor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorPipelineExtractor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorPipelineExtractor.java
deleted file mode 100644
index ab87f2e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorPipelineExtractor.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.AggregatorRetriever;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.SetMultimap;
-
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Map;
-
-/**
- * Retrieves {@link Aggregator Aggregators} at each {@link ParDo} and returns a {@link Map} of
- * {@link Aggregator} to the {@link PTransform PTransforms} in which it is present.
- */
-public class AggregatorPipelineExtractor {
-  private final Pipeline pipeline;
-
-  /**
-   * Creates an {@code AggregatorPipelineExtractor} for the given {@link Pipeline}.
-   */
-  public AggregatorPipelineExtractor(Pipeline pipeline) {
-    this.pipeline = pipeline;
-  }
-
-  /**
-   * Returns a {@link Map} between each {@link Aggregator} in the {@link Pipeline} to the {@link
-   * PTransform PTransforms} in which it is used.
-   */
-  public Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> getAggregatorSteps() {
-    HashMultimap<Aggregator<?, ?>, PTransform<?, ?>> aggregatorSteps = HashMultimap.create();
-    pipeline.traverseTopologically(new AggregatorVisitor(aggregatorSteps));
-    return aggregatorSteps.asMap();
-  }
-
-  private static class AggregatorVisitor implements PipelineVisitor {
-    private final SetMultimap<Aggregator<?, ?>, PTransform<?, ?>> aggregatorSteps;
-
-    public AggregatorVisitor(SetMultimap<Aggregator<?, ?>, PTransform<?, ?>> aggregatorSteps) {
-      this.aggregatorSteps = aggregatorSteps;
-    }
-
-    @Override
-    public void enterCompositeTransform(TransformTreeNode node) {}
-
-    @Override
-    public void leaveCompositeTransform(TransformTreeNode node) {}
-
-    @Override
-    public void visitTransform(TransformTreeNode node) {
-      PTransform<?, ?> transform = node.getTransform();
-      addStepToAggregators(transform, getAggregators(transform));
-    }
-
-    private Collection<Aggregator<?, ?>> getAggregators(PTransform<?, ?> transform) {
-      if (transform != null) {
-        if (transform instanceof ParDo.Bound) {
-          return AggregatorRetriever.getAggregators(((ParDo.Bound<?, ?>) transform).getFn());
-        } else if (transform instanceof ParDo.BoundMulti) {
-          return AggregatorRetriever.getAggregators(((ParDo.BoundMulti<?, ?>) transform).getFn());
-        }
-      }
-      return Collections.emptyList();
-    }
-
-    private void addStepToAggregators(
-        PTransform<?, ?> transform, Collection<Aggregator<?, ?>> aggregators) {
-      for (Aggregator<?, ?> aggregator : aggregators) {
-        aggregatorSteps.put(aggregator, transform);
-      }
-    }
-
-    @Override
-    public void visitValue(PValue value, TransformTreeNode producer) {}
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorRetrievalException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorRetrievalException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorRetrievalException.java
deleted file mode 100644
index 90162ad..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorRetrievalException.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-
-/**
- * Signals that an exception has occurred while retrieving {@link Aggregator}s.
- */
-public class AggregatorRetrievalException extends Exception {
-  /**
-   * Constructs a new {@code AggregatorRetrievalException} with the specified detail message and
-   * cause.
-   */
-  public AggregatorRetrievalException(String message, Throwable cause) {
-    super(message, cause);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorValues.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorValues.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorValues.java
deleted file mode 100644
index 21f0282..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/AggregatorValues.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-
-import java.util.Collection;
-import java.util.Map;
-
-/**
- * A collection of values associated with an {@link Aggregator}. Aggregators declared in a
- * {@link DoFn} are emitted on a per-{@code DoFn}-application basis.
- *
- * @param <T> the output type of the aggregator
- */
-public abstract class AggregatorValues<T> {
-  /**
-   * Get the values of the {@link Aggregator} at all steps it was used.
-   */
-  public Collection<T> getValues() {
-    return getValuesAtSteps().values();
-  }
-
-  /**
-   * Get the values of the {@link Aggregator} by the user name at each step it was used.
-   */
-  public abstract Map<String, T> getValuesAtSteps();
-
-  /**
-   * Get the total value of this {@link Aggregator} by applying the specified {@link CombineFn}.
-   */
-  public T getTotalValue(CombineFn<T, ?, T> combineFn) {
-    return combineFn.apply(getValues());
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java
deleted file mode 100644
index 95e3dfe..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/BlockingDataflowPipelineRunner.java
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult.State;
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.options.BlockingDataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.concurrent.TimeUnit;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link PipelineRunner} that's like {@link DataflowPipelineRunner}
- * but that waits for the launched job to finish.
- *
- * <p>Prints out job status updates and console messages while it waits.
- *
- * <p>Returns the final job state, or throws an exception if the job
- * fails or cannot be monitored.
- *
- * <p><h3>Permissions</h3>
- * When reading from a Dataflow source or writing to a Dataflow sink using
- * {@code BlockingDataflowPipelineRunner}, the Google cloud services account and the Google compute
- * engine service account of the GCP project running the Dataflow Job will need access to the
- * corresponding source/sink.
- *
- * <p>Please see <a href="https://cloud.google.com/dataflow/security-and-permissions">Google Cloud
- * Dataflow Security and Permissions</a> for more details.
- */
-public class BlockingDataflowPipelineRunner extends
-    PipelineRunner<DataflowPipelineJob> {
-  private static final Logger LOG = LoggerFactory.getLogger(BlockingDataflowPipelineRunner.class);
-
-  // Defaults to an infinite wait period.
-  // TODO: make this configurable after removal of option map.
-  private static final long BUILTIN_JOB_TIMEOUT_SEC = -1L;
-
-  private final DataflowPipelineRunner dataflowPipelineRunner;
-  private final BlockingDataflowPipelineOptions options;
-
-  protected BlockingDataflowPipelineRunner(
-      DataflowPipelineRunner internalRunner,
-      BlockingDataflowPipelineOptions options) {
-    this.dataflowPipelineRunner = internalRunner;
-    this.options = options;
-  }
-
-  /**
-   * Constructs a runner from the provided options.
-   */
-  public static BlockingDataflowPipelineRunner fromOptions(
-      PipelineOptions options) {
-    BlockingDataflowPipelineOptions dataflowOptions =
-        PipelineOptionsValidator.validate(BlockingDataflowPipelineOptions.class, options);
-    DataflowPipelineRunner dataflowPipelineRunner =
-        DataflowPipelineRunner.fromOptions(dataflowOptions);
-
-    return new BlockingDataflowPipelineRunner(dataflowPipelineRunner, dataflowOptions);
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @throws DataflowJobExecutionException if there is an exception during job execution.
-   * @throws DataflowJobUpdatedException if the job was replaced by an updated job.
-   * @throws DataflowJobCancelledException if the job was cancelled.
-   * @throws DataflowServiceException if there is an exception retrieving information about the job.
-   */
-  @Override
-  public DataflowPipelineJob run(Pipeline p) {
-    final DataflowPipelineJob job = dataflowPipelineRunner.run(p);
-
-    // We ignore the potential race condition here (Ctrl-C after job submission but before the
-    // shutdown hook is registered). Even if we tried to do something smarter (eg., SettableFuture)
-    // the run method (which produces the job) could fail or be Ctrl-C'd before it had returned a
-    // job. The display of the command to cancel the job is best-effort anyways -- RPC's could fail,
-    // etc. If the user wants to verify the job was cancelled they should look at the job status.
-    Thread shutdownHook = new Thread() {
-      @Override
-      public void run() {
-        LOG.warn("Job is already running in Google Cloud Platform, Ctrl-C will not cancel it.\n"
-            + "To cancel the job in the cloud, run:\n> {}",
-            MonitoringUtil.getGcloudCancelCommand(options, job.getJobId()));
-      }
-    };
-
-    try {
-      Runtime.getRuntime().addShutdownHook(shutdownHook);
-      @Nullable State result;
-      try {
-        result = job.waitToFinish(BUILTIN_JOB_TIMEOUT_SEC, TimeUnit.SECONDS,
-            new MonitoringUtil.PrintHandler(options.getJobMessageOutput()));
-      } catch (IOException | InterruptedException ex) {
-        if (ex instanceof InterruptedException) {
-          Thread.currentThread().interrupt();  // Keep the interrupt visible to callers.
-        }
-        LOG.debug("Exception caught while retrieving status for job {}", job.getJobId(), ex);
-        throw new DataflowServiceException(
-            job, "Exception caught while retrieving status for job " + job.getJobId(), ex);
-      }
-
-      if (result == null) {
-        throw new DataflowServiceException(
-            job, "Timed out while retrieving status for job " + job.getJobId());
-      }
-
-      LOG.info("Job finished with status {}", result);
-      if (!result.isTerminal()) {
-        throw new IllegalStateException("Expected terminal state for job " + job.getJobId()
-            + ", got " + result);
-      }
-
-      if (result == State.DONE) {
-        return job;
-      } else if (result == State.UPDATED) {
-        DataflowPipelineJob newJob = job.getReplacedByJob();
-        LOG.info("Job {} has been updated and is running as the new job with id {}. "
-            + "To access the updated job on the Dataflow monitoring console, please navigate to {}",
-            job.getJobId(), newJob.getJobId(),
-            MonitoringUtil.getJobMonitoringPageURL(newJob.getProjectId(), newJob.getJobId()));
-        throw new DataflowJobUpdatedException(
-            job, String.format("Job %s updated; new job is %s.", job.getJobId(), newJob.getJobId()),
-            newJob);
-      } else if (result == State.CANCELLED) {
-        String message = String.format("Job %s cancelled by user", job.getJobId());
-        LOG.info(message);
-        throw new DataflowJobCancelledException(job, message);
-      } else {
-        throw new DataflowJobExecutionException(job, "Job " + job.getJobId()
-            + " failed with status " + result);
-      }
-    } finally {
-      Runtime.getRuntime().removeShutdownHook(shutdownHook);
-    }
-  }
-
-  @Override
-  public <OutputT extends POutput, InputT extends PInput> OutputT apply(
-      PTransform<InputT, OutputT> transform, InputT input) {
-    return dataflowPipelineRunner.apply(transform, input);
-  }
-
-  /**
-   * Sets callbacks to invoke during execution. See {@link DataflowPipelineRunnerHooks}.
-   */
-  @Experimental
-  public void setHooks(DataflowPipelineRunnerHooks hooks) {
-    this.dataflowPipelineRunner.setHooks(hooks);
-  }
-
-  @Override
-  public String toString() {
-    return "BlockingDataflowPipelineRunner#" + options.getJobName();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobAlreadyExistsException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobAlreadyExistsException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobAlreadyExistsException.java
deleted file mode 100644
index 1547f73..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobAlreadyExistsException.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-/**
- * An exception that is thrown if the unique job name constraint of the Dataflow
- * service is broken because an existing job with the same job name is currently active.
- * The {@link DataflowPipelineJob} contained within this exception contains information
- * about the pre-existing job.
- */
-public class DataflowJobAlreadyExistsException extends DataflowJobException {
-  /**
-   * Create a new {@code DataflowJobAlreadyExistsException} with the specified {@link
-   * DataflowPipelineJob} and message.
-   */
-  public DataflowJobAlreadyExistsException(
-      DataflowPipelineJob job, String message) {
-    super(job, message, null);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobAlreadyUpdatedException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobAlreadyUpdatedException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobAlreadyUpdatedException.java
deleted file mode 100644
index d4ae4f5..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobAlreadyUpdatedException.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-/**
- * An exception that is thrown if the existing job has already been updated within the Dataflow
- * service and is no longer able to be updated. The {@link DataflowPipelineJob} contained within
- * this exception contains information about the pre-existing updated job.
- */
-public class DataflowJobAlreadyUpdatedException extends DataflowJobException {
-  /**
-   * Create a new {@code DataflowJobAlreadyUpdatedException} with the specified {@link
-   * DataflowPipelineJob} and message.
-   */
-  public DataflowJobAlreadyUpdatedException(
-      DataflowPipelineJob job, String message) {
-    super(job, message, null);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobCancelledException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobCancelledException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobCancelledException.java
deleted file mode 100644
index 0d31726..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobCancelledException.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-/**
- * Signals that a job run by a {@link BlockingDataflowPipelineRunner} was cancelled.
- */
-public class DataflowJobCancelledException extends DataflowJobException {
-  /**
-   * Create a new {@code DataflowJobCancelledException} with the specified {@link
-   * DataflowPipelineJob} and message.
-   */
-  public DataflowJobCancelledException(DataflowPipelineJob job, String message) {
-    super(job, message, null);
-  }
-
-  /**
-   * Create a new {@code DataflowJobCancelledException} with the specified {@link
-   * DataflowPipelineJob}, message, and cause.
-   */
-  public DataflowJobCancelledException(DataflowPipelineJob job, String message, Throwable cause) {
-    super(job, message, cause);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobException.java
deleted file mode 100644
index 9e305d5..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobException.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import java.util.Objects;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link RuntimeException} that contains information about a {@link DataflowPipelineJob}.
- */
-public abstract class DataflowJobException extends RuntimeException {
-  private final DataflowPipelineJob job;
-
-  DataflowJobException(DataflowPipelineJob job, String message, @Nullable Throwable cause) {
-    super(message, cause);
-    this.job = Objects.requireNonNull(job);
-  }
-
-  /**
-   * Returns the job that this exception carries information about.
-   */
-  public DataflowPipelineJob getJob() {
-    return job;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobExecutionException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobExecutionException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobExecutionException.java
deleted file mode 100644
index ae6df0f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobExecutionException.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import javax.annotation.Nullable;
-
-/**
- * Signals that a job run by a {@link BlockingDataflowPipelineRunner} fails during execution, and
- * provides access to the failed job.
- */
-public class DataflowJobExecutionException extends DataflowJobException {
-  DataflowJobExecutionException(DataflowPipelineJob job, String message) {
-    this(job, message, null);
-  }
-
-  DataflowJobExecutionException(
-      DataflowPipelineJob job, String message, @Nullable Throwable cause) {
-    super(job, message, cause);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobUpdatedException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobUpdatedException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobUpdatedException.java
deleted file mode 100644
index 1becdd7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowJobUpdatedException.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-/**
- * Signals that a job run by a {@link BlockingDataflowPipelineRunner} was updated during execution.
- */
-public class DataflowJobUpdatedException extends DataflowJobException {
-  private final DataflowPipelineJob replacedByJob;
-
-  /**
-   * Create a new {@code DataflowJobUpdatedException} with the specified original {@link
-   * DataflowPipelineJob}, message, and replacement {@link DataflowPipelineJob}.
-   */
-  public DataflowJobUpdatedException(
-      DataflowPipelineJob job, String message, DataflowPipelineJob replacedByJob) {
-    this(job, message, replacedByJob, null);
-  }
-
-  /**
-   * Create a new {@code DataflowJobUpdatedException} with the specified original {@link
-   * DataflowPipelineJob}, message, replacement {@link DataflowPipelineJob}, and cause.
-   */
-  public DataflowJobUpdatedException(
-      DataflowPipelineJob job, String message, DataflowPipelineJob replacedByJob, Throwable cause) {
-    super(job, message, cause);
-    this.replacedByJob = replacedByJob;
-  }
-
-  /**
-   * The new job that replaces the job terminated with this exception.
-   */
-  public DataflowPipelineJob getReplacedByJob() {
-    return replacedByJob;
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipeline.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipeline.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipeline.java
deleted file mode 100644
index 5a78624..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipeline.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-
-/**
- * A {@link DataflowPipeline} is a {@link Pipeline} that returns a
- * {@link DataflowPipelineJob} when it is
- * {@link com.google.cloud.dataflow.sdk.Pipeline#run()}.
- *
- * <p>This is not intended for use by users of Cloud Dataflow.
- * Instead, use {@link Pipeline#create(PipelineOptions)} to initialize a
- * {@link Pipeline}.
- */
-public class DataflowPipeline extends Pipeline {
-
-  /**
-   * Creates and returns a new {@link DataflowPipeline} instance for tests.
-   */
-  public static DataflowPipeline create(DataflowPipelineOptions options) {
-    return new DataflowPipeline(options);
-  }
-
-  private DataflowPipeline(DataflowPipelineOptions options) {
-    super(DataflowPipelineRunner.fromOptions(options), options);
-  }
-
-  @Override
-  public DataflowPipelineJob run() {
-    return (DataflowPipelineJob) super.run();
-  }
-
-  @Override
-  public DataflowPipelineRunner getRunner() {
-    return (DataflowPipelineRunner) super.getRunner();
-  }
-
-  @Override
-  public String toString() {
-    return "DataflowPipeline#" + getOptions().as(DataflowPipelineOptions.class).getJobName();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java
deleted file mode 100644
index e9f134c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineJob.java
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners;
-
-import static com.google.cloud.dataflow.sdk.util.TimeUtil.fromCloudTime;
-
-import com.google.api.client.googleapis.json.GoogleJsonResponseException;
-import com.google.api.client.util.BackOff;
-import com.google.api.client.util.BackOffUtils;
-import com.google.api.client.util.NanoClock;
-import com.google.api.client.util.Sleeper;
-import com.google.api.services.dataflow.Dataflow;
-import com.google.api.services.dataflow.model.Job;
-import com.google.api.services.dataflow.model.JobMessage;
-import com.google.api.services.dataflow.model.JobMetrics;
-import com.google.api.services.dataflow.model.MetricUpdate;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.runners.dataflow.DataflowAggregatorTransforms;
-import com.google.cloud.dataflow.sdk.runners.dataflow.DataflowMetricUpdateExtractor;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.util.AttemptAndTimeBoundedExponentialBackOff;
-import com.google.cloud.dataflow.sdk.util.AttemptBoundedExponentialBackOff;
-import com.google.cloud.dataflow.sdk.util.MapAggregatorValues;
-import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Throwables;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.net.SocketTimeoutException;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.TimeUnit;
-
-import javax.annotation.Nullable;
-
-/**
- * A DataflowPipelineJob represents a job submitted to Dataflow using
- * {@link DataflowPipelineRunner}.
- */
-public class DataflowPipelineJob implements PipelineResult {
-  private static final Logger LOG = LoggerFactory.getLogger(DataflowPipelineJob.class);
-
-  /**
-   * The id for the job.
-   */
-  private String jobId;
-
-  /**
-   * Google cloud project to associate this pipeline with.
-   */
-  private String projectId;
-
-  /**
-   * Client for the Dataflow service. This can be used to query the service
-   * for information about the job.
-   */
-  private Dataflow dataflowClient;
-
-  /**
-   * The state the job terminated in or {@code null} if the job has not terminated.
-   */
-  @Nullable
-  private State terminalState = null;
-
-  /**
-   * The job that replaced this one or {@code null} if the job has not been replaced.
-   */
-  @Nullable
-  private DataflowPipelineJob replacedByJob = null;
-
-  private DataflowAggregatorTransforms aggregatorTransforms;
-
-  /**
-   * The Metric Updates retrieved after the job was in a terminal state.
-   */
-  private List<MetricUpdate> terminalMetricUpdates;
-
-  /**
-   * The polling interval for job status and messages information.
-   */
-  static final long MESSAGES_POLLING_INTERVAL = TimeUnit.SECONDS.toMillis(2);
-  static final long STATUS_POLLING_INTERVAL = TimeUnit.SECONDS.toMillis(2);
-
-  /**
-   * The amount of polling attempts for job status and messages information.
-   */
-  static final int MESSAGES_POLLING_ATTEMPTS = 10;
-  static final int STATUS_POLLING_ATTEMPTS = 5;
-
-  /**
-   * Constructs the job.
-   *
-   * @param projectId the project id
-   * @param jobId the job id
-   * @param dataflowClient the client for the Dataflow Service
-   */
-  public DataflowPipelineJob(String projectId, String jobId, Dataflow dataflowClient,
-      DataflowAggregatorTransforms aggregatorTransforms) {
-    this.projectId = projectId;
-    this.jobId = jobId;
-    this.dataflowClient = dataflowClient;
-    this.aggregatorTransforms = aggregatorTransforms;
-  }
-
-  /**
-   * Get the id of this job.
-   */
-  public String getJobId() {
-    return jobId;
-  }
-
-  /**
-   * Get the project this job exists in.
-   */
-  public String getProjectId() {
-    return projectId;
-  }
-
-  /**
-   * Returns a new {@link DataflowPipelineJob} for the job that replaced this one, if applicable.
-   *
-   * @throws IllegalStateException if called before the job has terminated or if the job terminated
-   * but was not updated
-   */
-  public DataflowPipelineJob getReplacedByJob() {
-    if (terminalState == null) {
-      throw new IllegalStateException("getReplacedByJob() called before job terminated");
-    }
-    if (replacedByJob == null) {
-      throw new IllegalStateException("getReplacedByJob() called for job that was not replaced");
-    }
-    return replacedByJob;
-  }
-
-  /**
-   * Get the Cloud Dataflow API Client used by this job.
-   */
-  public Dataflow getDataflowClient() {
-    return dataflowClient;
-  }
-
-  /**
-   * Waits for the job to finish and return the final status.
-   *
-   * @param timeToWait The time to wait in units timeUnit for the job to finish.
-   *     Provide a value less than 1 ms for an infinite wait.
-   * @param timeUnit The unit of time for timeToWait.
-   * @param messageHandler If non null this handler will be invoked for each
-   *   batch of messages received.
-   * @return The final state of the job or null on timeout or if the
-   *   thread is interrupted.
-   * @throws IOException If there is a persistent problem getting job
-   *   information.
-   * @throws InterruptedException
-   */
-  @Nullable
-  public State waitToFinish(
-      long timeToWait,
-      TimeUnit timeUnit,
-      MonitoringUtil.JobMessagesHandler messageHandler)
-          throws IOException, InterruptedException {
-    return waitToFinish(timeToWait, timeUnit, messageHandler, Sleeper.DEFAULT, NanoClock.SYSTEM);
-  }
-
-  /**
-   * Wait for the job to finish and return the final status.
-   *
-   * @param timeToWait The time to wait in units timeUnit for the job to finish.
-   *     Provide a value less than 1 ms for an infinite wait.
-   * @param timeUnit The unit of time for timeToWait.
-   * @param messageHandler If non null this handler will be invoked for each
-   *   batch of messages received.
-   * @param sleeper A sleeper to use to sleep between attempts.
-   * @param nanoClock A nanoClock used to time the total time taken.
-   * @return The final state of the job or null on timeout or if the
-   *   thread is interrupted.
-   * @throws IOException If there is a persistent problem getting job
-   *   information.
-   * @throws InterruptedException
-   */
-  @Nullable
-  @VisibleForTesting
-  State waitToFinish(
-      long timeToWait,
-      TimeUnit timeUnit,
-      MonitoringUtil.JobMessagesHandler messageHandler,
-      Sleeper sleeper,
-      NanoClock nanoClock)
-          throws IOException, InterruptedException {
-    MonitoringUtil monitor = new MonitoringUtil(projectId, dataflowClient);
-
-    long lastTimestamp = 0;
-    BackOff backoff =
-        timeUnit.toMillis(timeToWait) > 0
-            ? new AttemptAndTimeBoundedExponentialBackOff(
-                MESSAGES_POLLING_ATTEMPTS,
-                MESSAGES_POLLING_INTERVAL,
-                timeUnit.toMillis(timeToWait),
-                AttemptAndTimeBoundedExponentialBackOff.ResetPolicy.ATTEMPTS,
-                nanoClock)
-            : new AttemptBoundedExponentialBackOff(
-                MESSAGES_POLLING_ATTEMPTS, MESSAGES_POLLING_INTERVAL);
-    State state;
-    do {
-      // Get the state of the job before listing messages. This ensures we always fetch job
-      // messages after the job finishes to ensure we have all them.
-      state = getStateWithRetries(1, sleeper);
-      boolean hasError = state == State.UNKNOWN;
-
-      if (messageHandler != null && !hasError) {
-        // Process all the job messages that have accumulated so far.
-        try {
-          List<JobMessage> allMessages = monitor.getJobMessages(
-              jobId, lastTimestamp);
-
-          if (!allMessages.isEmpty()) {
-            lastTimestamp =
-                fromCloudTime(allMessages.get(allMessages.size() - 1).getTime()).getMillis();
-            messageHandler.process(allMessages);
-          }
-        } catch (GoogleJsonResponseException | SocketTimeoutException e) {
-          hasError = true;
-          LOG.warn("There were problems getting current job messages: {}.", e.getMessage());
-          LOG.debug("Exception information:", e);
-        }
-      }
-
-      if (!hasError) {
-        backoff.reset();
-        // Check if the job is done.
-        if (state.isTerminal()) {
-          return state;
-        }
-      }
-    } while(BackOffUtils.next(sleeper, backoff));
-    LOG.warn("No terminal state was returned.  State value {}", state);
-    return null;  // Timed out.
-  }
-
-  /**
-   * Cancels the job.
-   * @throws IOException if there is a problem executing the cancel request.
-   */
-  public void cancel() throws IOException {
-    Job content = new Job();
-    content.setProjectId(projectId);
-    content.setId(jobId);
-    content.setRequestedState("JOB_STATE_CANCELLED");
-    dataflowClient.projects().jobs()
-        .update(projectId, jobId, content)
-        .execute();
-  }
-
-  @Override
-  public State getState() {
-    if (terminalState != null) {
-      return terminalState;
-    }
-
-    return getStateWithRetries(STATUS_POLLING_ATTEMPTS, Sleeper.DEFAULT);
-  }
-
-  /**
-   * Attempts to get the state. Uses exponential backoff on failure up to the maximum number
-   * of passed in attempts.
-   *
-   * @param attempts The amount of attempts to make.
-   * @param sleeper Object used to do the sleeps between attempts.
-   * @return The state of the job or State.UNKNOWN in case of failure.
-   */
-  @VisibleForTesting
-  State getStateWithRetries(int attempts, Sleeper sleeper) {
-    if (terminalState != null) {
-      return terminalState;
-    }
-    try {
-      Job job = getJobWithRetries(attempts, sleeper);
-      return MonitoringUtil.toState(job.getCurrentState());
-    } catch (IOException exn) {
-      // The only IOException that getJobWithRetries is permitted to throw is the final IOException
-      // that caused the failure of retry. Other exceptions are wrapped in an unchecked exceptions
-      // and will propagate.
-      return State.UNKNOWN;
-    }
-  }
-
-  /**
-   * Attempts to get the underlying {@link Job}. Uses exponential backoff on failure up to the
-   * maximum number of passed in attempts.
-   *
-   * @param attempts The amount of attempts to make.
-   * @param sleeper Object used to do the sleeps between attempts.
-   * @return The underlying {@link Job} object.
-   * @throws IOException When the maximum number of retries is exhausted, the last exception is
-   * thrown.
-   */
-  @VisibleForTesting
-  Job getJobWithRetries(int attempts, Sleeper sleeper) throws IOException {
-    AttemptBoundedExponentialBackOff backoff =
-        new AttemptBoundedExponentialBackOff(attempts, STATUS_POLLING_INTERVAL);
-
-    // Retry loop ends in return or throw
-    while (true) {
-      try {
-        Job job = dataflowClient
-            .projects()
-            .jobs()
-            .get(projectId, jobId)
-            .execute();
-        State currentState = MonitoringUtil.toState(job.getCurrentState());
-        if (currentState.isTerminal()) {
-          terminalState = currentState;
-          replacedByJob = new DataflowPipelineJob(
-              getProjectId(), job.getReplacedByJobId(), dataflowClient, aggregatorTransforms);
-        }
-        return job;
-      } catch (IOException exn) {
-        LOG.warn("There were problems getting current job status: {}.", exn.getMessage());
-        LOG.debug("Exception information:", exn);
-
-        if (!nextBackOff(sleeper, backoff)) {
-          throw exn;
-        }
-      }
-    }
-  }
-
-  /**
-   * Identical to {@link BackOffUtils#next} but without checked exceptions.
-   */
-  private boolean nextBackOff(Sleeper sleeper, BackOff backoff) {
-    try {
-      return BackOffUtils.next(sleeper, backoff);
-    } catch (InterruptedException | IOException e) {
-      throw Throwables.propagate(e);
-    }
-  }
-
-  @Override
-  public <OutputT> AggregatorValues<OutputT> getAggregatorValues(Aggregator<?, OutputT> aggregator)
-      throws AggregatorRetrievalException {
-    try {
-      return new MapAggregatorValues<>(fromMetricUpdates(aggregator));
-    } catch (IOException e) {
-      throw new AggregatorRetrievalException(
-          "IOException when retrieving Aggregator values for Aggregator " + aggregator, e);
-    }
-  }
-
-  private <OutputT> Map<String, OutputT> fromMetricUpdates(Aggregator<?, OutputT> aggregator)
-      throws IOException {
-    if (aggregatorTransforms.contains(aggregator)) {
-      List<MetricUpdate> metricUpdates;
-      if (terminalMetricUpdates != null) {
-        metricUpdates = terminalMetricUpdates;
-      } else {
-        boolean terminal = getState().isTerminal();
-        JobMetrics jobMetrics =
-            dataflowClient.projects().jobs().getMetrics(projectId, jobId).execute();
-        metricUpdates = jobMetrics.getMetrics();
-        if (terminal && jobMetrics.getMetrics() != null) {
-          terminalMetricUpdates = metricUpdates;
-        }
-      }
-
-      return DataflowMetricUpdateExtractor.fromMetricUpdates(
-          aggregator, aggregatorTransforms, metricUpdates);
-    } else {
-      throw new IllegalArgumentException(
-          "Aggregator " + aggregator + " is not used in this pipeline");
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrar.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrar.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrar.java
deleted file mode 100644
index 0e4d4e9..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRegistrar.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import com.google.auto.service.AutoService;
-import com.google.cloud.dataflow.sdk.options.BlockingDataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar;
-import com.google.common.collect.ImmutableList;
-
-/**
- * Contains the {@link PipelineOptionsRegistrar} and {@link PipelineRunnerRegistrar} for
- * the {@link DataflowPipeline}.
- */
-public class DataflowPipelineRegistrar {
-  private DataflowPipelineRegistrar() { }
-
-  /**
-   * Register the {@link DataflowPipelineOptions} and {@link BlockingDataflowPipelineOptions}.
-   */
-  @AutoService(PipelineOptionsRegistrar.class)
-  public static class Options implements PipelineOptionsRegistrar {
-    @Override
-    public Iterable<Class<? extends PipelineOptions>> getPipelineOptions() {
-      return ImmutableList.<Class<? extends PipelineOptions>>of(
-          DataflowPipelineOptions.class,
-          BlockingDataflowPipelineOptions.class);
-    }
-  }
-
-  /**
-   * Register the {@link DataflowPipelineRunner} and {@link BlockingDataflowPipelineRunner}.
-   */
-  @AutoService(PipelineRunnerRegistrar.class)
-  public static class Runner implements PipelineRunnerRegistrar {
-    @Override
-    public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
-      return ImmutableList.<Class<? extends PipelineRunner<?>>>of(
-          DataflowPipelineRunner.class,
-          BlockingDataflowPipelineRunner.class);
-    }
-  }
-}

[61/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java
----------------------------------------------------------------------
diff --git a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java b/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java
deleted file mode 100644
index 4185376..0000000
--- a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/LeaderBoard.java
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game;
-
-import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
-import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
-import com.google.cloud.dataflow.examples.complete.game.utils.WriteToBigQuery;
-import com.google.cloud.dataflow.examples.complete.game.utils.WriteWindowedToBigQuery;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterProcessingTime;
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark;
-import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Repeatedly;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.joda.time.DateTimeZone;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TimeZone;
-
-/**
- * This class is the third in a series of four pipelines that tell a story in a 'gaming' domain,
- * following {@link UserScore} and {@link HourlyTeamScore}. Concepts include: processing unbounded
- * data using fixed windows; use of custom timestamps and event-time processing; generation of
- * early/speculative results; using .accumulatingFiredPanes() to do cumulative processing of late-
- * arriving data.
- *
- * <p> This pipeline processes an unbounded stream of 'game events'. The calculation of the team
- * scores uses fixed windowing based on event time (the time of the game play event), not
- * processing time (the time that an event is processed by the pipeline). The pipeline calculates
- * the sum of scores per team, for each window. By default, the team scores are calculated using
- * one-hour windows.
- *
- * <p> In contrast-- to demo another windowing option-- the user scores are calculated using a
- * global window, which periodically (every ten minutes) emits cumulative user score sums.
- *
- * <p> In contrast to the previous pipelines in the series, which used static, finite input data,
- * here we're using an unbounded data source, which lets us provide speculative results, and allows
- * handling of late data, at much lower latency. We can use the early/speculative results to keep a
- * 'leaderboard' updated in near-realtime. Our handling of late data lets us generate correct
- * results, e.g. for 'team prizes'. We're now outputing window results as they're
- * calculated, giving us much lower latency than with the previous batch examples.
- *
- * <p> Run {@link injector.Injector} to generate pubsub data for this pipeline.  The Injector
- * documentation provides more detail on how to do this.
- *
- * <p> To execute this pipeline using the Dataflow service, specify the pipeline configuration
- * like this:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- *   --dataset=YOUR-DATASET
- *   --topic=projects/YOUR-PROJECT/topics/YOUR-TOPIC
- * }
- * </pre>
- * where the BigQuery dataset you specify must already exist.
- * The PubSub topic you specify should be the same topic to which the Injector is publishing.
- */
-public class LeaderBoard extends HourlyTeamScore {
-
-  private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms";
-
-  private static DateTimeFormatter fmt =
-      DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS")
-          .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
-  static final Duration FIVE_MINUTES = Duration.standardMinutes(5);
-  static final Duration TEN_MINUTES = Duration.standardMinutes(10);
-
-
-  /**
-   * Options supported by {@link LeaderBoard}.
-   */
-  static interface Options extends HourlyTeamScore.Options, DataflowExampleOptions {
-
-    @Description("Pub/Sub topic to read from")
-    @Validation.Required
-    String getTopic();
-    void setTopic(String value);
-
-    @Description("Numeric value of fixed window duration for team analysis, in minutes")
-    @Default.Integer(60)
-    Integer getTeamWindowDuration();
-    void setTeamWindowDuration(Integer value);
-
-    @Description("Numeric value of allowed data lateness, in minutes")
-    @Default.Integer(120)
-    Integer getAllowedLateness();
-    void setAllowedLateness(Integer value);
-
-    @Description("Prefix used for the BigQuery table names")
-    @Default.String("leaderboard")
-    String getTableName();
-    void setTableName(String value);
-  }
-
-  /**
-   * Create a map of information that describes how to write pipeline output to BigQuery. This map
-   * is used to write team score sums and includes event timing information.
-   */
-  protected static Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>>
-      configureWindowedTableWrite() {
-
-    Map<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>> tableConfigure =
-        new HashMap<String, WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>>();
-    tableConfigure.put("team",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>("STRING",
-            c -> c.element().getKey()));
-    tableConfigure.put("total_score",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>("INTEGER",
-            c -> c.element().getValue()));
-    tableConfigure.put("window_start",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>("STRING",
-          c -> { IntervalWindow w = (IntervalWindow) c.window();
-                 return fmt.print(w.start()); }));
-    tableConfigure.put("processing_time",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
-            "STRING", c -> fmt.print(Instant.now())));
-    tableConfigure.put("timing",
-        new WriteWindowedToBigQuery.FieldInfo<KV<String, Integer>>(
-            "STRING", c -> c.pane().getTiming().toString()));
-    return tableConfigure;
-  }
-
-  /**
-   * Create a map of information that describes how to write pipeline output to BigQuery. This map
-   * is used to write user score sums.
-   */
-  protected static Map<String, WriteToBigQuery.FieldInfo<KV<String, Integer>>>
-      configureGlobalWindowBigQueryWrite() {
-
-    Map<String, WriteToBigQuery.FieldInfo<KV<String, Integer>>> tableConfigure =
-        configureBigQueryWrite();
-    tableConfigure.put("processing_time",
-        new WriteToBigQuery.FieldInfo<KV<String, Integer>>(
-            "STRING", c -> fmt.print(Instant.now())));
-    return tableConfigure;
-  }
-
-
-  public static void main(String[] args) throws Exception {
-
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    // Enforce that this pipeline is always run in streaming mode.
-    options.setStreaming(true);
-    // For example purposes, allow the pipeline to be easily cancelled instead of running
-    // continuously.
-    options.setRunner(DataflowPipelineRunner.class);
-    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
-    Pipeline pipeline = Pipeline.create(options);
-
-    // Read game events from Pub/Sub using custom timestamps, which are extracted from the pubsub
-    // data elements, and parse the data.
-    PCollection<GameActionInfo> gameEvents = pipeline
-        .apply(PubsubIO.Read.timestampLabel(TIMESTAMP_ATTRIBUTE).topic(options.getTopic()))
-        .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()));
-
-    // [START DocInclude_WindowAndTrigger]
-    // Extract team/score pairs from the event stream, using hour-long windows by default.
-    gameEvents
-        .apply(Window.named("LeaderboardTeamFixedWindows")
-          .<GameActionInfo>into(FixedWindows.of(
-              Duration.standardMinutes(options.getTeamWindowDuration())))
-          // We will get early (speculative) results as well as cumulative
-          // processing of late data.
-          .triggering(
-            AfterWatermark.pastEndOfWindow()
-            .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
-                  .plusDelayOf(FIVE_MINUTES))
-            .withLateFirings(AfterProcessingTime.pastFirstElementInPane()
-                  .plusDelayOf(TEN_MINUTES)))
-          .withAllowedLateness(Duration.standardMinutes(options.getAllowedLateness()))
-          .accumulatingFiredPanes())
-        // Extract and sum teamname/score pairs from the event data.
-        .apply("ExtractTeamScore", new ExtractAndSumScore("team"))
-        // Write the results to BigQuery.
-        .apply("WriteTeamScoreSums",
-               new WriteWindowedToBigQuery<KV<String, Integer>>(
-                  options.getTableName() + "_team", configureWindowedTableWrite()));
-    // [END DocInclude_WindowAndTrigger]
-
-    // [START DocInclude_ProcTimeTrigger]
-    // Extract user/score pairs from the event stream using processing time, via global windowing.
-    // Get periodic updates on all users' running scores.
-    gameEvents
-        .apply(Window.named("LeaderboardUserGlobalWindow")
-          .<GameActionInfo>into(new GlobalWindows())
-          // Get periodic results every ten minutes.
-              .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
-                  .plusDelayOf(TEN_MINUTES)))
-              .accumulatingFiredPanes()
-              .withAllowedLateness(Duration.standardMinutes(options.getAllowedLateness())))
-        // Extract and sum username/score pairs from the event data.
-        .apply("ExtractUserScore", new ExtractAndSumScore("user"))
-        // Write the results to BigQuery.
-        .apply("WriteUserScoreSums",
-               new WriteToBigQuery<KV<String, Integer>>(
-                  options.getTableName() + "_user", configureGlobalWindowBigQueryWrite()));
-    // [END DocInclude_ProcTimeTrigger]
-
-    // Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
-    // command line.
-    PipelineResult result = pipeline.run();
-    dataflowUtils.waitToFinish(result);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/README.md
----------------------------------------------------------------------
diff --git a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/README.md b/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/README.md
deleted file mode 100644
index 79b55ce..0000000
--- a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/README.md
+++ /dev/null
@@ -1,113 +0,0 @@
-
-# 'Gaming' examples
-
-
-This directory holds a series of example Dataflow pipelines in a simple 'mobile
-gaming' domain. They all require Java 8.  Each pipeline successively introduces
-new concepts, and gives some examples of using Java 8 syntax in constructing
-Dataflow pipelines. Other than usage of Java 8 lambda expressions, the concepts
-that are used apply equally well in Java 7.
-
-In the gaming scenario, many users play, as members of different teams, over
-the course of a day, and their actions are logged for processing. Some of the
-logged game events may be late-arriving, if users play on mobile devices and go
-transiently offline for a period.
-
-The scenario includes not only "regular" users, but "robot users", which have a
-higher click rate than the regular users, and may move from team to team.
-
-The first two pipelines in the series use pre-generated batch data samples. The
-second two pipelines read from a [PubSub](https://cloud.google.com/pubsub/)
-topic input.  For these examples, you will also need to run the
-`injector.Injector` program, which generates and publishes the gaming data to
-PubSub. The javadocs for each pipeline have more detailed information on how to
-run that pipeline.
-
-All of these pipelines write their results to BigQuery table(s).
-
-
-## The pipelines in the 'gaming' series
-
-### UserScore
-
-The first pipeline in the series is `UserScore`. This pipeline does batch
-processing of data collected from gaming events. It calculates the sum of
-scores per user, over an entire batch of gaming data (collected, say, for each
-day). The batch processing will not include any late data that arrives after
-the day's cutoff point.
-
-### HourlyTeamScore
-
-The next pipeline in the series is `HourlyTeamScore`. This pipeline also
-processes data collected from gaming events in batch. It builds on `UserScore`,
-but uses [fixed windows](https://cloud.google.com/dataflow/model/windowing), by
-default an hour in duration. It calculates the sum of scores per team, for each
-window, optionally allowing specification of two timestamps before and after
-which data is filtered out. This allows a model where late data collected after
-the intended analysis window can be included in the analysis, and any late-
-arriving data prior to the beginning of the analysis window can be removed as
-well.
-
-By using windowing and adding element timestamps, we can do finer-grained
-analysis than with the `UserScore` pipeline — we're now tracking scores for
-each hour rather than over the course of a whole day. However, our batch
-processing is high-latency, in that we don't get results from plays at the
-beginning of the batch's time period until the complete batch is processed.
-
-### LeaderBoard
-
-The third pipeline in the series is `LeaderBoard`. This pipeline processes an
-unbounded stream of 'game events' from a PubSub topic. The calculation of the
-team scores uses fixed windowing based on event time (the time of the game play
-event), not processing time (the time that an event is processed by the
-pipeline). The pipeline calculates the sum of scores per team, for each window.
-By default, the team scores are calculated using one-hour windows.
-
-In contrast — to demo another windowing option — the user scores are calculated
-using a global window, which periodically (every ten minutes) emits cumulative
-user score sums.
-
-In contrast to the previous pipelines in the series, which used static, finite
-input data, here we're using an unbounded data source, which lets us provide
-_speculative_ results, and allows handling of late data, at much lower latency.
-E.g., we could use the early/speculative results to keep a 'leaderboard'
-updated in near-realtime. Our handling of late data lets us generate correct
-results, e.g. for 'team prizes'. We're now outputing window results as they're
-calculated, giving us much lower latency than with the previous batch examples.
-
-### GameStats
-
-The fourth pipeline in the series is `GameStats`. This pipeline builds
-on the `LeaderBoard` functionality — supporting output of speculative and late
-data — and adds some "business intelligence" analysis: identifying abuse
-detection. The pipeline derives the Mean user score sum for a window, and uses
-that information to identify likely spammers/robots. (The injector is designed
-so that the "robots" have a higher click rate than the "real" users). The robot
-users are then filtered out when calculating the team scores.
-
-Additionally, user sessions are tracked: that is, we find bursts of user
-activity using session windows. Then, the mean session duration information is
-recorded in the context of subsequent fixed windowing. (This could be used to
-tell us what games are giving us greater user retention).
-
-### Running the PubSub Injector
-
-The `LeaderBoard` and `GameStats` example pipelines read unbounded data
-from a PubSub topic.
-
-Use the `injector.Injector` program to generate this data and publish to a
-PubSub topic. See the `Injector`javadocs for more information on how to run the
-injector. Set up the injector before you start one of these pipelines. Then,
-when you start the pipeline, pass as an argument the name of that PubSub topic.
-See the pipeline javadocs for the details.
-
-## Viewing the results in BigQuery
-
-All of the pipelines write their results to BigQuery.  `UserScore` and
-`HourlyTeamScore` each write one table, and `LeaderBoard` and
-`GameStats` each write two. The pipelines have default table names that
-you can override when you start up the pipeline if those tables already exist.
-
-Depending on the windowing intervals defined in a given pipeline, you may have
-to wait for a while (more than an hour) before you start to see results written
-to the BigQuery tables.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/UserScore.java
----------------------------------------------------------------------
diff --git a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/UserScore.java b/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/UserScore.java
deleted file mode 100644
index de06ce3..0000000
--- a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/UserScore.java
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game;
-
-import com.google.cloud.dataflow.examples.complete.game.utils.WriteToBigQuery;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.AvroCoder;
-import com.google.cloud.dataflow.sdk.coders.DefaultCoder;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.options.Validation;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.MapElements;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import org.apache.avro.reflect.Nullable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * This class is the first in a series of four pipelines that tell a story in a 'gaming' domain.
- * Concepts: batch processing; reading input from Google Cloud Storage and writing output to
- * BigQuery; using standalone DoFns; use of the sum by key transform; examples of
- * Java 8 lambda syntax.
- *
- * <p> In this gaming scenario, many users play, as members of different teams, over the course of a
- * day, and their actions are logged for processing.  Some of the logged game events may be late-
- * arriving, if users play on mobile devices and go transiently offline for a period.
- *
- * <p> This pipeline does batch processing of data collected from gaming events. It calculates the
- * sum of scores per user, over an entire batch of gaming data (collected, say, for each day). The
- * batch processing will not include any late data that arrives after the day's cutoff point.
- *
- * <p> To execute this pipeline using the Dataflow service and static example input data, specify
- * the pipeline configuration like this:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- *   --runner=BlockingDataflowPipelineRunner
- *   --dataset=YOUR-DATASET
- * }
- * </pre>
- * where the BigQuery dataset you specify must already exist.
- *
- * <p> Optionally include the --input argument to specify a batch input file.
- * See the --input default value for example batch data file, or use {@link injector.Injector} to
- * generate your own batch data.
-  */
-public class UserScore {
-
-  /**
-   * Class to hold info about a game event.
-   */
-  @DefaultCoder(AvroCoder.class)
-  static class GameActionInfo {
-    @Nullable String user;
-    @Nullable String team;
-    @Nullable Integer score;
-    @Nullable Long timestamp;
-
-    public GameActionInfo() {}
-
-    public GameActionInfo(String user, String team, Integer score, Long timestamp) {
-      this.user = user;
-      this.team = team;
-      this.score = score;
-      this.timestamp = timestamp;
-    }
-
-    public String getUser() {
-      return this.user;
-    }
-    public String getTeam() {
-      return this.team;
-    }
-    public Integer getScore() {
-      return this.score;
-    }
-    public String getKey(String keyname) {
-      if (keyname.equals("team")) {
-        return this.team;
-      } else {  // return username as default
-        return this.user;
-      }
-    }
-    public Long getTimestamp() {
-      return this.timestamp;
-    }
-  }
-
-
-  /**
-   * Parses the raw game event info into GameActionInfo objects. Each event line has the following
-   * format: username,teamname,score,timestamp_in_ms,readable_time
-   * e.g.:
-   * user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224
-   * The human-readable time string is not used here.
-   */
-  static class ParseEventFn extends DoFn<String, GameActionInfo> {
-
-    // Log and count parse errors.
-    private static final Logger LOG = LoggerFactory.getLogger(ParseEventFn.class);
-    private final Aggregator<Long, Long> numParseErrors =
-        createAggregator("ParseErrors", new Sum.SumLongFn());
-
-    @Override
-    public void processElement(ProcessContext c) {
-      String[] components = c.element().split(",");
-      try {
-        String user = components[0].trim();
-        String team = components[1].trim();
-        Integer score = Integer.parseInt(components[2].trim());
-        Long timestamp = Long.parseLong(components[3].trim());
-        GameActionInfo gInfo = new GameActionInfo(user, team, score, timestamp);
-        c.output(gInfo);
-      } catch (ArrayIndexOutOfBoundsException | NumberFormatException e) {
-        numParseErrors.addValue(1L);
-        LOG.info("Parse error on " + c.element() + ", " + e.getMessage());
-      }
-    }
-  }
-
-  /**
-   * A transform to extract key/score information from GameActionInfo, and sum the scores. The
-   * constructor arg determines whether 'team' or 'user' info is extracted.
-   */
-  // [START DocInclude_USExtractXform]
-  public static class ExtractAndSumScore
-      extends PTransform<PCollection<GameActionInfo>, PCollection<KV<String, Integer>>> {
-
-    private final String field;
-
-    ExtractAndSumScore(String field) {
-      this.field = field;
-    }
-
-    @Override
-    public PCollection<KV<String, Integer>> apply(
-        PCollection<GameActionInfo> gameInfo) {
-
-      return gameInfo
-        .apply(MapElements
-            .via((GameActionInfo gInfo) -> KV.of(gInfo.getKey(field), gInfo.getScore()))
-            .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}))
-        .apply(Sum.<String>integersPerKey());
-    }
-  }
-  // [END DocInclude_USExtractXform]
-
-
-  /**
-   * Options supported by {@link UserScore}.
-   */
-  public static interface Options extends PipelineOptions {
-
-    @Description("Path to the data file(s) containing game data.")
-    // The default maps to two large Google Cloud Storage files (each ~12GB) holding two successive
-    // days' worth (roughly) of data.
-    @Default.String("gs://dataflow-samples/game/gaming_data*.csv")
-    String getInput();
-    void setInput(String value);
-
-    @Description("BigQuery Dataset to write tables to. Must already exist.")
-    @Validation.Required
-    String getDataset();
-    void setDataset(String value);
-
-    @Description("The BigQuery table name. Should not already exist.")
-    @Default.String("user_score")
-    String getTableName();
-    void setTableName(String value);
-  }
-
-  /**
-   * Create a map of information that describes how to write pipeline output to BigQuery. This map
-   * is passed to the {@link WriteToBigQuery} constructor to write user score sums.
-   */
-  protected static Map<String, WriteToBigQuery.FieldInfo<KV<String, Integer>>>
-    configureBigQueryWrite() {
-    Map<String, WriteToBigQuery.FieldInfo<KV<String, Integer>>> tableConfigure =
-        new HashMap<String, WriteToBigQuery.FieldInfo<KV<String, Integer>>>();
-    tableConfigure.put("user",
-        new WriteToBigQuery.FieldInfo<KV<String, Integer>>("STRING", c -> c.element().getKey()));
-    tableConfigure.put("total_score",
-        new WriteToBigQuery.FieldInfo<KV<String, Integer>>("INTEGER", c -> c.element().getValue()));
-    return tableConfigure;
-  }
-
-
-  /**
-   * Run a batch pipeline.
-   */
- // [START DocInclude_USMain]
-  public static void main(String[] args) throws Exception {
-    // Begin constructing a pipeline configured by commandline flags.
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    Pipeline pipeline = Pipeline.create(options);
-
-    // Read events from a text file and parse them.
-    pipeline.apply(TextIO.Read.from(options.getInput()))
-      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
-      // Extract and sum username/score pairs from the event data.
-      .apply("ExtractUserScore", new ExtractAndSumScore("user"))
-      .apply("WriteUserScoreSums",
-          new WriteToBigQuery<KV<String, Integer>>(options.getTableName(),
-                                                   configureBigQueryWrite()));
-
-    // Run the batch pipeline.
-    pipeline.run();
-  }
-  // [END DocInclude_USMain]
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java
----------------------------------------------------------------------
diff --git a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java b/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java
deleted file mode 100644
index 1691c54..0000000
--- a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/Injector.java
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game.injector;
-
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.pubsub.model.PublishRequest;
-import com.google.api.services.pubsub.model.PubsubMessage;
-import com.google.common.collect.ImmutableMap;
-
-import org.joda.time.DateTimeZone;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-
-import java.io.BufferedOutputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Random;
-import java.util.TimeZone;
-
-
-/**
- * This is a generator that simulates usage data from a mobile game, and either publishes the data
- * to a pubsub topic or writes it to a file.
- *
- * <p> The general model used by the generator is the following. There is a set of teams with team
- * members. Each member is scoring points for their team. After some period, a team will dissolve
- * and a new one will be created in its place. There is also a set of 'Robots', or spammer users.
- * They hop from team to team. The robots are set to have a higher 'click rate' (generate more
- * events) than the regular team members.
- *
- * <p> Each generated line of data has the following form:
- * username,teamname,score,timestamp_in_ms,readable_time
- * e.g.:
- * user2_AsparagusPig,AsparagusPig,10,1445230923951,2015-11-02 09:09:28.224
- *
- * <p> The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if
- * specified. It takes the following arguments:
- * {@code Injector project-name (topic-name|none) (filename|none)}.
- *
- * <p> To run the Injector in the mode where it publishes to PubSub, you will need to authenticate
- * locally using project-based service account credentials to avoid running over PubSub
- * quota.
- * See https://developers.google.com/identity/protocols/application-default-credentials
- * for more information on using service account credentials. Set the GOOGLE_APPLICATION_CREDENTIALS
- * environment variable to point to your downloaded service account credentials before starting the
- * program, e.g.:
- * {@code export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/credentials-key.json}.
- * If you do not do this, then your injector will only run for a few minutes on your
- * 'user account' credentials before you will start to see quota error messages like:
- * "Request throttled due to user QPS limit being reached", and see this exception:
- * ".com.google.api.client.googleapis.json.GoogleJsonResponseException: 429 Too Many Requests".
- * Once you've set up your credentials, run the Injector like this:
-  * <pre>{@code
- * Injector <project-name> <topic-name> none
- * }
- * </pre>
- * The pubsub topic will be created if it does not exist.
- *
- * <p> To run the injector in write-to-file-mode, set the topic name to "none" and specify the
- * filename:
- * <pre>{@code
- * Injector <project-name> none <filename>
- * }
- * </pre>
- */
-class Injector {
-  private static Pubsub pubsub;
-  private static Random random = new Random();
-  private static String topic;
-  private static String project;
-  private static final String TIMESTAMP_ATTRIBUTE = "timestamp_ms";
-
-  // QPS ranges from 800 to 1000.
-  private static final int MIN_QPS = 800;
-  private static final int QPS_RANGE = 200;
-  // How long to sleep, in ms, between creation of the threads that make API requests to PubSub.
-  private static final int THREAD_SLEEP_MS = 500;
-
-  // Lists used to generate random team names.
-  private static final ArrayList<String> COLORS =
-      new ArrayList<String>(Arrays.asList(
-         "Magenta", "AliceBlue", "Almond", "Amaranth", "Amber",
-         "Amethyst", "AndroidGreen", "AntiqueBrass", "Fuchsia", "Ruby", "AppleGreen",
-         "Apricot", "Aqua", "ArmyGreen", "Asparagus", "Auburn", "Azure", "Banana",
-         "Beige", "Bisque", "BarnRed", "BattleshipGrey"));
-
-  private static final ArrayList<String> ANIMALS =
-      new ArrayList<String>(Arrays.asList(
-         "Echidna", "Koala", "Wombat", "Marmot", "Quokka", "Kangaroo", "Dingo", "Numbat", "Emu",
-         "Wallaby", "CaneToad", "Bilby", "Possum", "Cassowary", "Kookaburra", "Platypus",
-         "Bandicoot", "Cockatoo", "Antechinus"));
-
-  // The list of live teams.
-  private static ArrayList<TeamInfo> liveTeams = new ArrayList<TeamInfo>();
-
-  private static DateTimeFormatter fmt =
-    DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS")
-        .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")));
-
-
-  // The total number of robots in the system.
-  private static final int NUM_ROBOTS = 20;
-  // Determines the chance that a team will have a robot team member.
-  private static final int ROBOT_PROBABILITY = 3;
-  private static final int NUM_LIVE_TEAMS = 15;
-  private static final int BASE_MEMBERS_PER_TEAM = 5;
-  private static final int MEMBERS_PER_TEAM = 15;
-  private static final int MAX_SCORE = 20;
-  private static final int LATE_DATA_RATE = 5 * 60 * 2;       // Every 5 min (600 loops x 500 ms)
-  private static final int BASE_DELAY_IN_MILLIS = 5 * 60 * 1000;  // 5-10 minute delay
-  private static final int FUZZY_DELAY_IN_MILLIS = 5 * 60 * 1000;
-
-  // The minimum time a 'team' can live.
-  private static final int BASE_TEAM_EXPIRATION_TIME_IN_MINS = 20;
-  private static final int TEAM_EXPIRATION_TIME_IN_MINS = 20;
-
-
-  /**
-   * A class for holding team info: the name of the team, when it started,
-   * and the current team members. Teams may but need not include one robot team member.
-   */
-  private static class TeamInfo {
-    String teamName;
-    long startTimeInMillis;
-    int expirationPeriod;
-    // The team might but need not include 1 robot. Will be non-null if so.
-    String robot;
-    int numMembers;
-
-    private TeamInfo(String teamName, long startTimeInMillis, String robot) {
-      this.teamName = teamName;
-      this.startTimeInMillis = startTimeInMillis;
-      // How long until this team is dissolved.
-      this.expirationPeriod = random.nextInt(TEAM_EXPIRATION_TIME_IN_MINS) +
-        BASE_TEAM_EXPIRATION_TIME_IN_MINS;
-      this.robot = robot;
-      // Determine the number of team members.
-      numMembers = random.nextInt(MEMBERS_PER_TEAM) + BASE_MEMBERS_PER_TEAM;
-    }
-
-    String getTeamName() {
-      return teamName;
-    }
-    String getRobot() {
-      return robot;
-    }
-
-    long getStartTimeInMillis() {
-      return startTimeInMillis;
-    }
-    long getEndTimeInMillis() {
-      return startTimeInMillis + (expirationPeriod * 60 * 1000);
-    }
-    String getRandomUser() {
-      int userNum = random.nextInt(numMembers);
-      return "user" + userNum + "_" + teamName;
-    }
-
-    int numMembers() {
-      return numMembers;
-    }
-
-    @Override
-    public String toString() {
-      return "(" + teamName + ", num members: " + numMembers() + ", starting at: "
-        + startTimeInMillis + ", expires in: " + expirationPeriod + ", robot: " + robot + ")";
-    }
-  }
-
-  /** Utility to grab a random element from an array of Strings. */
-  private static String randomElement(ArrayList<String> list) {
-    int index = random.nextInt(list.size());
-    return list.get(index);
-  }
-
-  /**
-   * Get and return a random team. If the selected team is too old w.r.t its expiration, remove
-   * it, replacing it with a new team.
-   */
-  private static TeamInfo randomTeam(ArrayList<TeamInfo> list) {
-    int index = random.nextInt(list.size());
-    TeamInfo team = list.get(index);
-    // If the selected team is expired, remove it and return a new team.
-    long currTime = System.currentTimeMillis();
-    if ((team.getEndTimeInMillis() < currTime) || team.numMembers() == 0) {
-      System.out.println("\nteam " + team + " is too old; replacing.");
-      System.out.println("start time: " + team.getStartTimeInMillis() +
-        ", end time: " + team.getEndTimeInMillis() +
-        ", current time:" + currTime);
-      removeTeam(index);
-      // Add a new team in its stead.
-      return (addLiveTeam());
-    } else {
-      return team;
-    }
-  }
-
-  /**
-   * Create and add a team. Possibly add a robot to the team.
-   */
-  private static synchronized TeamInfo addLiveTeam() {
-    String teamName = randomElement(COLORS) + randomElement(ANIMALS);
-    String robot = null;
-    // Decide if we want to add a robot to the team.
-    if (random.nextInt(ROBOT_PROBABILITY) == 0) {
-      robot = "Robot-" + random.nextInt(NUM_ROBOTS);
-    }
-    // Create the new team.
-    TeamInfo newTeam = new TeamInfo(teamName, System.currentTimeMillis(), robot);
-    liveTeams.add(newTeam);
-    System.out.println("[+" + newTeam + "]");
-    return newTeam;
-  }
-
-  /**
-   * Remove a specific team.
-   */
-  private static synchronized void removeTeam(int teamIndex) {
-    TeamInfo removedTeam = liveTeams.remove(teamIndex);
-    System.out.println("[-" + removedTeam + "]");
-  }
-
-  /** Generate a user gaming event. */
-  private static String generateEvent(Long currTime, int delayInMillis) {
-    TeamInfo team = randomTeam(liveTeams);
-    String teamName = team.getTeamName();
-    String user;
-    final int parseErrorRate = 900000;
-
-    String robot = team.getRobot();
-    // If the team has an associated robot team member...
-    if (robot != null) {
-      // Then use that robot for the message with some probability.
-      // Set this probability to higher than that used to select any of the 'regular' team
-      // members, so that if there is a robot on the team, it has a higher click rate.
-      if (random.nextInt(team.numMembers() / 2) == 0) {
-        user = robot;
-      } else {
-        user = team.getRandomUser();
-      }
-    } else { // No robot.
-      user = team.getRandomUser();
-    }
-    String event = user + "," + teamName + "," + random.nextInt(MAX_SCORE);
-    // Randomly introduce occasional parse errors. You can see a custom counter tracking the number
-    // of such errors in the Dataflow Monitoring UI, as the example pipeline runs.
-    if (random.nextInt(parseErrorRate) == 0) {
-      System.out.println("Introducing a parse error.");
-      event = "THIS LINE REPRESENTS CORRUPT DATA AND WILL CAUSE A PARSE ERROR";
-    }
-    return addTimeInfoToEvent(event, currTime, delayInMillis);
-  }
-
-  /**
-   * Add time info to a generated gaming event.
-   */
-  private static String addTimeInfoToEvent(String message, Long currTime, int delayInMillis) {
-    String eventTimeString =
-        Long.toString((currTime - delayInMillis) / 1000 * 1000);
-    // Add a (redundant) 'human-readable' date string to make the data semantics more clear.
-    String dateString = fmt.print(currTime);
-    message = message + "," + eventTimeString + "," + dateString;
-    return message;
-  }
-
-  /**
-   * Publish 'numMessages' arbitrary events from live users with the provided delay, to a
-   * PubSub topic.
-   */
-  public static void publishData(int numMessages, int delayInMillis)
-      throws IOException {
-    List<PubsubMessage> pubsubMessages = new ArrayList<>();
-
-    for (int i = 0; i < Math.max(1, numMessages); i++) {
-      Long currTime = System.currentTimeMillis();
-      String message = generateEvent(currTime, delayInMillis);
-      PubsubMessage pubsubMessage = new PubsubMessage()
-              .encodeData(message.getBytes("UTF-8"));
-      pubsubMessage.setAttributes(
-          ImmutableMap.of(TIMESTAMP_ATTRIBUTE,
-              Long.toString((currTime - delayInMillis) / 1000 * 1000)));
-      if (delayInMillis != 0) {
-        System.out.println(pubsubMessage.getAttributes());
-        System.out.println("late data for: " + message);
-      }
-      pubsubMessages.add(pubsubMessage);
-    }
-
-    PublishRequest publishRequest = new PublishRequest();
-    publishRequest.setMessages(pubsubMessages);
-    pubsub.projects().topics().publish(topic, publishRequest).execute();
-  }
-
-  /**
-   * Publish generated events to a file.
-   */
-  public static void publishDataToFile(String fileName, int numMessages, int delayInMillis)
-      throws IOException {
-    PrintWriter out = new PrintWriter(new OutputStreamWriter(
-        new BufferedOutputStream(new FileOutputStream(fileName, true)), "UTF-8"));
-
-    try {
-      for (int i = 0; i < Math.max(1, numMessages); i++) {
-        Long currTime = System.currentTimeMillis();
-        String message = generateEvent(currTime, delayInMillis);
-        out.println(message);
-      }
-    } catch (Exception e) {
-      e.printStackTrace();
-    } finally {
-      if (out != null) {
-        out.flush();
-        out.close();
-      }
-    }
-  }
-
-
-  public static void main(String[] args) throws IOException, InterruptedException {
-    if (args.length < 3) {
-      System.out.println("Usage: Injector project-name (topic-name|none) (filename|none)");
-      System.exit(1);
-    }
-    boolean writeToFile = false;
-    boolean writeToPubsub = true;
-    project = args[0];
-    String topicName = args[1];
-    String fileName = args[2];
-    // The Injector writes either to a PubSub topic, or a file. It will use the PubSub topic if
-    // specified; otherwise, it will try to write to a file.
-    if (topicName.equalsIgnoreCase("none")) {
-      writeToFile = true;
-      writeToPubsub = false;
-    }
-    if (writeToPubsub) {
-      // Create the PubSub client.
-      pubsub = InjectorUtils.getClient();
-      // Create the PubSub topic as necessary.
-      topic = InjectorUtils.getFullyQualifiedTopicName(project, topicName);
-      InjectorUtils.createTopic(pubsub, topic);
-      System.out.println("Injecting to topic: " + topic);
-    } else {
-      if (fileName.equalsIgnoreCase("none")) {
-        System.out.println("Filename not specified.");
-        System.exit(1);
-      }
-      System.out.println("Writing to file: " + fileName);
-    }
-    System.out.println("Starting Injector");
-
-    // Start off with some random live teams.
-    while (liveTeams.size() < NUM_LIVE_TEAMS) {
-      addLiveTeam();
-    }
-
-    // Publish messages at a rate determined by the QPS and Thread sleep settings.
-    for (int i = 0; true; i++) {
-      if (Thread.activeCount() > 10) {
-        System.err.println("I'm falling behind!");
-      }
-
-      // Decide if this should be a batch of late data.
-      final int numMessages;
-      final int delayInMillis;
-      if (i % LATE_DATA_RATE == 0) {
-        // Insert delayed data for one user (one message only)
-        delayInMillis = BASE_DELAY_IN_MILLIS + random.nextInt(FUZZY_DELAY_IN_MILLIS);
-        numMessages = 1;
-        System.out.println("DELAY(" + delayInMillis + ", " + numMessages + ")");
-      } else {
-        System.out.print(".");
-        delayInMillis = 0;
-        numMessages = MIN_QPS + random.nextInt(QPS_RANGE);
-      }
-
-      if (writeToFile) { // Won't use threading for the file write.
-        publishDataToFile(fileName, numMessages, delayInMillis);
-      } else { // Write to PubSub.
-        // Start a thread to inject some data.
-        new Thread(){
-          @Override
-          public void run() {
-            try {
-              publishData(numMessages, delayInMillis);
-            } catch (IOException e) {
-              System.err.println(e);
-            }
-          }
-        }.start();
-      }
-
-      // Wait before creating another injector thread.
-      Thread.sleep(THREAD_SLEEP_MS);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java
----------------------------------------------------------------------
diff --git a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java b/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java
deleted file mode 100644
index 55982df..0000000
--- a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/InjectorUtils.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game.injector;
-
-
-import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
-import com.google.api.client.googleapis.json.GoogleJsonResponseException;
-import com.google.api.client.googleapis.util.Utils;
-import com.google.api.client.http.HttpRequestInitializer;
-import com.google.api.client.http.HttpStatusCodes;
-import com.google.api.client.http.HttpTransport;
-import com.google.api.client.json.JsonFactory;
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.pubsub.PubsubScopes;
-import com.google.api.services.pubsub.model.Topic;
-
-import com.google.common.base.Preconditions;
-
-import java.io.IOException;
-
-class InjectorUtils {
-
-  private static final String APP_NAME = "injector";
-
-  /**
-   * Builds a new Pubsub client and returns it.
-   */
-  public static Pubsub getClient(final HttpTransport httpTransport,
-                                 final JsonFactory jsonFactory)
-           throws IOException {
-      Preconditions.checkNotNull(httpTransport);
-      Preconditions.checkNotNull(jsonFactory);
-      GoogleCredential credential =
-          GoogleCredential.getApplicationDefault(httpTransport, jsonFactory);
-      if (credential.createScopedRequired()) {
-          credential = credential.createScoped(PubsubScopes.all());
-      }
-      if (credential.getClientAuthentication() != null) {
-        System.out.println("\n***Warning! You are not using service account credentials to "
-          + "authenticate.\nYou need to use service account credentials for this example,"
-          + "\nsince user-level credentials do not have enough pubsub quota,\nand so you will run "
-          + "out of PubSub quota very quickly.\nSee "
-          + "https://developers.google.com/identity/protocols/application-default-credentials.");
-        System.exit(1);
-      }
-      HttpRequestInitializer initializer =
-          new RetryHttpInitializerWrapper(credential);
-      return new Pubsub.Builder(httpTransport, jsonFactory, initializer)
-              .setApplicationName(APP_NAME)
-              .build();
-  }
-
-  /**
-   * Builds a new Pubsub client with default HttpTransport and
-   * JsonFactory and returns it.
-   */
-  public static Pubsub getClient() throws IOException {
-      return getClient(Utils.getDefaultTransport(),
-                       Utils.getDefaultJsonFactory());
-  }
-
-
-  /**
-   * Returns the fully qualified topic name for Pub/Sub.
-   */
-  public static String getFullyQualifiedTopicName(
-          final String project, final String topic) {
-      return String.format("projects/%s/topics/%s", project, topic);
-  }
-
-  /**
-   * Create a topic if it doesn't exist.
-   */
-  public static void createTopic(Pubsub client, String fullTopicName)
-      throws IOException {
-    try {
-        client.projects().topics().get(fullTopicName).execute();
-    } catch (GoogleJsonResponseException e) {
-      if (e.getStatusCode() == HttpStatusCodes.STATUS_CODE_NOT_FOUND) {
-        Topic topic = client.projects().topics()
-                .create(fullTopicName, new Topic())
-                .execute();
-        System.out.printf("Topic %s was created.\n", topic.getName());
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java
----------------------------------------------------------------------
diff --git a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java b/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java
deleted file mode 100644
index 1437534..0000000
--- a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/injector/RetryHttpInitializerWrapper.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game.injector;
-
-import com.google.api.client.auth.oauth2.Credential;
-import com.google.api.client.http.HttpBackOffIOExceptionHandler;
-import com.google.api.client.http.HttpBackOffUnsuccessfulResponseHandler;
-import com.google.api.client.http.HttpRequest;
-import com.google.api.client.http.HttpRequestInitializer;
-import com.google.api.client.http.HttpResponse;
-import com.google.api.client.http.HttpUnsuccessfulResponseHandler;
-import com.google.api.client.util.ExponentialBackOff;
-import com.google.api.client.util.Sleeper;
-import com.google.common.base.Preconditions;
-
-import java.io.IOException;
-import java.util.logging.Logger;
-
-/**
- * RetryHttpInitializerWrapper will automatically retry upon RPC
- * failures, preserving the auto-refresh behavior of the Google
- * Credentials.
- */
-public class RetryHttpInitializerWrapper implements HttpRequestInitializer {
-
-    /**
-     * A private logger.
-     */
-    private static final Logger LOG =
-            Logger.getLogger(RetryHttpInitializerWrapper.class.getName());
-
-    /**
-     * One minutes in miliseconds.
-     */
-    private static final int ONEMINITUES = 60000;
-
-    /**
-     * Intercepts the request for filling in the "Authorization"
-     * header field, as well as recovering from certain unsuccessful
-     * error codes wherein the Credential must refresh its token for a
-     * retry.
-     */
-    private final Credential wrappedCredential;
-
-    /**
-     * A sleeper; you can replace it with a mock in your test.
-     */
-    private final Sleeper sleeper;
-
-    /**
-     * A constructor.
-     *
-     * @param wrappedCredential Credential which will be wrapped and
-     * used for providing auth header.
-     */
-    public RetryHttpInitializerWrapper(final Credential wrappedCredential) {
-        this(wrappedCredential, Sleeper.DEFAULT);
-    }
-
-    /**
-     * A protected constructor only for testing.
-     *
-     * @param wrappedCredential Credential which will be wrapped and
-     * used for providing auth header.
-     * @param sleeper Sleeper for easy testing.
-     */
-    RetryHttpInitializerWrapper(
-            final Credential wrappedCredential, final Sleeper sleeper) {
-        this.wrappedCredential = Preconditions.checkNotNull(wrappedCredential);
-        this.sleeper = sleeper;
-    }
-
-    /**
-     * Initializes the given request.
-     */
-    @Override
-    public final void initialize(final HttpRequest request) {
-        request.setReadTimeout(2 * ONEMINITUES); // 2 minutes read timeout
-        final HttpUnsuccessfulResponseHandler backoffHandler =
-                new HttpBackOffUnsuccessfulResponseHandler(
-                        new ExponentialBackOff())
-                        .setSleeper(sleeper);
-        request.setInterceptor(wrappedCredential);
-        request.setUnsuccessfulResponseHandler(
-                new HttpUnsuccessfulResponseHandler() {
-                    @Override
-                    public boolean handleResponse(
-                            final HttpRequest request,
-                            final HttpResponse response,
-                            final boolean supportsRetry) throws IOException {
-                        if (wrappedCredential.handleResponse(
-                                request, response, supportsRetry)) {
-                            // If credential decides it can handle it,
-                            // the return code or message indicated
-                            // something specific to authentication,
-                            // and no backoff is desired.
-                            return true;
-                        } else if (backoffHandler.handleResponse(
-                                request, response, supportsRetry)) {
-                            // Otherwise, we defer to the judgement of
-                            // our internal backoff handler.
-                            LOG.info("Retrying "
-                                    + request.getUrl().toString());
-                            return true;
-                        } else {
-                            return false;
-                        }
-                    }
-                });
-        request.setIOExceptionHandler(
-                new HttpBackOffIOExceptionHandler(new ExponentialBackOff())
-                        .setSleeper(sleeper));
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteToBigQuery.java
----------------------------------------------------------------------
diff --git a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteToBigQuery.java b/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteToBigQuery.java
deleted file mode 100644
index 2cf719a..0000000
--- a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteToBigQuery.java
+++ /dev/null
@@ -1,134 +0,0 @@
-  /*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game.utils;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.examples.complete.game.UserScore;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.WriteDisposition;
-import com.google.cloud.dataflow.sdk.options.GcpOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PDone;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Generate, format, and write BigQuery table row information. Use provided information about
- * the field names and types, as well as lambda functions that describe how to generate their
- * values.
- */
-public class WriteToBigQuery<T>
-    extends PTransform<PCollection<T>, PDone> {
-
-  protected String tableName;
-  protected Map<String, FieldInfo<T>> fieldInfo;
-
-  public WriteToBigQuery() {
-  }
-
-  public WriteToBigQuery(String tableName,
-      Map<String, FieldInfo<T>> fieldInfo) {
-    this.tableName = tableName;
-    this.fieldInfo = fieldInfo;
-  }
-
-  /** Define a class to hold information about output table field definitions. */
-  public static class FieldInfo<T> implements Serializable {
-    // The BigQuery 'type' of the field
-    private String fieldType;
-    // A lambda function to generate the field value
-    private SerializableFunction<DoFn<T, TableRow>.ProcessContext, Object> fieldFn;
-
-    public FieldInfo(String fieldType,
-        SerializableFunction<DoFn<T, TableRow>.ProcessContext, Object> fieldFn) {
-      this.fieldType = fieldType;
-      this.fieldFn = fieldFn;
-    }
-
-    String getFieldType() {
-      return this.fieldType;
-    }
-
-    SerializableFunction<DoFn<T, TableRow>.ProcessContext, Object> getFieldFn() {
-      return this.fieldFn;
-    }
-  }
-  /** Convert each key/score pair into a BigQuery TableRow as specified by fieldFn. */
-  protected class BuildRowFn extends DoFn<T, TableRow> {
-
-    @Override
-    public void processElement(ProcessContext c) {
-
-      TableRow row = new TableRow();
-      for (Map.Entry<String, FieldInfo<T>> entry : fieldInfo.entrySet()) {
-          String key = entry.getKey();
-          FieldInfo<T> fcnInfo = entry.getValue();
-          SerializableFunction<DoFn<T, TableRow>.ProcessContext, Object> fcn =
-            fcnInfo.getFieldFn();
-          row.set(key, fcn.apply(c));
-        }
-      c.output(row);
-    }
-  }
-
-  /** Build the output table schema. */
-  protected TableSchema getSchema() {
-    List<TableFieldSchema> fields = new ArrayList<>();
-    for (Map.Entry<String, FieldInfo<T>> entry : fieldInfo.entrySet()) {
-      String key = entry.getKey();
-      FieldInfo<T> fcnInfo = entry.getValue();
-      String bqType = fcnInfo.getFieldType();
-      fields.add(new TableFieldSchema().setName(key).setType(bqType));
-    }
-    return new TableSchema().setFields(fields);
-  }
-
-  @Override
-  public PDone apply(PCollection<T> teamAndScore) {
-    return teamAndScore
-      .apply(ParDo.named("ConvertToRow").of(new BuildRowFn()))
-      .apply(BigQueryIO.Write
-                .to(getTable(teamAndScore.getPipeline(),
-                    tableName))
-                .withSchema(getSchema())
-                .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
-                .withWriteDisposition(WriteDisposition.WRITE_APPEND));
-  }
-
-  /** Utility to construct an output table reference. */
-  static TableReference getTable(Pipeline pipeline, String tableName) {
-    PipelineOptions options = pipeline.getOptions();
-    TableReference table = new TableReference();
-    table.setDatasetId(options.as(UserScore.Options.class).getDataset());
-    table.setProjectId(options.as(GcpOptions.class).getProject());
-    table.setTableId(tableName);
-    return table;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteWindowedToBigQuery.java
----------------------------------------------------------------------
diff --git a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteWindowedToBigQuery.java b/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteWindowedToBigQuery.java
deleted file mode 100644
index 8433021..0000000
--- a/java8examples/src/main/java/com/google/cloud/dataflow/examples/complete/game/utils/WriteWindowedToBigQuery.java
+++ /dev/null
@@ -1,76 +0,0 @@
-  /*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game.utils;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.WriteDisposition;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PDone;
-
-import java.util.Map;
-
-/**
- * Generate, format, and write BigQuery table row information. Subclasses {@link WriteToBigQuery}
- * to require windowing; so this subclass may be used for writes that require access to the
- * context's window information.
- */
-public class WriteWindowedToBigQuery<T>
-    extends WriteToBigQuery<T> {
-
-  public WriteWindowedToBigQuery(String tableName,
-      Map<String, FieldInfo<T>> fieldInfo) {
-    super(tableName, fieldInfo);
-  }
-
-  /** Convert each key/score pair into a BigQuery TableRow. */
-  protected class BuildRowFn extends DoFn<T, TableRow>
-      implements RequiresWindowAccess {
-
-    @Override
-    public void processElement(ProcessContext c) {
-
-      TableRow row = new TableRow();
-      for (Map.Entry<String, FieldInfo<T>> entry : fieldInfo.entrySet()) {
-          String key = entry.getKey();
-          FieldInfo<T> fcnInfo = entry.getValue();
-          SerializableFunction<DoFn<T, TableRow>.ProcessContext, Object> fcn =
-            fcnInfo.getFieldFn();
-          row.set(key, fcn.apply(c));
-        }
-      c.output(row);
-    }
-  }
-
-  @Override
-  public PDone apply(PCollection<T> teamAndScore) {
-    return teamAndScore
-      .apply(ParDo.named("ConvertToRow").of(new BuildRowFn()))
-      .apply(BigQueryIO.Write
-                .to(getTable(teamAndScore.getPipeline(),
-                    tableName))
-                .withSchema(getSchema())
-                .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
-                .withWriteDisposition(WriteDisposition.WRITE_APPEND));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/test/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8Test.java
----------------------------------------------------------------------
diff --git a/java8examples/src/test/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8Test.java b/java8examples/src/test/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8Test.java
deleted file mode 100644
index fcae41c..0000000
--- a/java8examples/src/test/java/com/google/cloud/dataflow/examples/MinimalWordCountJava8Test.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.GcsOptions;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Count;
-import com.google.cloud.dataflow.sdk.transforms.Filter;
-import com.google.cloud.dataflow.sdk.transforms.FlatMapElements;
-import com.google.cloud.dataflow.sdk.transforms.MapElements;
-import com.google.cloud.dataflow.sdk.util.GcsUtil;
-import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.collect.ImmutableList;
-
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-import org.mockito.Mockito;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.nio.channels.FileChannel;
-import java.nio.channels.SeekableByteChannel;
-import java.nio.file.Files;
-import java.nio.file.StandardOpenOption;
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * To keep {@link MinimalWordCountJava8} simple, it is not factored or testable. This test
- * file should be maintained with a copy of its code for a basic smoke test.
- */
-@RunWith(JUnit4.class)
-public class MinimalWordCountJava8Test implements Serializable {
-
-  /**
-   * A basic smoke test that ensures there is no crash at pipeline construction time.
-   */
-  @Test
-  public void testMinimalWordCountJava8() throws Exception {
-    Pipeline p = TestPipeline.create();
-    p.getOptions().as(GcsOptions.class).setGcsUtil(buildMockGcsUtil());
-
-    p.apply(TextIO.Read.from("gs://dataflow-samples/shakespeare/*"))
-     .apply(FlatMapElements.via((String word) -> Arrays.asList(word.split("[^a-zA-Z']+")))
-         .withOutputType(new TypeDescriptor<String>() {}))
-     .apply(Filter.byPredicate((String word) -> !word.isEmpty()))
-     .apply(Count.<String>perElement())
-     .apply(MapElements
-         .via((KV<String, Long> wordCount) -> wordCount.getKey() + ": " + wordCount.getValue())
-         .withOutputType(new TypeDescriptor<String>() {}))
-     .apply(TextIO.Write.to("gs://YOUR_OUTPUT_BUCKET/AND_OUTPUT_PREFIX"));
-  }
-
-  private GcsUtil buildMockGcsUtil() throws IOException {
-    GcsUtil mockGcsUtil = Mockito.mock(GcsUtil.class);
-
-    // Any request to open gets a new bogus channel
-    Mockito
-        .when(mockGcsUtil.open(Mockito.any(GcsPath.class)))
-        .then(new Answer<SeekableByteChannel>() {
-          @Override
-          public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable {
-            return FileChannel.open(
-                Files.createTempFile("channel-", ".tmp"),
-                StandardOpenOption.CREATE, StandardOpenOption.DELETE_ON_CLOSE);
-          }
-        });
-
-    // Any request for expansion returns a list containing the original GcsPath
-    // This is required to pass validation that occurs in TextIO during apply()
-    Mockito
-        .when(mockGcsUtil.expand(Mockito.any(GcsPath.class)))
-        .then(new Answer<List<GcsPath>>() {
-          @Override
-          public List<GcsPath> answer(InvocationOnMock invocation) throws Throwable {
-            return ImmutableList.of((GcsPath) invocation.getArguments()[0]);
-          }
-        });
-
-    return mockGcsUtil;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java
----------------------------------------------------------------------
diff --git a/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java b/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java
deleted file mode 100644
index f77d146..0000000
--- a/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/GameStatsTest.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game;
-
-import com.google.cloud.dataflow.examples.complete.game.GameStats.CalculateSpammyUsers;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Tests of GameStats.
- * Because the pipeline was designed for easy readability and explanations, it lacks good
- * modularity for testing. See our testing documentation for better ideas:
- * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline.
- */
-@RunWith(JUnit4.class)
-public class GameStatsTest implements Serializable {
-
-  // User scores
-  static final List<KV<String, Integer>> USER_SCORES = Arrays.asList(
-    KV.of("Robot-2", 66), KV.of("Robot-1", 116), KV.of("user7_AndroidGreenKookaburra", 23),
-    KV.of("user7_AndroidGreenKookaburra", 1),
-    KV.of("user19_BisqueBilby", 14), KV.of("user13_ApricotQuokka", 15),
-    KV.of("user18_BananaEmu", 25), KV.of("user6_AmberEchidna", 8),
-    KV.of("user2_AmberQuokka", 6), KV.of("user0_MagentaKangaroo", 4),
-    KV.of("user0_MagentaKangaroo", 3), KV.of("user2_AmberCockatoo", 13),
-    KV.of("user7_AlmondWallaby", 15), KV.of("user6_AmberNumbat", 11),
-    KV.of("user6_AmberQuokka", 4));
-
-  // The expected list of 'spammers'.
-  static final List<KV<String, Integer>> SPAMMERS = Arrays.asList(
-      KV.of("Robot-2", 66), KV.of("Robot-1", 116));
-
-  /** Test the calculation of 'spammy users'. */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testCalculateSpammyUsers() throws Exception {
-    Pipeline p = TestPipeline.create();
-
-    PCollection<KV<String, Integer>> input = p.apply(Create.of(USER_SCORES));
-    PCollection<KV<String, Integer>> output = input.apply(new CalculateSpammyUsers());
-
-    // Check the set of spammers.
-    DataflowAssert.that(output).containsInAnyOrder(SPAMMERS);
-
-    p.run();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java
----------------------------------------------------------------------
diff --git a/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java b/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java
deleted file mode 100644
index f77a5d4..0000000
--- a/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/HourlyTeamScoreTest.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game;
-
-import com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo;
-import com.google.cloud.dataflow.examples.complete.game.UserScore.ParseEventFn;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.Filter;
-import com.google.cloud.dataflow.sdk.transforms.MapElements;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import org.joda.time.Instant;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Tests of HourlyTeamScore.
- * Because the pipeline was designed for easy readability and explanations, it lacks good
- * modularity for testing. See our testing documentation for better ideas:
- * https://cloud.google.com/dataflow/pipelines/testing-your-pipeline.
- */
-@RunWith(JUnit4.class)
-public class HourlyTeamScoreTest implements Serializable {
-
-  static final String[] GAME_EVENTS_ARRAY = new String[] {
-    "user0_MagentaKangaroo,MagentaKangaroo,3,1447955630000,2015-11-19 09:53:53.444",
-    "user13_ApricotQuokka,ApricotQuokka,15,1447955630000,2015-11-19 09:53:53.444",
-    "user6_AmberNumbat,AmberNumbat,11,1447955630000,2015-11-19 09:53:53.444",
-    "user7_AlmondWallaby,AlmondWallaby,15,1447955630000,2015-11-19 09:53:53.444",
-    "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,12,1447955630000,2015-11-19 09:53:53.444",
-    "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,11,1447955630000,2015-11-19 09:53:53.444",
-    "user19_BisqueBilby,BisqueBilby,6,1447955630000,2015-11-19 09:53:53.444",
-    "user19_BisqueBilby,BisqueBilby,8,1447955630000,2015-11-19 09:53:53.444",
-    // time gap...
-    "user0_AndroidGreenEchidna,AndroidGreenEchidna,0,1447965690000,2015-11-19 12:41:31.053",
-    "user0_MagentaKangaroo,MagentaKangaroo,4,1447965690000,2015-11-19 12:41:31.053",
-    "user2_AmberCockatoo,AmberCockatoo,13,1447965690000,2015-11-19 12:41:31.053",
-    "user18_BananaEmu,BananaEmu,7,1447965690000,2015-11-19 12:41:31.053",
-    "user3_BananaEmu,BananaEmu,17,1447965690000,2015-11-19 12:41:31.053",
-    "user18_BananaEmu,BananaEmu,1,1447965690000,2015-11-19 12:41:31.053",
-    "user18_ApricotCaneToad,ApricotCaneToad,14,1447965690000,2015-11-19 12:41:31.053"
-  };
-
-
-  static final List<String> GAME_EVENTS = Arrays.asList(GAME_EVENTS_ARRAY);
-
-
-  // Used to check the filtering.
-  static final KV[] FILTERED_EVENTS = new KV[] {
-      KV.of("user0_AndroidGreenEchidna", 0), KV.of("user0_MagentaKangaroo", 4),
-      KV.of("user2_AmberCockatoo", 13),
-      KV.of("user18_BananaEmu", 7), KV.of("user3_BananaEmu", 17),
-      KV.of("user18_BananaEmu", 1), KV.of("user18_ApricotCaneToad", 14)
-    };
-
-
-  /** Test the filtering. */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testUserScoresFilter() throws Exception {
-    Pipeline p = TestPipeline.create();
-
-    final Instant startMinTimestamp = new Instant(1447965680000L);
-
-    PCollection<String> input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));
-
-    PCollection<KV<String, Integer>> output = input
-      .apply(ParDo.named("ParseGameEvent").of(new ParseEventFn()))
-
-      .apply("FilterStartTime", Filter.byPredicate(
-          (GameActionInfo gInfo)
-              -> gInfo.getTimestamp() > startMinTimestamp.getMillis()))
-      // run a map to access the fields in the result.
-      .apply(MapElements
-          .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
-          .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));
-
-      DataflowAssert.that(output).containsInAnyOrder(FILTERED_EVENTS);
-
-    p.run();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java
----------------------------------------------------------------------
diff --git a/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java b/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java
deleted file mode 100644
index 641e2c3..0000000
--- a/java8examples/src/test/java/com/google/cloud/dataflow/examples/complete/game/UserScoreTest.java
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete.game;
-
-import com.google.cloud.dataflow.examples.complete.game.UserScore.ExtractAndSumScore;
-import com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo;
-import com.google.cloud.dataflow.examples.complete.game.UserScore.ParseEventFn;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
-import com.google.cloud.dataflow.sdk.transforms.MapElements;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Tests of UserScore.
- */
-@RunWith(JUnit4.class)
-public class UserScoreTest implements Serializable {
-
-  static final String[] GAME_EVENTS_ARRAY = new String[] {
-    "user0_MagentaKangaroo,MagentaKangaroo,3,1447955630000,2015-11-19 09:53:53.444",
-    "user13_ApricotQuokka,ApricotQuokka,15,1447955630000,2015-11-19 09:53:53.444",
-    "user6_AmberNumbat,AmberNumbat,11,1447955630000,2015-11-19 09:53:53.444",
-    "user7_AlmondWallaby,AlmondWallaby,15,1447955630000,2015-11-19 09:53:53.444",
-    "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,12,1447955630000,2015-11-19 09:53:53.444",
-    "user6_AliceBlueDingo,AliceBlueDingo,4,xxxxxxx,2015-11-19 09:53:53.444",
-    "user7_AndroidGreenKookaburra,AndroidGreenKookaburra,11,1447955630000,2015-11-19 09:53:53.444",
-    "THIS IS A PARSE ERROR,2015-11-19 09:53:53.444",
-    "user19_BisqueBilby,BisqueBilby,6,1447955630000,2015-11-19 09:53:53.444",
-    "user19_BisqueBilby,BisqueBilby,8,1447955630000,2015-11-19 09:53:53.444"
-  };
-
-    static final String[] GAME_EVENTS_ARRAY2 = new String[] {
-    "user6_AliceBlueDingo,AliceBlueDingo,4,xxxxxxx,2015-11-19 09:53:53.444",
-    "THIS IS A PARSE ERROR,2015-11-19 09:53:53.444",
-    "user13_BisqueBilby,BisqueBilby,xxx,1447955630000,2015-11-19 09:53:53.444"
-  };
-
-  static final List<String> GAME_EVENTS = Arrays.asList(GAME_EVENTS_ARRAY);
-  static final List<String> GAME_EVENTS2 = Arrays.asList(GAME_EVENTS_ARRAY2);
-
-  static final List<KV<String, Integer>> USER_SUMS = Arrays.asList(
-      KV.of("user0_MagentaKangaroo", 3), KV.of("user13_ApricotQuokka", 15),
-      KV.of("user6_AmberNumbat", 11), KV.of("user7_AlmondWallaby", 15),
-      KV.of("user7_AndroidGreenKookaburra", 23),
-      KV.of("user19_BisqueBilby", 14));
-
-  static final List<KV<String, Integer>> TEAM_SUMS = Arrays.asList(
-      KV.of("MagentaKangaroo", 3), KV.of("ApricotQuokka", 15),
-      KV.of("AmberNumbat", 11), KV.of("AlmondWallaby", 15),
-      KV.of("AndroidGreenKookaburra", 23),
-      KV.of("BisqueBilby", 14));
-
-  /** Test the ParseEventFn DoFn. */
-  @Test
-  public void testParseEventFn() {
-    DoFnTester<String, GameActionInfo> parseEventFn =
-        DoFnTester.of(new ParseEventFn());
-
-    List<GameActionInfo> results = parseEventFn.processBatch(GAME_EVENTS_ARRAY);
-    Assert.assertEquals(results.size(), 8);
-    Assert.assertEquals(results.get(0).getUser(), "user0_MagentaKangaroo");
-    Assert.assertEquals(results.get(0).getTeam(), "MagentaKangaroo");
-    Assert.assertEquals(results.get(0).getScore(), new Integer(3));
-  }
-
-  /** Tests ExtractAndSumScore("user"). */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testUserScoreSums() throws Exception {
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));
-
-    PCollection<KV<String, Integer>> output = input
-      .apply(ParDo.of(new ParseEventFn()))
-      // Extract and sum username/score pairs from the event data.
-      .apply("ExtractUserScore", new ExtractAndSumScore("user"));
-
-    // Check the user score sums.
-    DataflowAssert.that(output).containsInAnyOrder(USER_SUMS);
-
-    p.run();
-  }
-
-  /** Tests ExtractAndSumScore("team"). */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testTeamScoreSums() throws Exception {
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));
-
-    PCollection<KV<String, Integer>> output = input
-      .apply(ParDo.of(new ParseEventFn()))
-      // Extract and sum teamname/score pairs from the event data.
-      .apply("ExtractTeamScore", new ExtractAndSumScore("team"));
-
-    // Check the team score sums.
-    DataflowAssert.that(output).containsInAnyOrder(TEAM_SUMS);
-
-    p.run();
-  }
-
-  /** Test that bad input data is dropped appropriately. */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testUserScoresBadInput() throws Exception {
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));
-
-    PCollection<KV<String, Integer>> extract = input
-      .apply(ParDo.of(new ParseEventFn()))
-      .apply(
-          MapElements.via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore()))
-          .withOutputType(new TypeDescriptor<KV<String, Integer>>() {}));
-
-    DataflowAssert.that(extract).empty();
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/11bb9e0e/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 2d0a3e1..3803007 100644
--- a/pom.xml
+++ b/pom.xml
@@ -147,7 +147,7 @@
         <jdk>[1.8,)</jdk>
       </activation>
       <modules>
-        <module>java8examples</module>
+        <module>examples/java8</module>
       </modules>
     </profile>
     <profile>

[14/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternals.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternals.java
deleted file mode 100644
index b6a1493..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BatchTimerInternals.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Preconditions;
-
-import org.joda.time.Instant;
-
-import java.util.HashSet;
-import java.util.PriorityQueue;
-import java.util.Set;
-
-import javax.annotation.Nullable;
-
-/**
- * TimerInternals that uses priority queues to manage the timers that are ready to fire.
- */
-public class BatchTimerInternals implements TimerInternals {
-  /** Set of timers that are scheduled used for deduplicating timers. */
-  private Set<TimerData> existingTimers = new HashSet<>();
-
-  // Keep these queues separate so we can advance over them separately.
-  private PriorityQueue<TimerData> watermarkTimers = new PriorityQueue<>(11);
-  private PriorityQueue<TimerData> processingTimers = new PriorityQueue<>(11);
-
-  private Instant inputWatermarkTime;
-  private Instant processingTime;
-
-  private PriorityQueue<TimerData> queue(TimeDomain domain) {
-    return TimeDomain.EVENT_TIME.equals(domain) ? watermarkTimers : processingTimers;
-  }
-
-  public BatchTimerInternals(Instant processingTime) {
-    this.processingTime = processingTime;
-    this.inputWatermarkTime = BoundedWindow.TIMESTAMP_MIN_VALUE;
-  }
-
-  @Override
-  public void setTimer(TimerData timer) {
-    if (existingTimers.add(timer)) {
-      queue(timer.getDomain()).add(timer);
-    }
-  }
-
-  @Override
-  public void deleteTimer(TimerData timer) {
-    existingTimers.remove(timer);
-    queue(timer.getDomain()).remove(timer);
-  }
-
-  @Override
-  public Instant currentProcessingTime() {
-    return processingTime;
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@link BoundedWindow#TIMESTAMP_MAX_VALUE}: in batch mode, upstream processing
-   * is already complete.
-   */
-  @Override
-  @Nullable
-  public Instant currentSynchronizedProcessingTime() {
-    return BoundedWindow.TIMESTAMP_MAX_VALUE;
-  }
-
-  @Override
-  public Instant currentInputWatermarkTime() {
-    return inputWatermarkTime;
-  }
-
-  @Override
-  @Nullable
-  public Instant currentOutputWatermarkTime() {
-    // The output watermark is always undefined in batch mode.
-    return null;
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(getClass())
-        .add("watermarkTimers", watermarkTimers)
-        .add("processingTimers", processingTimers)
-        .toString();
-  }
-
-  public void advanceInputWatermark(ReduceFnRunner<?, ?, ?, ?> runner, Instant newInputWatermark)
-      throws Exception {
-    Preconditions.checkState(!newInputWatermark.isBefore(inputWatermarkTime),
-        "Cannot move input watermark time backwards from %s to %s", inputWatermarkTime,
-        newInputWatermark);
-    inputWatermarkTime = newInputWatermark;
-    advance(runner, newInputWatermark, TimeDomain.EVENT_TIME);
-  }
-
-  public void advanceProcessingTime(ReduceFnRunner<?, ?, ?, ?> runner, Instant newProcessingTime)
-      throws Exception {
-    Preconditions.checkState(!newProcessingTime.isBefore(processingTime),
-        "Cannot move processing time backwards from %s to %s", processingTime, newProcessingTime);
-    processingTime = newProcessingTime;
-    advance(runner, newProcessingTime, TimeDomain.PROCESSING_TIME);
-  }
-
-  private void advance(ReduceFnRunner<?, ?, ?, ?> runner, Instant newTime, TimeDomain domain)
-      throws Exception {
-    PriorityQueue<TimerData> timers = queue(domain);
-    boolean shouldFire = false;
-
-    do {
-      TimerData timer = timers.peek();
-      // Timers fire if the new time is ahead of the timer
-      shouldFire = timer != null && newTime.isAfter(timer.getTimestamp());
-      if (shouldFire) {
-        // Remove before firing, so that if the trigger adds another identical
-        // timer we don't remove it.
-        timers.remove();
-        runner.onTimer(timer);
-      }
-    } while (shouldFire);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java
deleted file mode 100644
index cd51062..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableInserter.java
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.util.BackOff;
-import com.google.api.client.util.BackOffUtils;
-import com.google.api.client.util.ExponentialBackOff;
-import com.google.api.client.util.Sleeper;
-import com.google.api.services.bigquery.Bigquery;
-import com.google.api.services.bigquery.model.Table;
-import com.google.api.services.bigquery.model.TableDataInsertAllRequest;
-import com.google.api.services.bigquery.model.TableDataInsertAllResponse;
-import com.google.api.services.bigquery.model.TableDataList;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.CreateDisposition;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO.Write.WriteDisposition;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.hadoop.util.ApiErrorExtractor;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Throwables;
-import com.google.common.util.concurrent.MoreExecutors;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-
-import javax.annotation.Nullable;
-
-/**
- * Inserts rows into BigQuery.
- */
-public class BigQueryTableInserter {
-  private static final Logger LOG = LoggerFactory.getLogger(BigQueryTableInserter.class);
-
-  // Approximate amount of table data to upload per InsertAll request.
-  private static final long UPLOAD_BATCH_SIZE_BYTES = 64 * 1024;
-
-  // The maximum number of rows to upload per InsertAll request.
-  private static final long MAX_ROWS_PER_BATCH = 500;
-
-  // The maximum number of times to retry inserting rows into BigQuery.
-  private static final int MAX_INSERT_ATTEMPTS = 5;
-
-  // The initial backoff after a failure inserting rows into BigQuery.
-  private static final long INITIAL_INSERT_BACKOFF_INTERVAL_MS = 200L;
-
-  private final Bigquery client;
-  private final TableReference defaultRef;
-  private final long maxRowsPerBatch;
-
-  private static final ExecutorService executor = MoreExecutors.getExitingExecutorService(
-      (ThreadPoolExecutor) Executors.newFixedThreadPool(100), 10, TimeUnit.SECONDS);
-
-  /**
-   * Constructs a new row inserter.
-   *
-   * @param client a BigQuery client
-   */
-  public BigQueryTableInserter(Bigquery client) {
-    this.client = client;
-    this.defaultRef = null;
-    this.maxRowsPerBatch = MAX_ROWS_PER_BATCH;
-  }
-
-  /**
-   * Constructs a new row inserter.
-   *
-   * @param client a BigQuery client
-   * @param defaultRef identifies the table to insert into
-   * @deprecated replaced by {@link #BigQueryTableInserter(Bigquery)}
-   */
-  @Deprecated
-  public BigQueryTableInserter(Bigquery client, TableReference defaultRef) {
-    this.client = client;
-    this.defaultRef = defaultRef;
-    this.maxRowsPerBatch = MAX_ROWS_PER_BATCH;
-  }
-
-  /**
-   * Constructs a new row inserter.
-   *
-   * @param client a BigQuery client
-   */
-  public BigQueryTableInserter(Bigquery client, int maxRowsPerBatch) {
-    this.client = client;
-    this.defaultRef = null;
-    this.maxRowsPerBatch = maxRowsPerBatch;
-  }
-
-  /**
-   * Constructs a new row inserter.
-   *
-   * @param client a BigQuery client
-   * @param defaultRef identifies the default table to insert into
-   * @deprecated replaced by {@link #BigQueryTableInserter(Bigquery, int)}
-   */
-  @Deprecated
-  public BigQueryTableInserter(Bigquery client, TableReference defaultRef, int maxRowsPerBatch) {
-    this.client = client;
-    this.defaultRef = defaultRef;
-    this.maxRowsPerBatch = maxRowsPerBatch;
-  }
-
-  /**
-   * Insert all rows from the given list.
-   *
-   * @deprecated replaced by {@link #insertAll(TableReference, List)}
-   */
-  @Deprecated
-  public void insertAll(List<TableRow> rowList) throws IOException {
-    insertAll(defaultRef, rowList, null, null);
-  }
-
-  /**
-   * Insert all rows from the given list using specified insertIds if not null.
-   *
-   * @deprecated replaced by {@link #insertAll(TableReference, List, List)}
-   */
-  @Deprecated
-  public void insertAll(List<TableRow> rowList,
-      @Nullable List<String> insertIdList) throws IOException {
-    insertAll(defaultRef, rowList, insertIdList, null);
-  }
-
-  /**
-   * Insert all rows from the given list.
-   */
-  public void insertAll(TableReference ref, List<TableRow> rowList) throws IOException {
-    insertAll(ref, rowList, null, null);
-  }
-
-  /**
-   * Insert all rows from the given list using specified insertIds if not null. Track count of
-   * bytes written with the Aggregator.
-   */
-  public void insertAll(TableReference ref, List<TableRow> rowList,
-      @Nullable List<String> insertIdList, Aggregator<Long, Long> byteCountAggregator)
-      throws IOException {
-    Preconditions.checkNotNull(ref, "ref");
-    if (insertIdList != null && rowList.size() != insertIdList.size()) {
-      throw new AssertionError("If insertIdList is not null it needs to have at least "
-          + "as many elements as rowList");
-    }
-
-    AttemptBoundedExponentialBackOff backoff = new AttemptBoundedExponentialBackOff(
-        MAX_INSERT_ATTEMPTS,
-        INITIAL_INSERT_BACKOFF_INTERVAL_MS);
-
-    List<TableDataInsertAllResponse.InsertErrors> allErrors = new ArrayList<>();
-    // These lists contain the rows to publish. Initially the contain the entire list. If there are
-    // failures, they will contain only the failed rows to be retried.
-    List<TableRow> rowsToPublish = rowList;
-    List<String> idsToPublish = insertIdList;
-    while (true) {
-      List<TableRow> retryRows = new ArrayList<>();
-      List<String> retryIds = (idsToPublish != null) ? new ArrayList<String>() : null;
-
-      int strideIndex = 0;
-      // Upload in batches.
-      List<TableDataInsertAllRequest.Rows> rows = new LinkedList<>();
-      int dataSize = 0;
-
-      List<Future<List<TableDataInsertAllResponse.InsertErrors>>> futures = new ArrayList<>();
-      List<Integer> strideIndices = new ArrayList<>();
-
-      for (int i = 0; i < rowsToPublish.size(); ++i) {
-        TableRow row = rowsToPublish.get(i);
-        TableDataInsertAllRequest.Rows out = new TableDataInsertAllRequest.Rows();
-        if (idsToPublish != null) {
-          out.setInsertId(idsToPublish.get(i));
-        }
-        out.setJson(row.getUnknownKeys());
-        rows.add(out);
-
-        dataSize += row.toString().length();
-        if (dataSize >= UPLOAD_BATCH_SIZE_BYTES || rows.size() >= maxRowsPerBatch ||
-            i == rowsToPublish.size() - 1) {
-          TableDataInsertAllRequest content = new TableDataInsertAllRequest();
-          content.setRows(rows);
-
-          final Bigquery.Tabledata.InsertAll insert = client.tabledata()
-              .insertAll(ref.getProjectId(), ref.getDatasetId(), ref.getTableId(),
-                  content);
-
-          futures.add(
-              executor.submit(new Callable<List<TableDataInsertAllResponse.InsertErrors>>() {
-                @Override
-                public List<TableDataInsertAllResponse.InsertErrors> call() throws IOException {
-                  return insert.execute().getInsertErrors();
-                }
-              }));
-          strideIndices.add(strideIndex);
-
-          if (byteCountAggregator != null) {
-            byteCountAggregator.addValue(Long.valueOf(dataSize));
-          }
-          dataSize = 0;
-          strideIndex = i + 1;
-          rows = new LinkedList<>();
-        }
-      }
-
-      try {
-        for (int i = 0; i < futures.size(); i++) {
-          List<TableDataInsertAllResponse.InsertErrors> errors = futures.get(i).get();
-          if (errors != null) {
-            for (TableDataInsertAllResponse.InsertErrors error : errors) {
-              allErrors.add(error);
-              if (error.getIndex() == null) {
-                throw new IOException("Insert failed: " + allErrors);
-              }
-
-              int errorIndex = error.getIndex().intValue() + strideIndices.get(i);
-              retryRows.add(rowsToPublish.get(errorIndex));
-              if (retryIds != null) {
-                retryIds.add(idsToPublish.get(errorIndex));
-              }
-            }
-          }
-        }
-      } catch (InterruptedException e) {
-        throw new IOException("Interrupted while inserting " + rowsToPublish);
-      } catch (ExecutionException e) {
-        Throwables.propagate(e.getCause());
-      }
-
-      if (!allErrors.isEmpty() && !backoff.atMaxAttempts()) {
-        try {
-          Thread.sleep(backoff.nextBackOffMillis());
-        } catch (InterruptedException e) {
-          throw new IOException("Interrupted while waiting before retrying insert of " + retryRows);
-        }
-        LOG.info("Retrying failed inserts to BigQuery");
-        rowsToPublish = retryRows;
-        idsToPublish = retryIds;
-        allErrors.clear();
-      } else {
-        break;
-      }
-    }
-    if (!allErrors.isEmpty()) {
-      throw new IOException("Insert failed: " + allErrors);
-    }
-  }
-
-  /**
-   * Retrieves or creates the table.
-   *
-   * <p>The table is checked to conform to insertion requirements as specified
-   * by WriteDisposition and CreateDisposition.
-   *
-   * <p>If table truncation is requested (WriteDisposition.WRITE_TRUNCATE), then
-   * this will re-create the table if necessary to ensure it is empty.
-   *
-   * <p>If an empty table is required (WriteDisposition.WRITE_EMPTY), then this
-   * will fail if the table exists and is not empty.
-   *
-   * <p>When constructing a table, a {@code TableSchema} must be available.  If a
-   * schema is provided, then it will be used.  If no schema is provided, but
-   * an existing table is being cleared (WRITE_TRUNCATE option above), then
-   * the existing schema will be re-used.  If no schema is available, then an
-   * {@code IOException} is thrown.
-   */
-  public Table getOrCreateTable(
-      TableReference ref,
-      WriteDisposition writeDisposition,
-      CreateDisposition createDisposition,
-      @Nullable TableSchema schema) throws IOException {
-    // Check if table already exists.
-    Bigquery.Tables.Get get = client.tables()
-        .get(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
-    Table table = null;
-    try {
-      table = get.execute();
-    } catch (IOException e) {
-      ApiErrorExtractor errorExtractor = new ApiErrorExtractor();
-      if (!errorExtractor.itemNotFound(e) ||
-          createDisposition != CreateDisposition.CREATE_IF_NEEDED) {
-        // Rethrow.
-        throw e;
-      }
-    }
-
-    // If we want an empty table, and it isn't, then delete it first.
-    if (table != null) {
-      if (writeDisposition == WriteDisposition.WRITE_APPEND) {
-        return table;
-      }
-
-      boolean empty = isEmpty(ref);
-      if (empty) {
-        if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) {
-          LOG.info("Empty table found, not removing {}", BigQueryIO.toTableSpec(ref));
-        }
-        return table;
-
-      } else if (writeDisposition == WriteDisposition.WRITE_EMPTY) {
-        throw new IOException("WriteDisposition is WRITE_EMPTY, "
-            + "but table is not empty");
-      }
-
-      // Reuse the existing schema if none was provided.
-      if (schema == null) {
-        schema = table.getSchema();
-      }
-
-      // Delete table and fall through to re-creating it below.
-      LOG.info("Deleting table {}", BigQueryIO.toTableSpec(ref));
-      Bigquery.Tables.Delete delete = client.tables()
-          .delete(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
-      delete.execute();
-    }
-
-    if (schema == null) {
-      throw new IllegalArgumentException(
-          "Table schema required for new table.");
-    }
-
-    // Create the table.
-    return tryCreateTable(ref, schema);
-  }
-
-  /**
-   * Checks if a table is empty.
-   */
-  public boolean isEmpty(TableReference ref) throws IOException {
-    Bigquery.Tabledata.List list = client.tabledata()
-        .list(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
-    list.setMaxResults(1L);
-    TableDataList dataList = list.execute();
-
-    return dataList.getRows() == null || dataList.getRows().isEmpty();
-  }
-
-  /**
-   * Retry table creation up to 5 minutes (with exponential backoff) when this user is near the
-   * quota for table creation. This relatively innocuous behavior can happen when BigQueryIO is
-   * configured with a table spec function to use different tables for each window.
-   */
-  private static final int RETRY_CREATE_TABLE_DURATION_MILLIS = (int) TimeUnit.MINUTES.toMillis(5);
-
-  /**
-   * Tries to create the BigQuery table.
-   * If a table with the same name already exists in the dataset, the table
-   * creation fails, and the function returns null.  In such a case,
-   * the existing table doesn't necessarily have the same schema as specified
-   * by the parameter.
-   *
-   * @param schema Schema of the new BigQuery table.
-   * @return The newly created BigQuery table information, or null if the table
-   *     with the same name already exists.
-   * @throws IOException if other error than already existing table occurs.
-   */
-  @Nullable
-  public Table tryCreateTable(TableReference ref, TableSchema schema) throws IOException {
-    LOG.info("Trying to create BigQuery table: {}", BigQueryIO.toTableSpec(ref));
-    BackOff backoff =
-        new ExponentialBackOff.Builder()
-            .setMaxElapsedTimeMillis(RETRY_CREATE_TABLE_DURATION_MILLIS)
-            .build();
-
-    Table table = new Table().setTableReference(ref).setSchema(schema);
-    return tryCreateTable(table, ref.getProjectId(), ref.getDatasetId(), backoff, Sleeper.DEFAULT);
-  }
-
-  @VisibleForTesting
-  @Nullable
-  Table tryCreateTable(
-      Table table, String projectId, String datasetId, BackOff backoff, Sleeper sleeper)
-          throws IOException {
-    boolean retry = false;
-    while (true) {
-      try {
-        return client.tables().insert(projectId, datasetId, table).execute();
-      } catch (IOException e) {
-        ApiErrorExtractor extractor = new ApiErrorExtractor();
-        if (extractor.itemAlreadyExists(e)) {
-          // The table already exists, nothing to return.
-          return null;
-        } else if (extractor.rateLimited(e)) {
-          // The request failed because we hit a temporary quota. Back off and try again.
-          try {
-            if (BackOffUtils.next(sleeper, backoff)) {
-              if (!retry) {
-                LOG.info(
-                    "Quota limit reached when creating table {}:{}.{}, retrying up to {} minutes",
-                    projectId,
-                    datasetId,
-                    table.getTableReference().getTableId(),
-                    TimeUnit.MILLISECONDS.toSeconds(RETRY_CREATE_TABLE_DURATION_MILLIS) / 60.0);
-                retry = true;
-              }
-              continue;
-            }
-          } catch (InterruptedException e1) {
-            // Restore interrupted state and throw the last failure.
-            Thread.currentThread().interrupt();
-            throw e;
-          }
-        }
-        throw e;
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java
deleted file mode 100644
index c2c80f7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BigQueryTableRowIterator.java
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
-import com.google.api.client.util.BackOff;
-import com.google.api.client.util.BackOffUtils;
-import com.google.api.client.util.ClassInfo;
-import com.google.api.client.util.Data;
-import com.google.api.client.util.Sleeper;
-import com.google.api.services.bigquery.Bigquery;
-import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
-import com.google.api.services.bigquery.model.Dataset;
-import com.google.api.services.bigquery.model.DatasetReference;
-import com.google.api.services.bigquery.model.ErrorProto;
-import com.google.api.services.bigquery.model.Job;
-import com.google.api.services.bigquery.model.JobConfiguration;
-import com.google.api.services.bigquery.model.JobConfigurationQuery;
-import com.google.api.services.bigquery.model.JobReference;
-import com.google.api.services.bigquery.model.JobStatus;
-import com.google.api.services.bigquery.model.Table;
-import com.google.api.services.bigquery.model.TableCell;
-import com.google.api.services.bigquery.model.TableDataList;
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.common.base.MoreObjects;
-import com.google.common.collect.ImmutableList;
-
-import org.joda.time.Duration;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.NoSuchElementException;
-import java.util.Objects;
-import java.util.Random;
-
-import javax.annotation.Nullable;
-
-/**
- * Iterates over all rows of a BigQuery table, or over the rows produced by a query.
- *
- * <p>Instances are created with {@link #fromTable} or {@link #fromQuery}. Call {@link #open}
- * first, then step through the rows with {@link #advance} and {@link #getCurrent}, and call
- * {@link #close} when done so that temporary query resources are deleted.
- */
-public class BigQueryTableRowIterator implements AutoCloseable {
-  private static final Logger LOG = LoggerFactory.getLogger(BigQueryTableRowIterator.class);
-
-  // Table being read. Null until open() when the iterator was created from a query.
-  @Nullable private TableReference ref;
-  // Project in which query jobs and the temporary dataset/table are created.
-  @Nullable private final String projectId;
-  // Schema of the table being read; set by open().
-  @Nullable private TableSchema schema;
-  private final Bigquery client;
-  // Page token returned with the most recently fetched page; null before the first fetch.
-  private String pageToken;
-  // Rows of the most recently fetched page that have not been returned yet.
-  private Iterator<TableRow> iteratorOverCurrentBatch;
-  // Row most recently produced by advance(); null before the first successful advance().
-  private TableRow current;
-  // Set true when the final page is seen from the service.
-  private boolean lastPage = false;
-
-  // The maximum number of times a BigQuery request will be retried
-  private static final int MAX_RETRIES = 3;
-  // Initial wait time for the backoff implementation
-  private static final Duration INITIAL_BACKOFF_TIME = Duration.standardSeconds(1);
-
-  // Interval between polls of the BigQuery service for the status of a submitted query job.
-  private static final Duration QUERY_COMPLETION_POLL_TIME = Duration.standardSeconds(1);
-
-  // Query to execute; null when the iterator reads directly from a table.
-  private final String query;
-  // Whether to flatten query results.
-  private final boolean flattenResults;
-  // Temporary dataset used to store query results.
-  private String temporaryDatasetId = null;
-  // Temporary table used to store query results.
-  private String temporaryTableId = null;
-
-  // Private: instances are obtained through fromTable() or fromQuery(), which supply either a
-  // table reference or a query.
-  private BigQueryTableRowIterator(
-      @Nullable TableReference ref, @Nullable String query, @Nullable String projectId,
-      Bigquery client, boolean flattenResults) {
-    this.ref = ref;
-    this.query = query;
-    this.projectId = projectId;
-    this.client = checkNotNull(client, "client");
-    this.flattenResults = flattenResults;
-  }
-
-  /**
-   * Creates a {@code BigQueryTableRowIterator} over the rows of the given table. The project
-   * of the table reference is used as the iterator's project.
-   */
-  public static BigQueryTableRowIterator fromTable(TableReference ref, Bigquery client) {
-    TableReference table = checkNotNull(ref, "ref");
-    Bigquery bigquery = checkNotNull(client, "client");
-    String tableProject = table.getProjectId();
-    return new BigQueryTableRowIterator(table, null, tableProject, bigquery, true);
-  }
-
-  /**
-   * Creates a {@code BigQueryTableRowIterator} over the results of running {@code query} in
-   * the given project. Results are flattened unless {@code flattenResults} is {@code false};
-   * a {@code null} value is treated as {@code true}.
-   */
-  public static BigQueryTableRowIterator fromQuery(
-      String query, String projectId, Bigquery client, @Nullable Boolean flattenResults) {
-    checkNotNull(query, "query");
-    checkNotNull(projectId, "projectId");
-    checkNotNull(client, "client");
-    boolean flatten = flattenResults == null || flattenResults;
-    return new BigQueryTableRowIterator(null, query, projectId, client, flatten);
-  }
-
-  /**
-   * Opens the table for read.
-   *
-   * <p>If this iterator was created from a query, the query is executed first and the table
-   * holding its results becomes the table to read. The schema of that table is then fetched.
-   *
-   * @throws IOException on failure
-   * @throws InterruptedException if interrupted while waiting on BigQuery
-   */
-  public void open() throws IOException, InterruptedException {
-    if (query != null) {
-      ref = executeQueryAndWaitForCompletion();
-    }
-    // Get table schema.
-    Bigquery.Tables.Get get =
-        client.tables().get(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
-
-    Table table =
-        executeWithBackOff(
-            get,
-            "Error opening BigQuery table  %s of dataset %s  : {}",
-            ref.getTableId(),
-            ref.getDatasetId());
-    schema = table.getSchema();
-  }
-
-  /**
-   * Moves to the next row, fetching further pages of table data from BigQuery as needed.
-   * Relies on the table reference and schema established by {@link #open}.
-   *
-   * @return {@code true} if a row is now available from {@link #getCurrent}, {@code false}
-   *     once the last page has been consumed
-   * @throws IOException if a page of rows cannot be read
-   * @throws InterruptedException if interrupted while backing off between request retries
-   */
-  public boolean advance() throws IOException, InterruptedException {
-    while (true) {
-      if (iteratorOverCurrentBatch != null && iteratorOverCurrentBatch.hasNext()) {
-        // Embed schema information into the raw row, so that values have an
-        // associated key.  This matches how rows are read when using the
-        // DataflowPipelineRunner.
-        current = getTypedTableRow(schema.getFields(), iteratorOverCurrentBatch.next());
-        return true;
-      }
-      // The current page is used up; stop if it was the final one.
-      if (lastPage) {
-        return false;
-      }
-
-      // Request the next page, continuing from the previous page token when there is one.
-      Bigquery.Tabledata.List list =
-          client.tabledata().list(ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
-      if (pageToken != null) {
-        list.setPageToken(pageToken);
-      }
-
-      TableDataList result =
-          executeWithBackOff(
-              list,
-              "Error reading from BigQuery table %s of dataset %s : {}",
-              ref.getTableId(),
-              ref.getDatasetId());
-
-      pageToken = result.getPageToken();
-      // A page without rows is treated as an empty batch.
-      iteratorOverCurrentBatch =
-          result.getRows() != null
-              ? result.getRows().iterator()
-              : Collections.<TableRow>emptyIterator();
-
-      // The server may return a page token indefinitely on a zero-length table.
-      if (pageToken == null || result.getTotalRows() != null && result.getTotalRows() == 0) {
-        lastPage = true;
-      }
-    }
-  }
-
-  /**
-   * Returns the row most recently produced by {@link #advance}.
-   *
-   * @throws NoSuchElementException if no row has been produced yet
-   */
-  public TableRow getCurrent() {
-    if (current != null) {
-      return current;
-    }
-    throw new NoSuchElementException();
-  }
-
-  /**
-   * Converts a cell value returned from the BigQuery API into the form we will receive when
-   * running BigQuery's export-to-GCS and parallel read, which is the efficient parallel
-   * implementation used for batch jobs executed on the Cloud Dataflow service.
-   *
-   * <p>BigQuery schema types map to Java types as follows:
-   *
-   * <ul>
-   *   <li>Nulls are {@code null}.
-   *   <li>Repeated fields are {@code List} of objects.
-   *   <li>Record columns are {@link TableRow} objects.
-   *   <li>{@code BOOLEAN} columns are JSON booleans, hence Java {@code Boolean} objects.
-   *   <li>{@code FLOAT} columns are JSON floats, hence Java {@code Double} objects.
-   *   <li>{@code TIMESTAMP} columns are {@code String} objects that are of the format
-   *       {@code yyyy-MM-dd HH:mm:ss[.SSSSSS] UTC}, where the {@code .SSSSSS} has no trailing
-   *       zeros and can be 1 to 6 digits long.
-   *   <li>Every other atomic type is a {@code String}.
-   * </ul>
-   *
-   * <p>Note that integers are encoded as strings to match BigQuery's exported JSON format.
-   *
-   * <p>Finally, values are stored in the {@link TableRow} as {"field name": value} pairs
-   * and are not accessible through the {@link TableRow#getF} function.
-   */
-  @Nullable private Object getTypedCellValue(TableFieldSchema fieldSchema, Object v) {
-    if (Data.isNull(v)) {
-      return null;
-    }
-
-    // A repeated field is a list of {"v": value} cells; convert each element as if it were a
-    // single REQUIRED field of the same type.
-    if (Objects.equals(fieldSchema.getMode(), "REPEATED")) {
-      TableFieldSchema singleSchema = fieldSchema.clone().setMode("REQUIRED");
-      @SuppressWarnings("unchecked")
-      List<Map<String, Object>> repeatedCells = (List<Map<String, Object>>) v;
-      ImmutableList.Builder<Object> converted = ImmutableList.builder();
-      for (Map<String, Object> repeatedCell : repeatedCells) {
-        converted.add(getTypedCellValue(singleSchema, repeatedCell.get("v")));
-      }
-      return converted.build();
-    }
-
-    switch (fieldSchema.getType()) {
-      case "RECORD": {
-        @SuppressWarnings("unchecked")
-        Map<String, Object> nestedRow = (Map<String, Object>) v;
-        return getTypedTableRow(fieldSchema.getFields(), nestedRow);
-      }
-      case "FLOAT":
-        return Double.parseDouble((String) v);
-      case "BOOLEAN":
-        return Boolean.parseBoolean((String) v);
-      case "TIMESTAMP":
-        return AvroUtils.formatTimestamp((String) v);
-      default:
-        // Every other type is passed through unchanged.
-        return v;
-    }
-  }
-
-  /**
-   * A list of the field names that cannot be used in BigQuery tables processed by Dataflow,
-   * because they are reserved keywords in {@link TableRow}.
-   */
-  // TODO: This limitation is unfortunate. We need to give users a way to use BigQueryIO that does
-  // not indirect through our broken use of {@link TableRow}.
-  //     See discussion: https://github.com/GoogleCloudPlatform/DataflowJavaSDK/pull/41
-  private static final Collection<String> RESERVED_FIELD_NAMES =
-      ClassInfo.of(TableRow.class).getNames();
-
-  /**
-   * Converts a row returned from the BigQuery JSON API as a {@code Map<String, Object>} into a
-   * Java {@link TableRow} whose values are stored by field name. The {@code Object} values in
-   * the cells are converted to Java types according to the provided field schemas, and cells
-   * whose converted value is {@code null} are omitted from the result.
-   *
-   * <p>See {@link #getTypedCellValue(TableFieldSchema, Object)} for details on how BigQuery
-   * types are mapped to Java types.
-   *
-   * @throws IllegalStateException if the number of cells differs from the number of fields
-   * @throws IllegalArgumentException if a field uses a name reserved by {@link TableRow}
-   */
-  private TableRow getTypedTableRow(List<TableFieldSchema> fields, Map<String, Object> rawRow) {
-    // If rawRow is a TableRow, use it. If not, create a new one.
-    TableRow row;
-    List<? extends Map<String, Object>> cells;
-    if (rawRow instanceof TableRow) {
-      // Since rawRow is a TableRow it already has TableCell objects in setF. We do not need to do
-      // any type conversion, but extract the cells for cell-wise processing below.
-      row = (TableRow) rawRow;
-      cells = row.getF();
-      // Clear the cells from the row, so that row.getF() will return null. This matches the
-      // behavior of rows produced by the BigQuery export API used on the service.
-      row.setF(null);
-    } else {
-      row = new TableRow();
-
-      // Since rawRow is a Map<String, Object> we use Map.get("f") instead of TableRow.getF() to
-      // get its cells. Similarly, when rawCell is a Map<String, Object> instead of a TableCell,
-      // we will use Map.get("v") instead of TableCell.getV() to get its value.
-      @SuppressWarnings("unchecked")
-      List<? extends Map<String, Object>> rawCells =
-          (List<? extends Map<String, Object>>) rawRow.get("f");
-      cells = rawCells;
-    }
-
-    checkState(cells.size() == fields.size(),
-        "Expected that the row has the same number of cells %s as fields in the schema %s",
-        cells.size(), fields.size());
-
-    // Loop through all the fields in the row, normalizing their types with the TableFieldSchema
-    // and storing the normalized values by field name in the Map<String, Object> that
-    // underlies the TableRow.
-    Iterator<? extends Map<String, Object>> cellIt = cells.iterator();
-    Iterator<TableFieldSchema> fieldIt = fields.iterator();
-    while (cellIt.hasNext()) {
-      Map<String, Object> cell = cellIt.next();
-      TableFieldSchema fieldSchema = fieldIt.next();
-
-      // Convert the object in this cell to the Java type corresponding to its type in the schema.
-      Object convertedValue = getTypedCellValue(fieldSchema, cell.get("v"));
-
-      String fieldName = fieldSchema.getName();
-      checkArgument(!RESERVED_FIELD_NAMES.contains(fieldName),
-          "BigQueryIO does not support records with columns named %s", fieldName);
-
-      if (convertedValue == null) {
-        // BigQuery does not include null values when the export operation (to JSON) is used.
-        // To match that behavior, BigQueryTableRowIterator, and the DirectPipelineRunner,
-        // intentionally omit columns with null values.
-        continue;
-      }
-
-      row.set(fieldName, convertedValue);
-    }
-    return row;
-  }
-
-  // Creates a new BigQuery dataset with the given id in this iterator's project.
-  private void createDataset(String datasetId) throws IOException, InterruptedException {
-    DatasetReference datasetRef = new DatasetReference();
-    datasetRef.setProjectId(projectId);
-    datasetRef.setDatasetId(datasetId);
-
-    Dataset newDataset = new Dataset();
-    newDataset.setDatasetReference(datasetRef);
-
-    String failureMessage =
-        "Error when trying to create the temporary dataset " + datasetId + " in project "
-        + projectId + " :{}";
-    executeWithBackOff(client.datasets().insert(projectId, newDataset), failureMessage);
-  }
-
-  // Deletes the given table from the given dataset of this iterator's project.
-  // The logged error names the table, dataset and project so that a leftover temporary
-  // table can be located and removed by hand.
-  private void deleteTable(String datasetId, String tableId)
-      throws IOException, InterruptedException {
-    executeWithBackOff(
-        client.tables().delete(projectId, datasetId, tableId),
-        "Error when trying to delete the temporary table " + tableId + " in dataset " + datasetId
-        + " of project " + projectId + ". Manual deletion may be required. Error message : {}");
-  }
-
-  // Deletes the given dataset from this iterator's project. This will fail if the dataset
-  // still has any tables.
-  private void deleteDataset(String datasetId) throws IOException, InterruptedException {
-    String failureMessage =
-        "Error when trying to delete the temporary dataset " + datasetId + " in project "
-        + projectId + ". Manual deletion may be required. Error message : {}";
-    executeWithBackOff(client.datasets().delete(projectId, datasetId), failureMessage);
-  }
-
-  /**
-   * Executes the configured query and blocks until the query job is done.
-   *
-   * <p>A temporary dataset is created first and the query is configured to write its results
-   * to a temporary table inside it. Both ids are kept in fields so that {@link #close} can
-   * delete them.
-   *
-   * @return the destination table reported by the finished job
-   * @throws IOException if a BigQuery request fails or the query job finishes with an error
-   * @throws InterruptedException if the thread is interrupted while waiting for the job
-   */
-  private TableReference executeQueryAndWaitForCompletion()
-      throws IOException, InterruptedException {
-    // Create a temporary dataset to store results.
-    // Starting dataset name with an "_" so that it is hidden.
-    Random rnd = new Random(System.currentTimeMillis());
-    temporaryDatasetId = "_dataflow_temporary_dataset_" + rnd.nextInt(1000000);
-    temporaryTableId = "dataflow_temporary_table_" + rnd.nextInt(1000000);
-
-    createDataset(temporaryDatasetId);
-    Job job = new Job();
-    JobConfiguration config = new JobConfiguration();
-    JobConfigurationQuery queryConfig = new JobConfigurationQuery();
-    config.setQuery(queryConfig);
-    job.setConfiguration(config);
-    queryConfig.setQuery(query);
-    queryConfig.setAllowLargeResults(true);
-    queryConfig.setFlattenResults(flattenResults);
-
-    TableReference destinationTable = new TableReference();
-    destinationTable.setProjectId(projectId);
-    destinationTable.setDatasetId(temporaryDatasetId);
-    destinationTable.setTableId(temporaryTableId);
-    queryConfig.setDestinationTable(destinationTable);
-
-    Insert insert = client.jobs().insert(projectId, job);
-    Job queryJob = executeWithBackOff(
-        insert, "Error when trying to execute the job for query " + query + " :{}");
-    JobReference jobId = queryJob.getJobReference();
-
-    // Poll the job until it reports DONE.
-    while (true) {
-      Job pollJob = executeWithBackOff(
-          client.jobs().get(projectId, jobId.getJobId()),
-          "Error when trying to get status of the job for query " + query + " :{}");
-      JobStatus status = pollJob.getStatus();
-      if (status.getState().equals("DONE")) {
-        // Job is DONE, but did not necessarily succeed.
-        ErrorProto error = status.getErrorResult();
-        if (error == null) {
-          return pollJob.getConfiguration().getQuery().getDestinationTable();
-        } else {
-          // There will be no temporary table to delete, so null out the reference.
-          temporaryTableId = null;
-          throw new IOException("Executing query " + query + " failed: " + error.getMessage());
-        }
-      }
-      // Let an interrupt propagate to the caller instead of swallowing it: ignoring it would
-      // clear the interrupt flag and keep this loop polling although the thread was asked to
-      // stop.
-      Thread.sleep(QUERY_COMPLETION_POLL_TIME.getMillis());
-    }
-  }
-
-  /**
-   * Executes a BigQuery request, retrying with exponential backoff when it fails with an
-   * {@link IOException}.
-   *
-   * @param client the request to execute
-   * @param error message logged when an attempt fails. When {@code errorArgs} are given it is
-   *     first expanded with {@link String#format}; the result is then used as the logging
-   *     pattern, whose {@code {}} placeholder receives the failure detail.
-   * @param errorArgs optional arguments for the {@code %s}-style placeholders in {@code error}
-   * @return the result of the first successful execution
-   * @throws IOException the last failure, once no further retries are allowed
-   * @throws InterruptedException if interrupted while sleeping between attempts
-   */
-  public static <T> T executeWithBackOff(AbstractGoogleClientRequest<T> client, String error,
-      Object... errorArgs) throws IOException, InterruptedException {
-    Sleeper sleeper = Sleeper.DEFAULT;
-    BackOff backOff =
-        new AttemptBoundedExponentialBackOff(MAX_RETRIES, INITIAL_BACKOFF_TIME.getMillis());
-
-    while (true) {
-      try {
-        return client.execute();
-      } catch (IOException e) {
-        // Only run String.format when there is something to substitute. Several callers pass a
-        // message with free text (such as the query) concatenated in and no format arguments;
-        // a literal '%' in that text would otherwise make String.format throw and hide the
-        // real failure.
-        boolean hasFormatArgs = errorArgs != null && errorArgs.length > 0;
-        String message = hasFormatArgs ? String.format(error, errorArgs) : error;
-        LOG.error(message, e.getMessage());
-        if (!BackOffUtils.next(sleeper, backOff)) {
-          LOG.error(message, "Failing after retrying " + MAX_RETRIES + " times.");
-          throw e;
-        }
-      }
-    }
-  }
-
-  /**
-   * Stops further reads and deletes the temporary table and dataset created for a query, if
-   * any.
-   *
-   * @throws RuntimeException wrapping the failure if the temporary resources cannot be deleted
-   */
-  @Override
-  public void close() {
-    // Prevent any further requests.
-    lastPage = true;
-
-    try {
-      // Deleting temporary table and dataset that gets generated when executing a query.
-      if (temporaryDatasetId != null) {
-        if (temporaryTableId != null) {
-          deleteTable(temporaryDatasetId, temporaryTableId);
-        }
-        deleteDataset(temporaryDatasetId);
-      }
-    } catch (InterruptedException e) {
-      // Restore the interrupt flag so that callers can still observe the interruption after
-      // it has been wrapped in an unchecked exception.
-      Thread.currentThread().interrupt();
-      throw new RuntimeException(e);
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BitSetCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BitSetCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BitSetCoder.java
deleted file mode 100644
index f3a039a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BitSetCoder.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
-import com.google.cloud.dataflow.sdk.coders.ByteArrayCoder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.BitSet;
-
-/**
- * Coder for the BitSet used to track child-trigger finished states.
- *
- * <p>A {@link BitSet} is encoded as the byte array returned by {@link BitSet#toByteArray()},
- * written with a {@link ByteArrayCoder}.
- */
-class BitSetCoder extends AtomicCoder<BitSet> {
-
-  // Held in a static constant rather than a transient instance field: a transient field is
-  // not restored when an instance is Java-deserialized, which would leave the delegate null on
-  // any deserialized copy of this coder.
-  private static final ByteArrayCoder BYTE_ARRAY_CODER = ByteArrayCoder.of();
-  private static final BitSetCoder INSTANCE = new BitSetCoder();
-
-  private BitSetCoder() {}
-
-  /** Returns the shared {@code BitSetCoder} instance. */
-  public static BitSetCoder of() {
-    return INSTANCE;
-  }
-
-  /**
-   * Writes {@code value} to {@code outStream} as a byte array.
-   *
-   * @throws CoderException if {@code value} is {@code null}
-   */
-  @Override
-  public void encode(BitSet value, OutputStream outStream, Context context)
-      throws CoderException, IOException {
-    if (value == null) {
-      throw new CoderException("cannot encode a null BitSet");
-    }
-    BYTE_ARRAY_CODER.encodeAndOwn(value.toByteArray(), outStream, context);
-  }
-
-  /** Reads a byte array from {@code inStream} and rebuilds the {@link BitSet} from it. */
-  @Override
-  public BitSet decode(InputStream inStream, Context context)
-      throws CoderException, IOException {
-    return BitSet.valueOf(BYTE_ARRAY_CODER.decode(inStream, context));
-  }
-
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    verifyDeterministic(
-        "BitSetCoder requires its byteArrayCoder to be deterministic.",
-        BYTE_ARRAY_CODER);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStream.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStream.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStream.java
deleted file mode 100644
index e8e693a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BufferedElementCountingOutputStream.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.Coder.Context;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-
-import javax.annotation.concurrent.NotThreadSafe;
-
-/**
- * Provides an efficient encoding for {@link Iterable}s containing small values by
- * buffering up to {@code bufferSize} bytes of data before prefixing the count.
- * Note that each element needs to be encoded in a nested context. See
- * {@link Context Coder.Context} for more details.
- *
- * <p>To use this stream:
- * <pre><code>
- * BufferedElementCountingOutputStream os = ...
- * for (Element E : elements) {
- *   os.markElementStart();
- *   // write an element to os
- * }
- * os.finish();
- * </code></pre>
- *
- * <p>The resulting output stream is:
- * <pre>
- * countA element(0) element(1) ... element(countA - 1)
- * countB element(0) element(1) ... element(countB - 1)
- * ...
- * countX element(0) element(1) ... element(countX - 1)
- * countY
- * </pre>
- *
- * <p>To read this stream:
- * <pre><code>
- * InputStream is = ...
- * long count;
- * do {
- *   count = VarInt.decodeLong(is);
- *   for (int i = 0; i &lt; count; ++i) {
- *     // read an element from is
- *   }
- * } while(count &gt; 0);
- * </code></pre>
- *
- * <p>The counts are encoded as variable length longs. See {@link VarInt#encode(long, OutputStream)}
- * for more details. The end of the iterable is detected by reading a count of 0.
- */
-@NotThreadSafe
-public class BufferedElementCountingOutputStream extends OutputStream {
-  public static final int DEFAULT_BUFFER_SIZE = 64 * 1024;
-  // Holds the bytes of the elements counted so far until the count can be written before them.
-  private final ByteBuffer buffer;
-  private final OutputStream os;
-  // Set once finish() has written the terminating count of 0.
-  private boolean finished;
-  // Number of elements started since the buffer was last output. While it is 0, writes bypass
-  // the buffer and go straight to the underlying stream.
-  private long count;
-
-  /**
-   * Creates an output stream which encodes the number of elements output to it in a streaming
-   * manner.
-   */
-  public BufferedElementCountingOutputStream(OutputStream os) {
-    this(os, DEFAULT_BUFFER_SIZE);
-  }
-
-  /**
-   * Creates an output stream which encodes the number of elements output to it in a streaming
-   * manner with the given {@code bufferSize}.
-   */
-  BufferedElementCountingOutputStream(OutputStream os, int bufferSize) {
-    this.buffer = ByteBuffer.allocate(bufferSize);
-    this.os = os;
-    this.finished = false;
-    this.count = 0;
-  }
-
-  /**
-   * Finishes the encoding by flushing any buffered data,
-   * and outputting a final count of 0. Calling it again has no effect.
-   */
-  public void finish() throws IOException {
-    if (finished) {
-      return;
-    }
-    flush();
-    // Finish the stream by stating that there are 0 elements that follow.
-    VarInt.encode(0, os);
-    finished = true;
-  }
-
-  /**
-   * Marks that a new element is being output. This allows this output stream
-   * to use the buffer if it had previously overflowed marking the start of a new
-   * block of elements.
-   *
-   * @throws IOException if the stream has already been finished
-   */
-  public void markElementStart() throws IOException {
-    if (finished) {
-      throw new IOException("Stream has been finished. Can not add any more elements.");
-    }
-    count++;
-  }
-
-  @Override
-  public void write(int b) throws IOException {
-    if (finished) {
-      throw new IOException("Stream has been finished. Can not write any more data.");
-    }
-    // No element has been started since the buffer was last output: write through.
-    if (count == 0) {
-      os.write(b);
-      return;
-    }
-
-    if (buffer.hasRemaining()) {
-      buffer.put((byte) b);
-    } else {
-      // The buffer is full: emit the count and the buffered bytes, then write this byte
-      // directly to the underlying stream.
-      outputBuffer();
-      os.write(b);
-    }
-  }
-
-  @Override
-  public void write(byte[] b, int off, int len) throws IOException {
-    if (finished) {
-      throw new IOException("Stream has been finished. Can not write any more data.");
-    }
-    // No element has been started since the buffer was last output: write through.
-    if (count == 0) {
-      os.write(b, off, len);
-      return;
-    }
-
-    if (buffer.remaining() >= len) {
-      buffer.put(b, off, len);
-    } else {
-      // The data does not fit: emit the count and the buffered bytes, then write the data
-      // directly to the underlying stream.
-      outputBuffer();
-      os.write(b, off, len);
-    }
-  }
-
-  @Override
-  public void flush() throws IOException {
-    if (finished) {
-      return;
-    }
-    outputBuffer();
-    os.flush();
-  }
-
-  /**
-   * Finishes the encoding and closes the underlying stream. The underlying stream is closed
-   * even when finishing fails, so that it is not leaked.
-   */
-  @Override
-  public void close() throws IOException {
-    try {
-      finish();
-    } finally {
-      os.close();
-    }
-  }
-
-  // Output the buffer if it contains any data.
-  private void outputBuffer() throws IOException {
-    if (count > 0) {
-      VarInt.encode(count, os);
-      // We are using a heap based buffer and not a direct buffer so it is safe to access
-      // the underlying array.
-      os.write(buffer.array(), buffer.arrayOffset(), buffer.position());
-      buffer.clear();
-      // The buffer has been flushed so we must write to the underlying stream until
-      // we learn of the next element. We reset the count to zero marking that we should
-      // not use the buffer.
-      count = 0;
-    }
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudKnownType.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudKnownType.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudKnownType.java
deleted file mode 100644
index 8b41eb8..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudKnownType.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * A utility for manipulating well-known cloud types. Each constant pairs a type URI with the
- * Java classes used to represent values of that type; the first class listed is the default.
- */
-enum CloudKnownType {
-  TEXT("http://schema.org/Text", String.class) {
-    @Override
-    public <T> T parse(Object value, Class<T> clazz) {
-      return clazz.cast(value);
-    }
-  },
-  BOOLEAN("http://schema.org/Boolean", Boolean.class) {
-    @Override
-    public <T> T parse(Object value, Class<T> clazz) {
-      return clazz.cast(value);
-    }
-  },
-  INTEGER("http://schema.org/Integer", Long.class, Integer.class) {
-    @Override
-    public <T> T parse(Object value, Class<T> clazz) {
-      if (value.getClass() == clazz) {
-        return clazz.cast(value);
-      }
-      if (clazz == Long.class) {
-        if (value instanceof Integer) {
-          return clazz.cast(((Integer) value).longValue());
-        }
-        if (value instanceof String) {
-          return clazz.cast(Long.valueOf((String) value));
-        }
-      } else if (clazz == Integer.class) {
-        if (value instanceof Long) {
-          return clazz.cast(((Long) value).intValue());
-        }
-        if (value instanceof String) {
-          return clazz.cast(Integer.valueOf((String) value));
-        }
-      }
-      // No conversion applies to this combination of value and target class.
-      return null;
-    }
-  },
-  FLOAT("http://schema.org/Float", Double.class, Float.class) {
-    @Override
-    public <T> T parse(Object value, Class<T> clazz) {
-      if (value.getClass() == clazz) {
-        return clazz.cast(value);
-      }
-      if (clazz == Double.class) {
-        if (value instanceof Float) {
-          return clazz.cast(((Float) value).doubleValue());
-        }
-        if (value instanceof String) {
-          return clazz.cast(Double.valueOf((String) value));
-        }
-      } else if (clazz == Float.class) {
-        if (value instanceof Double) {
-          return clazz.cast(((Double) value).floatValue());
-        }
-        if (value instanceof String) {
-          return clazz.cast(Float.valueOf((String) value));
-        }
-      }
-      // No conversion applies to this combination of value and target class.
-      return null;
-    }
-  };
-
-  private final String uri;
-  private final Class<?>[] classes;
-
-  private CloudKnownType(String uri, Class<?>... classes) {
-    this.uri = uri;
-    this.classes = classes;
-  }
-
-  /** Returns the URI identifying this type. */
-  public String getUri() {
-    return uri;
-  }
-
-  /**
-   * Converts {@code value} to an instance of {@code clazz}. The result is {@code null} when a
-   * constant has no conversion for the given value and class.
-   */
-  public abstract <T> T parse(Object value, Class<T> clazz);
-
-  /** Returns the first Java class registered for this type. */
-  public Class<?> defaultClass() {
-    return classes[0];
-  }
-
-  // Read-only index from type URI to constant.
-  private static final Map<String, CloudKnownType> typesByUri = buildTypesByUri();
-
-  private static Map<String, CloudKnownType> buildTypesByUri() {
-    Map<String, CloudKnownType> byUri = new HashMap<>();
-    for (CloudKnownType type : values()) {
-      byUri.put(type.uri, type);
-    }
-    return Collections.unmodifiableMap(byUri);
-  }
-
-  /** Returns the type registered for {@code uri}, or {@code null} if there is none. */
-  @Nullable
-  public static CloudKnownType forUri(@Nullable String uri) {
-    return uri == null ? null : typesByUri.get(uri);
-  }
-
-  // Read-only index from each registered Java class to its constant.
-  private static final Map<Class<?>, CloudKnownType> typesByClass = buildTypesByClass();
-
-  private static Map<Class<?>, CloudKnownType> buildTypesByClass() {
-    Map<Class<?>, CloudKnownType> byClass = new HashMap<>();
-    for (CloudKnownType type : values()) {
-      for (Class<?> javaClass : type.classes) {
-        byClass.put(javaClass, type);
-      }
-    }
-    return Collections.unmodifiableMap(byClass);
-  }
-
-  /** Returns the type registered for {@code clazz}, or {@code null} if there is none. */
-  @Nullable
-  public static CloudKnownType forClass(Class<?> clazz) {
-    return typesByClass.get(clazz);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudObject.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudObject.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudObject.java
deleted file mode 100644
index 8c704bf..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CloudObject.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.api.client.util.Preconditions.checkNotNull;
-
-import com.google.api.client.json.GenericJson;
-import com.google.api.client.util.Key;
-
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * A representation of an arbitrary Java object to be instantiated by Dataflow
- * workers.
- *
- * <p>Typically, an object to be written by the SDK to the Dataflow service will
- * implement a method (typically called {@code asCloudObject()}) that returns a
- * {@code CloudObject} to represent the object in the protocol.  Once the
- * {@code CloudObject} is constructed, the method should explicitly add
- * additional properties to be presented during deserialization, representing
- * child objects by building additional {@code CloudObject}s.
- */
-public final class CloudObject extends GenericJson {
-  /**
-   * Constructs a {@code CloudObject} by copying the supplied serialized object
-   * spec, which must represent an SDK object serialized for transport via the
-   * Dataflow API.
-   *
-   * <p>The most common use of this method is during deserialization on the worker,
-   * where it's used as a binding type during instance construction.
-   *
-   * @param spec supplies the serialized form of the object as a nested map
-   * @throws RuntimeException if the supplied map does not represent an SDK object
-   */
-  public static CloudObject fromSpec(Map<String, Object> spec) {
-    CloudObject result = new CloudObject();
-    result.putAll(spec);
-    if (result.className == null) {
-      throw new RuntimeException("Unable to create an SDK object from " + spec
-          + ": Object class not specified (missing \""
-          + PropertyNames.OBJECT_TYPE_NAME + "\" field)");
-    }
-    return result;
-  }
-
-  /**
-   * Constructs a {@code CloudObject} to be used for serializing an instance of
-   * the supplied class for transport via the Dataflow API.  The instance
-   * parameters to be serialized must be supplied explicitly after the
-   * {@code CloudObject} is created, by using {@link CloudObject#put}.
-   *
-   * @param cls the class to use when deserializing the object on the worker
-   */
-  public static CloudObject forClass(Class<?> cls) {
-    CloudObject result = new CloudObject();
-    result.className = checkNotNull(cls).getName();
-    return result;
-  }
-
-  /**
-   * Constructs a {@code CloudObject} to be used for serializing data to be
-   * deserialized using the supplied class name the supplied class name for
-   * transport via the Dataflow API.  The instance parameters to be serialized
-   * must be supplied explicitly after the {@code CloudObject} is created, by
-   * using {@link CloudObject#put}.
-   *
-   * @param className the class to use when deserializing the object on the worker
-   */
-  public static CloudObject forClassName(String className) {
-    CloudObject result = new CloudObject();
-    result.className = checkNotNull(className);
-    return result;
-  }
-
-  /**
-   * Constructs a {@code CloudObject} representing the given value.
-   * @param value the scalar value to represent.
-   */
-  public static CloudObject forString(String value) {
-    CloudObject result = forClassName(CloudKnownType.TEXT.getUri());
-    result.put(PropertyNames.SCALAR_FIELD_NAME, value);
-    return result;
-  }
-
-  /**
-   * Constructs a {@code CloudObject} representing the given value.
-   * @param value the scalar value to represent.
-   */
-  public static CloudObject forBoolean(Boolean value) {
-    CloudObject result = forClassName(CloudKnownType.BOOLEAN.getUri());
-    result.put(PropertyNames.SCALAR_FIELD_NAME, value);
-    return result;
-  }
-
-  /**
-   * Constructs a {@code CloudObject} representing the given value.
-   * @param value the scalar value to represent.
-   */
-  public static CloudObject forInteger(Long value) {
-    CloudObject result = forClassName(CloudKnownType.INTEGER.getUri());
-    result.put(PropertyNames.SCALAR_FIELD_NAME, value);
-    return result;
-  }
-
-  /**
-   * Constructs a {@code CloudObject} representing the given value.
-   * @param value the scalar value to represent.
-   */
-  public static CloudObject forInteger(Integer value) {
-    CloudObject result = forClassName(CloudKnownType.INTEGER.getUri());
-    result.put(PropertyNames.SCALAR_FIELD_NAME, value);
-    return result;
-  }
-
-  /**
-   * Constructs a {@code CloudObject} representing the given value.
-   * @param value the scalar value to represent.
-   */
-  public static CloudObject forFloat(Float value) {
-    CloudObject result = forClassName(CloudKnownType.FLOAT.getUri());
-    result.put(PropertyNames.SCALAR_FIELD_NAME, value);
-    return result;
-  }
-
-  /**
-   * Constructs a {@code CloudObject} representing the given value.
-   * @param value the scalar value to represent.
-   */
-  public static CloudObject forFloat(Double value) {
-    CloudObject result = forClassName(CloudKnownType.FLOAT.getUri());
-    result.put(PropertyNames.SCALAR_FIELD_NAME, value);
-    return result;
-  }
-
-  /**
-   * Constructs a {@code CloudObject} representing the given value of a
-   * well-known cloud object type.
-   * @param value the scalar value to represent.
-   * @throws RuntimeException if the value does not have a
-   * {@link CloudKnownType} mapping
-   */
-  public static CloudObject forKnownType(Object value) {
-    @Nullable CloudKnownType ty = CloudKnownType.forClass(value.getClass());
-    if (ty == null) {
-      throw new RuntimeException("Unable to represent value via the Dataflow API: " + value);
-    }
-    CloudObject result = forClassName(ty.getUri());
-    result.put(PropertyNames.SCALAR_FIELD_NAME, value);
-    return result;
-  }
-
-  @Key(PropertyNames.OBJECT_TYPE_NAME)
-  private String className;
-
-  private CloudObject() {}
-
-  /**
-   * Gets the name of the Java class that this CloudObject represents.
-   */
-  public String getClassName() {
-    return className;
-  }
-
-  @Override
-  public CloudObject clone() {
-    return (CloudObject) super.clone();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java
deleted file mode 100644
index ddab933..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CoderUtils.java
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addList;
-
-import com.google.api.client.util.Base64;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.coders.KvCoderBase;
-import com.google.cloud.dataflow.sdk.coders.MapCoder;
-import com.google.cloud.dataflow.sdk.coders.MapCoderBase;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.base.Throwables;
-
-import com.fasterxml.jackson.annotation.JsonTypeInfo;
-import com.fasterxml.jackson.annotation.JsonTypeInfo.As;
-import com.fasterxml.jackson.annotation.JsonTypeInfo.Id;
-import com.fasterxml.jackson.databind.DatabindContext;
-import com.fasterxml.jackson.databind.JavaType;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.annotation.JsonTypeIdResolver;
-import com.fasterxml.jackson.databind.jsontype.impl.TypeIdResolverBase;
-import com.fasterxml.jackson.databind.module.SimpleModule;
-import com.fasterxml.jackson.databind.type.TypeFactory;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.lang.ref.SoftReference;
-import java.lang.reflect.ParameterizedType;
-import java.lang.reflect.TypeVariable;
-
-/**
- * Utilities for working with Coders.
- */
-public final class CoderUtils {
-  private CoderUtils() {}  // Non-instantiable
-
-  /**
-   * Coder class-name alias for a key-value type.
-   */
-  public static final String KIND_PAIR = "kind:pair";
-
-  /**
-   * Coder class-name alias for a stream type.
-   */
-  public static final String KIND_STREAM = "kind:stream";
-
-  private static ThreadLocal<SoftReference<ExposedByteArrayOutputStream>> threadLocalOutputStream
-      = new ThreadLocal<>();
-
-  /**
-   * If true, a call to {@code encodeToByteArray} is already on the call stack.
-   */
-  private static ThreadLocal<Boolean> threadLocalOutputStreamInUse = new ThreadLocal<Boolean>() {
-    @Override
-    protected Boolean initialValue() {
-      return false;
-    }
-  };
-
-  /**
-   * Encodes the given value using the specified Coder, and returns
-   * the encoded bytes.
-   *
-   * <p>This function is not reentrant; it should not be called from methods of the provided
-   * {@link Coder}.
-   */
-  public static <T> byte[] encodeToByteArray(Coder<T> coder, T value) throws CoderException {
-    return encodeToByteArray(coder, value, Coder.Context.OUTER);
-  }
-
-  public static <T> byte[] encodeToByteArray(Coder<T> coder, T value, Coder.Context context)
-      throws CoderException {
-    if (threadLocalOutputStreamInUse.get()) {
-      // encodeToByteArray() is called recursively and the thread local stream is in use,
-      // allocating a new one.
-      ByteArrayOutputStream stream = new ExposedByteArrayOutputStream();
-      encodeToSafeStream(coder, value, stream, context);
-      return stream.toByteArray();
-    } else {
-      threadLocalOutputStreamInUse.set(true);
-      try {
-        ByteArrayOutputStream stream = getThreadLocalOutputStream();
-        encodeToSafeStream(coder, value, stream, context);
-        return stream.toByteArray();
-      } finally {
-        threadLocalOutputStreamInUse.set(false);
-      }
-    }
-  }
-
-  /**
-   * Encodes {@code value} to the given {@code stream}, which should be a stream that never throws
-   * {@code IOException}, such as {@code ByteArrayOutputStream} or
-   * {@link ExposedByteArrayOutputStream}.
-   */
-  private static <T> void encodeToSafeStream(
-      Coder<T> coder, T value, OutputStream stream, Coder.Context context) throws CoderException {
-    try {
-      coder.encode(value, new UnownedOutputStream(stream), context);
-    } catch (IOException exn) {
-      Throwables.propagateIfPossible(exn, CoderException.class);
-      throw new IllegalArgumentException(
-          "Forbidden IOException when writing to OutputStream", exn);
-    }
-  }
-
-  /**
-   * Decodes the given bytes using the specified Coder, and returns
-   * the resulting decoded value.
-   */
-  public static <T> T decodeFromByteArray(Coder<T> coder, byte[] encodedValue)
-      throws CoderException {
-    return decodeFromByteArray(coder, encodedValue, Coder.Context.OUTER);
-  }
-
-  public static <T> T decodeFromByteArray(
-      Coder<T> coder, byte[] encodedValue, Coder.Context context) throws CoderException {
-    try (ExposedByteArrayInputStream stream = new ExposedByteArrayInputStream(encodedValue)) {
-      T result = decodeFromSafeStream(coder, stream, context);
-      if (stream.available() != 0) {
-        throw new CoderException(
-            stream.available() + " unexpected extra bytes after decoding " + result);
-      }
-      return result;
-    }
-  }
-
-  /**
-   * Decodes a value from the given {@code stream}, which should be a stream that never throws
-   * {@code IOException}, such as {@code ByteArrayInputStream} or
-   * {@link ExposedByteArrayInputStream}.
-   */
-  private static <T> T decodeFromSafeStream(
-      Coder<T> coder, InputStream stream, Coder.Context context) throws CoderException {
-    try {
-      return coder.decode(new UnownedInputStream(stream), context);
-    } catch (IOException exn) {
-      Throwables.propagateIfPossible(exn, CoderException.class);
-      throw new IllegalArgumentException(
-          "Forbidden IOException when reading from InputStream", exn);
-    }
-  }
-
-  private static ByteArrayOutputStream getThreadLocalOutputStream() {
-    SoftReference<ExposedByteArrayOutputStream> refStream = threadLocalOutputStream.get();
-    ExposedByteArrayOutputStream stream = refStream == null ? null : refStream.get();
-    if (stream == null) {
-      stream = new ExposedByteArrayOutputStream();
-      threadLocalOutputStream.set(new SoftReference<>(stream));
-    }
-    stream.reset();
-    return stream;
-  }
-
-  /**
-   * Clones the given value by encoding and then decoding it with the specified Coder.
-   *
-   * <p>This function is not reentrant; it should not be called from methods of the provided
-   * {@link Coder}.
-   */
-  public static <T> T clone(Coder<T> coder, T value) throws CoderException {
-    return decodeFromByteArray(coder, encodeToByteArray(coder, value, Coder.Context.OUTER));
-  }
-
-  /**
-   * Encodes the given value using the specified Coder, and returns the Base64 encoding of the
-   * encoded bytes.
-   *
-   * @throws CoderException if there are errors during encoding.
-   */
-  public static <T> String encodeToBase64(Coder<T> coder, T value)
-      throws CoderException {
-    byte[] rawValue = encodeToByteArray(coder, value);
-    return Base64.encodeBase64URLSafeString(rawValue);
-  }
-
-  /**
-   * Parses a value from a base64-encoded String using the given coder.
-   */
-  public static <T> T decodeFromBase64(Coder<T> coder, String encodedValue) throws CoderException {
-    return decodeFromSafeStream(
-        coder, new ByteArrayInputStream(Base64.decodeBase64(encodedValue)), Coder.Context.OUTER);
-  }
-
-  /**
-   * If {@code coderType} is a subclass of {@code Coder<T>} for a specific
-   * type {@code T}, returns {@code T.class}.
-   */
-  @SuppressWarnings({"rawtypes", "unchecked"})
-  public static TypeDescriptor getCodedType(TypeDescriptor coderDescriptor) {
-    ParameterizedType coderType =
-        (ParameterizedType) coderDescriptor.getSupertype(Coder.class).getType();
-    TypeDescriptor codedType = TypeDescriptor.of(coderType.getActualTypeArguments()[0]);
-    return codedType;
-  }
-
-  public static CloudObject makeCloudEncoding(
-      String type,
-      CloudObject... componentSpecs) {
-    CloudObject encoding = CloudObject.forClassName(type);
-    if (componentSpecs.length > 0) {
-      addList(encoding, PropertyNames.COMPONENT_ENCODINGS, componentSpecs);
-    }
-    return encoding;
-  }
-
-  /**
-   * A {@link com.fasterxml.jackson.databind.Module} that adds the type
-   * resolver needed for Coder definitions created by the Dataflow service.
-   */
-  static final class Jackson2Module extends SimpleModule {
-    /**
-     * The Coder custom type resolver.
-     *
-     * <p>This resolver resolves coders.  If the Coder ID is a particular
-     * well-known identifier supplied by the Dataflow service, it's replaced
-     * with the corresponding class.  All other Coder instances are resolved
-     * by class name, using the package com.google.cloud.dataflow.sdk.coders
-     * if there are no "."s in the ID.
-     */
-    private static final class Resolver extends TypeIdResolverBase {
-      @SuppressWarnings("unused") // Used via @JsonTypeIdResolver annotation on Mixin
-      public Resolver() {
-        super(TypeFactory.defaultInstance().constructType(Coder.class),
-            TypeFactory.defaultInstance());
-      }
-
-      @Deprecated
-      @Override
-      public JavaType typeFromId(String id) {
-        return typeFromId(null, id);
-      }
-
-      @Override
-      public JavaType typeFromId(DatabindContext context, String id) {
-        Class<?> clazz = getClassForId(id);
-        if (clazz == KvCoder.class) {
-          clazz = KvCoderBase.class;
-        }
-        if (clazz == MapCoder.class) {
-          clazz = MapCoderBase.class;
-        }
-        @SuppressWarnings("rawtypes")
-        TypeVariable[] tvs = clazz.getTypeParameters();
-        JavaType[] types = new JavaType[tvs.length];
-        for (int lupe = 0; lupe < tvs.length; lupe++) {
-          types[lupe] = TypeFactory.unknownType();
-        }
-        return _typeFactory.constructSimpleType(clazz, types);
-      }
-
-      private Class<?> getClassForId(String id) {
-        try {
-          if (id.contains(".")) {
-            return Class.forName(id);
-          }
-
-          if (id.equals(KIND_STREAM)) {
-            return IterableCoder.class;
-          } else if (id.equals(KIND_PAIR)) {
-            return KvCoder.class;
-          }
-
-          // Otherwise, see if the ID is the name of a class in
-          // com.google.cloud.dataflow.sdk.coders.  We do this via creating
-          // the class object so that class loaders have a chance to get
-          // involved -- and since we need the class object anyway.
-          return Class.forName(Coder.class.getPackage().getName() + "." + id);
-        } catch (ClassNotFoundException e) {
-          throw new RuntimeException("Unable to convert coder ID " + id + " to class", e);
-        }
-      }
-
-      @Override
-      public String idFromValueAndType(Object o, Class<?> clazz) {
-        return clazz.getName();
-      }
-
-      @Override
-      public String idFromValue(Object o) {
-        return o.getClass().getName();
-      }
-
-      @Override
-      public JsonTypeInfo.Id getMechanism() {
-        return JsonTypeInfo.Id.CUSTOM;
-      }
-    }
-
-    /**
-     * The mixin class defining how Coders are handled by the deserialization
-     * {@link ObjectMapper}.
-     *
-     * <p>This is done via a mixin so that this resolver is <i>only</i> used
-     * during deserialization requested by the Dataflow SDK.
-     */
-    @JsonTypeIdResolver(Resolver.class)
-    @JsonTypeInfo(use = Id.CUSTOM, include = As.PROPERTY, property = PropertyNames.OBJECT_TYPE_NAME)
-    private static final class Mixin {}
-
-    public Jackson2Module() {
-      super("DataflowCoders");
-      setMixInAnnotation(Coder.class, Mixin.class);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombineContextFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombineContextFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombineContextFactory.java
deleted file mode 100644
index 6f2b89b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombineContextFactory.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.Context;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.state.StateContext;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-
-/**
- * Factory that produces {@code Combine.Context} based on different inputs.
- */
-public class CombineContextFactory {
-
-  private static final Context NULL_CONTEXT = new Context() {
-    @Override
-    public PipelineOptions getPipelineOptions() {
-      throw new IllegalArgumentException("cannot call getPipelineOptions() in a null context");
-    }
-
-    @Override
-    public <T> T sideInput(PCollectionView<T> view) {
-      throw new IllegalArgumentException("cannot call sideInput() in a null context");
-    }
-  };
-
-  /**
-   * Returns a fake {@code Combine.Context} for tests.
-   */
-  public static Context nullContext() {
-    return NULL_CONTEXT;
-  }
-
-  /**
-   * Returns a {@code Combine.Context} that wraps a {@code DoFn.ProcessContext}.
-   */
-  public static Context createFromProcessContext(final DoFn<?, ?>.ProcessContext c) {
-    return new Context() {
-      @Override
-      public PipelineOptions getPipelineOptions() {
-        return c.getPipelineOptions();
-      }
-
-      @Override
-      public <T> T sideInput(PCollectionView<T> view) {
-        return c.sideInput(view);
-      }
-    };
-  }
-
-  /**
-   * Returns a {@code Combine.Context} that wraps a {@link StateContext}.
-   */
-  public static Context createFromStateContext(final StateContext<?> c) {
-    return new Context() {
-      @Override
-      public PipelineOptions getPipelineOptions() {
-        return c.getPipelineOptions();
-      }
-
-      @Override
-      public <T> T sideInput(PCollectionView<T> view) {
-        return c.sideInput(view);
-      }
-    };
-  }
-
-  /**
-   * Returns a {@code Combine.Context} from {@code PipelineOptions}, {@code SideInputReader},
-   * and the main input window.
-   */
-  public static Context createFromComponents(final PipelineOptions options,
-      final SideInputReader sideInputReader, final BoundedWindow mainInputWindow) {
-    return new Context() {
-      @Override
-      public PipelineOptions getPipelineOptions() {
-        return options;
-      }
-
-      @Override
-      public <T> T sideInput(PCollectionView<T> view) {
-        if (!sideInputReader.contains(view)) {
-          throw new IllegalArgumentException("calling sideInput() with unknown view");
-        }
-
-        BoundedWindow sideInputWindow =
-            view.getWindowingStrategyInternal().getWindowFn().getSideInputWindow(mainInputWindow);
-        return sideInputReader.get(view, sideInputWindow);
-      }
-    };
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombineFnUtil.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombineFnUtil.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombineFnUtil.java
deleted file mode 100644
index d974480..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/CombineFnUtil.java
+++ /dev/null
@@ -1,154 +0,0 @@
-
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineFnBase.GlobalCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.CombineFnWithContext;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.Context;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.KeyedCombineFnWithContext;
-import com.google.cloud.dataflow.sdk.util.state.StateContext;
-
-import java.io.IOException;
-import java.io.NotSerializableException;
-import java.io.ObjectOutputStream;
-
-/**
- * Static utility methods that create combine function instances.
- */
-public class CombineFnUtil {
-  /**
-   * Returns the partial application of the {@link KeyedCombineFnWithContext} to a specific
-   * context to produce a {@link KeyedCombineFn}.
-   *
-   * <p>The returned {@link KeyedCombineFn} cannot be serialized.
-   */
-  public static <K, InputT, AccumT, OutputT> KeyedCombineFn<K, InputT, AccumT, OutputT>
-  bindContext(
-      KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> combineFn,
-      StateContext<?> stateContext) {
-    Context context = CombineContextFactory.createFromStateContext(stateContext);
-    return new NonSerializableBoundedKeyedCombineFn<>(combineFn, context);
-  }
-
-  /**
-   * Return a {@link CombineFnWithContext} from the given {@link GlobalCombineFn}.
-   */
-  public static <InputT, AccumT, OutputT>
-      CombineFnWithContext<InputT, AccumT, OutputT> toFnWithContext(
-          GlobalCombineFn<InputT, AccumT, OutputT> globalCombineFn) {
-    if (globalCombineFn instanceof CombineFnWithContext) {
-      @SuppressWarnings("unchecked")
-      CombineFnWithContext<InputT, AccumT, OutputT> combineFnWithContext =
-          (CombineFnWithContext<InputT, AccumT, OutputT>) globalCombineFn;
-      return combineFnWithContext;
-    } else {
-      @SuppressWarnings("unchecked")
-      final CombineFn<InputT, AccumT, OutputT> combineFn =
-          (CombineFn<InputT, AccumT, OutputT>) globalCombineFn;
-      return new CombineFnWithContext<InputT, AccumT, OutputT>() {
-        @Override
-        public AccumT createAccumulator(Context c) {
-          return combineFn.createAccumulator();
-        }
-        @Override
-        public AccumT addInput(AccumT accumulator, InputT input, Context c) {
-          return combineFn.addInput(accumulator, input);
-        }
-        @Override
-        public AccumT mergeAccumulators(Iterable<AccumT> accumulators, Context c) {
-          return combineFn.mergeAccumulators(accumulators);
-        }
-        @Override
-        public OutputT extractOutput(AccumT accumulator, Context c) {
-          return combineFn.extractOutput(accumulator);
-        }
-        @Override
-        public AccumT compact(AccumT accumulator, Context c) {
-          return combineFn.compact(accumulator);
-        }
-        @Override
-        public OutputT defaultValue() {
-          return combineFn.defaultValue();
-        }
-        @Override
-        public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<InputT> inputCoder)
-            throws CannotProvideCoderException {
-          return combineFn.getAccumulatorCoder(registry, inputCoder);
-        }
-        @Override
-        public Coder<OutputT> getDefaultOutputCoder(
-            CoderRegistry registry, Coder<InputT> inputCoder) throws CannotProvideCoderException {
-          return combineFn.getDefaultOutputCoder(registry, inputCoder);
-        }
-      };
-    }
-  }
-
-  private static class NonSerializableBoundedKeyedCombineFn<K, InputT, AccumT, OutputT>
-      extends KeyedCombineFn<K, InputT, AccumT, OutputT> {
-    private final KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> combineFn;
-    private final Context context;
-
-    private NonSerializableBoundedKeyedCombineFn(
-        KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> combineFn,
-        Context context) {
-      this.combineFn = combineFn;
-      this.context = context;
-    }
-    @Override
-    public AccumT createAccumulator(K key) {
-      return combineFn.createAccumulator(key, context);
-    }
-    @Override
-    public AccumT addInput(K key, AccumT accumulator, InputT value) {
-      return combineFn.addInput(key, accumulator, value, context);
-    }
-    @Override
-    public AccumT mergeAccumulators(K key, Iterable<AccumT> accumulators) {
-      return combineFn.mergeAccumulators(key, accumulators, context);
-    }
-    @Override
-    public OutputT extractOutput(K key, AccumT accumulator) {
-      return combineFn.extractOutput(key, accumulator, context);
-    }
-    @Override
-    public AccumT compact(K key, AccumT accumulator) {
-      return combineFn.compact(key, accumulator, context);
-    }
-    @Override
-    public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<K> keyCoder,
-        Coder<InputT> inputCoder) throws CannotProvideCoderException {
-      return combineFn.getAccumulatorCoder(registry, keyCoder, inputCoder);
-    }
-    @Override
-    public Coder<OutputT> getDefaultOutputCoder(CoderRegistry registry, Coder<K> keyCoder,
-        Coder<InputT> inputCoder) throws CannotProvideCoderException {
-      return combineFn.getDefaultOutputCoder(registry, keyCoder, inputCoder);
-    }
-
-    private void writeObject(@SuppressWarnings("unused") ObjectOutputStream out)
-        throws IOException {
-      throw new NotSerializableException(
-          "Cannot serialize the CombineFn resulting from CombineFnUtil.bindContext.");
-    }
-  }
-}

[53/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/TriggerExample.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/TriggerExample.java b/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/TriggerExample.java
deleted file mode 100644
index ce5e08e..0000000
--- a/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/TriggerExample.java
+++ /dev/null
@@ -1,564 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
-import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
-import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
-import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicOptions;
-import com.google.cloud.dataflow.examples.common.PubsubFileInjector;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.PipelineResult;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.Default;
-import com.google.cloud.dataflow.sdk.options.Description;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterEach;
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterProcessingTime;
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark;
-import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Repeatedly;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.TimeUnit;
-
-/**
- * This example illustrates the basic concepts behind triggering. It shows how to use different
- * trigger definitions to produce partial (speculative) results before all the data is processed and
- * to control when updated results are produced for late data. The example performs a streaming
- * analysis of the data coming in from PubSub and writes the results to BigQuery. It divides the
- * data into {@link Window windows} to be processed, and demonstrates using various kinds of {@link
- * Trigger triggers} to control when the results for each window are emitted.
- *
- * <p> This example uses a portion of real traffic data from San Diego freeways. It contains
- * readings from sensor stations set up along each freeway. Each sensor reading includes a
- * calculation of the 'total flow' across all lanes in that freeway direction.
- *
- * <p> Concepts:
- * <pre>
- *   1. The default triggering behavior
- *   2. Late data with the default trigger
- *   3. How to get speculative estimates
- *   4. Combining late data and speculative estimates
- * </pre>
- *
- * <p> Before running this example, it will be useful to familiarize yourself with Dataflow triggers
- * and understand the concept of 'late data'.
- * See:  <a href="https://cloud.google.com/dataflow/model/triggers">
- * https://cloud.google.com/dataflow/model/triggers </a> and
- * <a href="https://cloud.google.com/dataflow/model/windowing#Advanced">
- * https://cloud.google.com/dataflow/model/windowing#Advanced </a>
- *
- * <p> The example pipeline reads data from a Pub/Sub topic. By default, running the example will
- * also run an auxiliary pipeline to inject data from the default {@code --input} file to the
- * {@code --pubsubTopic}. The auxiliary pipeline puts a timestamp on the injected data so that the
- * example pipeline can operate on <i>event time</i> (rather than arrival time). The auxiliary
- * pipeline also randomly simulates late data, by setting the timestamps of some of the data
- * elements to be in the past. You may override the default {@code --input} with the file of your
- * choosing or set {@code --input=""} which will disable the automatic Pub/Sub injection, and allow
- * you to use a separate tool to publish to the given topic.
- *
- * <p> The example is configured to use the default Pub/Sub topic and the default BigQuery table
- * from the example common package (there are no defaults for a general Dataflow pipeline).
- * You can override them by using the {@code --pubsubTopic}, {@code --bigQueryDataset}, and
- * {@code --bigQueryTable} options. If the Pub/Sub topic or the BigQuery table do not exist,
- * the example will try to create them.
- *
- * <p> The pipeline outputs its results to a BigQuery table.
- * Here are some queries you can use to see interesting results:
- * Replace {@code <enter_table_name>} in the query below with the name of the BigQuery table.
- * Replace {@code <enter_window_interval>} in the query below with the window interval.
- *
- * <p> To see the results of the default trigger,
- * Note: When you start up your pipeline, you'll initially see results from 'late' data. Wait after
- * the window duration, until the first pane of non-late data has been emitted, to see more
- * interesting results.
- * {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "default" ORDER BY window DESC}
- *
- * <p> To see the late data, i.e. the data dropped by the default trigger,
- * {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "withAllowedLateness" and
- * (timing = "LATE" or timing = "ON_TIME") and freeway = "5" ORDER BY window DESC, processing_time}
- *
- * <p>To see the difference between accumulation mode and discarding mode,
- * {@code SELECT * FROM <enter_table_name> WHERE (timing = "LATE" or timing = "ON_TIME") AND
- * (trigger_type = "withAllowedLateness" or trigger_type = "sequential") and freeway = "5" ORDER BY
- * window DESC, processing_time}
- *
- * <p> To see speculative results every minute,
- * {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "speculative" and freeway = "5"
- * ORDER BY window DESC, processing_time}
- *
- * <p> To see speculative results every five minutes after the end of the window
- * {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "sequential" and timing != "EARLY"
- * and freeway = "5" ORDER BY window DESC, processing_time}
- *
- * <p> To see the first and the last pane for a freeway in a window for all the trigger types,
- * {@code SELECT * FROM <enter_table_name> WHERE (isFirst = true or isLast = true) ORDER BY window}
- *
- * <p> To reduce the number of results for each query we can add additional where clauses.
- * For example, to see the results of the default trigger,
- * {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "default" AND freeway = "5" AND
- * window = "<enter_window_interval>"}
- *
- * <p> The example will try to cancel the pipelines on the signal to terminate the process (CTRL-C)
- * and then exits.
- */
-
-public class TriggerExample {
-  //Numeric value of fixed window duration, in minutes
-  public static final int WINDOW_DURATION = 30;
-  // Constants used in triggers.
-  // Speeding up ONE_MINUTE or FIVE_MINUTES helps you get an early approximation of results.
-  // ONE_MINUTE is used only with processing time before the end of the window
-  public static final Duration ONE_MINUTE = Duration.standardMinutes(1);
-  // FIVE_MINUTES is used only with processing time after the end of the window
-  public static final Duration FIVE_MINUTES = Duration.standardMinutes(5);
-  // ONE_DAY is used to specify the amount of lateness allowed for the data elements.
-  public static final Duration ONE_DAY = Duration.standardDays(1);
-
-  /**
-   * This transform demonstrates using triggers to control when data is produced for each window.
-   * Consider an example to understand the results generated by each type of trigger.
-   * The example uses "freeway" as the key. Event time is the timestamp associated with the data
-   * element and processing time is the time when the data element gets processed in the pipeline.
-   * For freeway 5, suppose there are 10 elements in the [10:00:00, 10:30:00) window.
-   * Key (freeway) | Value (total_flow) | event time | processing time
-   * 5             | 50                 | 10:00:03   | 10:00:47
-   * 5             | 30                 | 10:01:00   | 10:01:03
-   * 5             | 30                 | 10:02:00   | 11:07:00
-   * 5             | 20                 | 10:04:10   | 10:05:15
-   * 5             | 60                 | 10:05:00   | 11:03:00
-   * 5             | 20                 | 10:05:01   | 11:07:30
-   * 5             | 60                 | 10:15:00   | 10:27:15
-   * 5             | 40                 | 10:26:40   | 10:26:43
-   * 5             | 60                 | 10:27:20   | 10:27:25
-   * 5             | 60                 | 10:29:00   | 11:11:00
-   *
-   * <p> Dataflow tracks a watermark which records up to what point in event time the data is
-   * complete. For the purposes of the example, we'll assume the watermark is approximately 15m
-   * behind the current processing time. In practice, the actual value would vary over time based
-   * on the system's knowledge of the current PubSub delay and contents of the backlog (data
-   * that has not yet been processed).
-   *
-   * <p> If the watermark is 15m behind, then the window [10:00:00, 10:30:00) (in event time) would
-   * close at 10:44:59, when the watermark passes 10:30:00.
-   */
-  static class CalculateTotalFlow
-  extends PTransform <PCollection<KV<String, Integer>>, PCollectionList<TableRow>> {
-    private int windowDuration;
-
-    CalculateTotalFlow(int windowDuration) {
-      this.windowDuration = windowDuration;
-    }
-
-    @Override
-    public PCollectionList<TableRow> apply(PCollection<KV<String, Integer>> flowInfo) {
-
-      // Concept #1: The default triggering behavior
-      // By default Dataflow uses a trigger which fires when the watermark has passed the end of the
-      // window. This would be written {@code Repeatedly.forever(AfterWatermark.pastEndOfWindow())}.
-
-      // The system also defaults to dropping late data -- data which arrives after the watermark
-      // has passed the event timestamp of the arriving element. This means that the default trigger
-      // will only fire once.
-
-      // Each pane produced by the default trigger with no allowed lateness will be the first and
-      // last pane in the window, and will be ON_TIME.
-
-      // The results for the example above with the default trigger and zero allowed lateness
-      // would be:
-      // Key (freeway) | Value (total_flow) | number_of_records | isFirst | isLast | timing
-      // 5             | 260                | 6                 | true    | true   | ON_TIME
-
-      // At 11:03:00 (processing time) the system watermark may have advanced to 10:54:00. As a
-      // result, when the data record with event time 10:05:00 arrives at 11:03:00, it is considered
-      // late, and dropped.
-
-      PCollection<TableRow> defaultTriggerResults = flowInfo
-          .apply("Default", Window
-              // The default window duration values work well if you're running the default input
-              // file. You may want to adjust the window duration otherwise.
-              .<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(windowDuration)))
-              // The default trigger first emits output when the system's watermark passes the end
-              // of the window.
-              .triggering(Repeatedly.forever(AfterWatermark.pastEndOfWindow()))
-              // Late data is dropped
-              .withAllowedLateness(Duration.ZERO)
-              // Discard elements after emitting each pane.
-              // With no allowed lateness and the specified trigger there will only be a single
-              // pane, so this doesn't have a noticeable effect. See concept 2 for more details.
-              .discardingFiredPanes())
-          .apply(new TotalFlow("default"));
-
-      // Concept #2: Late data with the default trigger
-      // This uses the same trigger as concept #1, but allows data that is up to ONE_DAY late. This
-      // leads to each window staying open for ONE_DAY after the watermark has passed the end of the
-      // window. Any late data will result in an additional pane being fired for that same window.
-
-      // The first pane produced will be ON_TIME and the remaining panes will be LATE.
-      // To definitely get the last pane when the window closes, use
-      // .withAllowedLateness(ONE_DAY, ClosingBehavior.FIRE_ALWAYS).
-
-      // The results for the example above with the default trigger and ONE_DAY allowed lateness
-      // would be:
-      // Key (freeway) | Value (total_flow) | number_of_records | isFirst | isLast | timing
-      // 5             | 260                | 6                 | true    | false  | ON_TIME
-      // 5             | 60                 | 1                 | false   | false  | LATE
-      // 5             | 30                 | 1                 | false   | false  | LATE
-      // 5             | 20                 | 1                 | false   | false  | LATE
-      // 5             | 60                 | 1                 | false   | false  | LATE
-      PCollection<TableRow> withAllowedLatenessResults = flowInfo
-          .apply("WithLateData", Window
-              .<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(windowDuration)))
-              // Late data is emitted as it arrives
-              .triggering(Repeatedly.forever(AfterWatermark.pastEndOfWindow()))
-              // Once the output is produced, the pane is dropped and we start preparing the next
-              // pane for the window
-              .discardingFiredPanes()
-              // Late data is handled up to one day
-              .withAllowedLateness(ONE_DAY))
-          .apply(new TotalFlow("withAllowedLateness"));
-
-      // Concept #3: How to get speculative estimates
-      // We can specify a trigger that fires independent of the watermark, for instance after
-      // ONE_MINUTE of processing time. This allows us to produce speculative estimates before
-      // all the data is available. Since we don't have any triggers that depend on the watermark
-      // we don't get an ON_TIME firing. Instead, all panes are either EARLY or LATE.
-
-      // We also use accumulatingFiredPanes to build up the results across each pane firing.
-
-      // The results for the example above for this trigger would be:
-      // Key (freeway) | Value (total_flow) | number_of_records | isFirst | isLast | timing
-      // 5             | 80                 | 2                 | true    | false  | EARLY
-      // 5             | 100                | 3                 | false   | false  | EARLY
-      // 5             | 260                | 6                 | false   | false  | EARLY
-      // 5             | 320                | 7                 | false   | false  | LATE
-      // 5             | 370                | 9                 | false   | false  | LATE
-      // 5             | 430                | 10                | false   | false  | LATE
-      PCollection<TableRow> speculativeResults = flowInfo
-          .apply("Speculative" , Window
-              .<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(windowDuration)))
-              // Trigger fires every minute.
-              .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
-                  // Speculative every ONE_MINUTE
-                  .plusDelayOf(ONE_MINUTE)))
-              // After emitting each pane, it will continue accumulating the elements so that each
-              // approximation includes all of the previous data in addition to the newly arrived
-              // data.
-              .accumulatingFiredPanes()
-              .withAllowedLateness(ONE_DAY))
-          .apply(new TotalFlow("speculative"));
-
-      // Concept #4: Combining late data and speculative estimates
-      // We can put the previous concepts together to get EARLY estimates, an ON_TIME result,
-      // and LATE updates based on late data.
-
-      // Each time a triggering condition is satisfied it advances to the next trigger.
-      // If there are new elements, this trigger emits a window under the following conditions:
-      // > Early approximations every minute till the end of the window.
-      // > An on-time firing when the watermark has passed the end of the window
-      // > Every five minutes of late data.
-
-      // Every pane produced will either be EARLY, ON_TIME or LATE.
-
-      // The results for the example above for this trigger would be:
-      // Key (freeway) | Value (total_flow) | number_of_records | isFirst | isLast | timing
-      // 5             | 80                 | 2                 | true    | false  | EARLY
-      // 5             | 100                | 3                 | false   | false  | EARLY
-      // 5             | 260                | 6                 | false   | false  | EARLY
-      // [First pane fired after the end of the window]
-      // 5             | 320                | 7                 | false   | false  | ON_TIME
-      // 5             | 430                | 10                | false   | false  | LATE
-
-      // For more possibilities of how to build advanced triggers, see {@link Trigger}.
-      PCollection<TableRow> sequentialResults = flowInfo
-          .apply("Sequential", Window
-              .<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(windowDuration)))
-              .triggering(AfterEach.inOrder(
-                  Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
-                      // Speculative every ONE_MINUTE
-                      .plusDelayOf(ONE_MINUTE)).orFinally(AfterWatermark.pastEndOfWindow()),
-                  Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
-                      // Late data every FIVE_MINUTES
-                      .plusDelayOf(FIVE_MINUTES))))
-              .accumulatingFiredPanes()
-              // For up to ONE_DAY
-              .withAllowedLateness(ONE_DAY))
-          .apply(new TotalFlow("sequential"));
-
-      // Adds the results generated by each trigger type to a PCollectionList.
-      PCollectionList<TableRow> resultsList = PCollectionList.of(defaultTriggerResults)
-          .and(withAllowedLatenessResults)
-          .and(speculativeResults)
-          .and(sequentialResults);
-
-      return resultsList;
-    }
-  }
-
-  //////////////////////////////////////////////////////////////////////////////////////////////////
-  // The remaining parts of the pipeline are needed to produce the output for each
-  // concept above. Not directly relevant to understanding the trigger examples.
-
-  /**
-   * Calculate total flow and number of records for each freeway and format the results to TableRow
-   * objects, to save to BigQuery.
-   */
-  static class TotalFlow extends
-  PTransform <PCollection<KV<String, Integer>>, PCollection<TableRow>> {
-    private String triggerType;
-
-    public TotalFlow(String triggerType) {
-      this.triggerType = triggerType;
-    }
-
-    @Override
-    public PCollection<TableRow> apply(PCollection<KV<String, Integer>> flowInfo) {
-      PCollection<KV<String, Iterable<Integer>>> flowPerFreeway = flowInfo
-          .apply(GroupByKey.<String, Integer>create());
-
-      PCollection<KV<String, String>> results = flowPerFreeway.apply(ParDo.of(
-          new DoFn <KV<String, Iterable<Integer>>, KV<String, String>>() {
-
-            @Override
-            public void processElement(ProcessContext c) throws Exception {
-              Iterable<Integer> flows = c.element().getValue();
-              Integer sum = 0;
-              Long numberOfRecords = 0L;
-              for (Integer value : flows) {
-                sum += value;
-                numberOfRecords++;
-              }
-              c.output(KV.of(c.element().getKey(), sum + "," + numberOfRecords));
-            }
-          }));
-      PCollection<TableRow> output = results.apply(ParDo.of(new FormatTotalFlow(triggerType)));
-      return output;
-    }
-  }
-
-  /**
-   * Format the results of the Total flow calculation to a TableRow, to save to BigQuery.
-   * Adds the triggerType, pane information, processing time and the window timestamp.
-   * */
-  static class FormatTotalFlow extends DoFn<KV<String, String>, TableRow>
-  implements  RequiresWindowAccess {
-    private String triggerType;
-
-    public FormatTotalFlow(String triggerType) {
-      this.triggerType = triggerType;
-    }
-    @Override
-    public void processElement(ProcessContext c) throws Exception {
-      String[] values = c.element().getValue().split(",");
-      TableRow row = new TableRow()
-          .set("trigger_type", triggerType)
-          .set("freeway", c.element().getKey())
-          .set("total_flow", Integer.parseInt(values[0]))
-          .set("number_of_records", Long.parseLong(values[1]))
-          .set("window", c.window().toString())
-          .set("isFirst", c.pane().isFirst())
-          .set("isLast", c.pane().isLast())
-          .set("timing", c.pane().getTiming().toString())
-          .set("event_time", c.timestamp().toString())
-          .set("processing_time", Instant.now().toString());
-      c.output(row);
-    }
-  }
-
-  /**
-   * Extract the freeway and total flow in a reading.
-   * Freeway is used as key since we are calculating the total flow for each freeway.
-   */
-  static class ExtractFlowInfo extends DoFn<String, KV<String, Integer>> {
-    @Override
-    public void processElement(ProcessContext c) throws Exception {
-      String[] laneInfo = c.element().split(",");
-      if (laneInfo[0].equals("timestamp")) {
-        // Header row
-        return;
-      }
-      if (laneInfo.length < 48) {
-        //Skip the invalid input.
-        return;
-      }
-      String freeway = laneInfo[2];
-      Integer totalFlow = tryIntegerParse(laneInfo[7]);
-      // Ignore the records with total flow 0 to easily understand the working of triggers.
-      // Skip the records with total flow -1 since they are invalid input.
-      if (totalFlow == null || totalFlow <= 0) {
-        return;
-      }
-      c.output(KV.of(freeway,  totalFlow));
-    }
-  }
-
-  /**
-   * Inherits standard configuration options.
-   */
-  public interface TrafficFlowOptions
-      extends ExamplePubsubTopicOptions, ExampleBigQueryTableOptions, DataflowExampleOptions {
-
-    @Description("Input file to inject to Pub/Sub topic")
-    @Default.String("gs://dataflow-samples/traffic_sensor/"
-        + "Freeways-5Minaa2010-01-01_to_2010-02-15.csv")
-    String getInput();
-    void setInput(String value);
-
-    @Description("Numeric value of window duration for fixed windows, in minutes")
-    @Default.Integer(WINDOW_DURATION)
-    Integer getWindowDuration();
-    void setWindowDuration(Integer value);
-  }
-
-  private static final String PUBSUB_TIMESTAMP_LABEL_KEY = "timestamp_ms";
-
-  public static void main(String[] args) throws Exception {
-    TrafficFlowOptions options = PipelineOptionsFactory.fromArgs(args)
-        .withValidation()
-        .as(TrafficFlowOptions.class);
-    options.setStreaming(true);
-
-    // In order to cancel the pipelines automatically,
-    // {@code DataflowPipelineRunner} is forced to be used.
-    options.setRunner(DataflowPipelineRunner.class);
-    options.setBigQuerySchema(getSchema());
-
-    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
-    dataflowUtils.setup();
-
-    Pipeline pipeline = Pipeline.create(options);
-
-    TableReference tableRef = getTableReference(options.getProject(),
-        options.getBigQueryDataset(), options.getBigQueryTable());
-
-    PCollectionList<TableRow> resultList = pipeline.apply(PubsubIO.Read.named("ReadPubsubInput")
-        .timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
-        .topic(options.getPubsubTopic()))
-        .apply(ParDo.of(new ExtractFlowInfo()))
-        .apply(new CalculateTotalFlow(options.getWindowDuration()));
-
-    for (int i = 0; i < resultList.size(); i++){
-      resultList.get(i).apply(BigQueryIO.Write.to(tableRef).withSchema(getSchema()));
-    }
-
-    PipelineResult result = pipeline.run();
-    if (!options.getInput().isEmpty()){
-      //Inject the data into the pubsub topic
-      dataflowUtils.runInjectorPipeline(runInjector(options));
-    }
-    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
-    dataflowUtils.waitToFinish(result);
-  }
-
-  private static Pipeline runInjector(TrafficFlowOptions options){
-    DataflowPipelineOptions copiedOptions = options.cloneAs(DataflowPipelineOptions.class);
-    copiedOptions.setStreaming(false);
-    copiedOptions.setNumWorkers(options.as(DataflowExampleOptions.class).getInjectorNumWorkers());
-    copiedOptions.setJobName(options.getJobName() + "-injector");
-    Pipeline injectorPipeline = Pipeline.create(copiedOptions);
-    injectorPipeline
-    .apply(TextIO.Read.named("ReadMyFile").from(options.getInput()))
-    .apply(ParDo.named("InsertRandomDelays").of(new InsertDelays()))
-    .apply(IntraBundleParallelization.of(PubsubFileInjector
-        .withTimestampLabelKey(PUBSUB_TIMESTAMP_LABEL_KEY)
-        .publish(options.getPubsubTopic()))
-        .withMaxParallelism(20));
-
-    return injectorPipeline;
-  }
-
-  /**
-   * Add current time to each record.
-   * Also insert a delay at random to demo the triggers.
-   */
-  public static class InsertDelays extends DoFn<String, String> {
-    private static final double THRESHOLD = 0.001;
-    // MIN_DELAY and MAX_DELAY in minutes.
-    private static final int MIN_DELAY = 1;
-    private static final int MAX_DELAY = 100;
-
-    @Override
-    public void processElement(ProcessContext c) throws Exception {
-      Instant timestamp = Instant.now();
-      if (Math.random() < THRESHOLD){
-        int range = MAX_DELAY - MIN_DELAY;
-        int delayInMinutes = (int) (Math.random() * range) + MIN_DELAY;
-        long delayInMillis = TimeUnit.MINUTES.toMillis(delayInMinutes);
-        timestamp = new Instant(timestamp.getMillis() - delayInMillis);
-      }
-      c.outputWithTimestamp(c.element(), timestamp);
-    }
-  }
-
-
-  /**Sets the table reference. **/
-  private static TableReference getTableReference(String project, String dataset, String table){
-    TableReference tableRef = new TableReference();
-    tableRef.setProjectId(project);
-    tableRef.setDatasetId(dataset);
-    tableRef.setTableId(table);
-    return tableRef;
-  }
-
-  /** Defines the BigQuery schema used for the output. */
-  private static TableSchema getSchema() {
-    List<TableFieldSchema> fields = new ArrayList<>();
-    fields.add(new TableFieldSchema().setName("trigger_type").setType("STRING"));
-    fields.add(new TableFieldSchema().setName("freeway").setType("STRING"));
-    fields.add(new TableFieldSchema().setName("total_flow").setType("INTEGER"));
-    fields.add(new TableFieldSchema().setName("number_of_records").setType("INTEGER"));
-    fields.add(new TableFieldSchema().setName("window").setType("STRING"));
-    fields.add(new TableFieldSchema().setName("isFirst").setType("BOOLEAN"));
-    fields.add(new TableFieldSchema().setName("isLast").setType("BOOLEAN"));
-    fields.add(new TableFieldSchema().setName("timing").setType("STRING"));
-    fields.add(new TableFieldSchema().setName("event_time").setType("TIMESTAMP"));
-    fields.add(new TableFieldSchema().setName("processing_time").setType("TIMESTAMP"));
-    TableSchema schema = new TableSchema().setFields(fields);
-    return schema;
-  }
-
-  private static Integer tryIntegerParse(String number) {
-    try {
-      return Integer.parseInt(number);
-    } catch (NumberFormatException e) {
-      return null;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/DebuggingWordCountTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/DebuggingWordCountTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/DebuggingWordCountTest.java
deleted file mode 100644
index 77d7bc8..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/DebuggingWordCountTest.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples;
-
-import com.google.common.io.Files;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.File;
-import java.nio.charset.StandardCharsets;
-
-/**
- * Tests for {@link DebuggingWordCount}.
- */
-@RunWith(JUnit4.class)
-public class DebuggingWordCountTest {
-  @Rule public TemporaryFolder tmpFolder = new TemporaryFolder();
-
-  @Test
-  public void testDebuggingWordCount() throws Exception {
-    File file = tmpFolder.newFile();
-    Files.write("stomach secret Flourish message Flourish here Flourish", file,
-        StandardCharsets.UTF_8);
-    DebuggingWordCount.main(new String[]{"--inputFile=" + file.getAbsolutePath()});
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java
deleted file mode 100644
index 4542c48..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples;
-
-import com.google.cloud.dataflow.examples.WordCount.CountWords;
-import com.google.cloud.dataflow.examples.WordCount.ExtractWordsFn;
-import com.google.cloud.dataflow.examples.WordCount.FormatAsTextFn;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
-import com.google.cloud.dataflow.sdk.transforms.MapElements;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.hamcrest.CoreMatchers;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Tests of WordCount.
- */
-@RunWith(JUnit4.class)
-public class WordCountTest {
-
-  /** Example test that tests a specific DoFn. */
-  @Test
-  public void testExtractWordsFn() {
-    DoFnTester<String, String> extractWordsFn =
-        DoFnTester.of(new ExtractWordsFn());
-
-    Assert.assertThat(extractWordsFn.processBatch(" some  input  words "),
-                      CoreMatchers.hasItems("some", "input", "words"));
-    Assert.assertThat(extractWordsFn.processBatch(" "),
-                      CoreMatchers.<String>hasItems());
-    Assert.assertThat(extractWordsFn.processBatch(" some ", " input", " words"),
-                      CoreMatchers.hasItems("some", "input", "words"));
-  }
-
-  static final String[] WORDS_ARRAY = new String[] {
-    "hi there", "hi", "hi sue bob",
-    "hi sue", "", "bob hi"};
-
-  static final List<String> WORDS = Arrays.asList(WORDS_ARRAY);
-
-  static final String[] COUNTS_ARRAY = new String[] {
-      "hi: 5", "there: 1", "sue: 2", "bob: 2"};
-
-  /** Example test that tests a PTransform by using an in-memory input and inspecting the output. */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testCountWords() throws Exception {
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()));
-
-    PCollection<String> output = input.apply(new CountWords())
-      .apply(MapElements.via(new FormatAsTextFn()));
-
-    DataflowAssert.that(output).containsInAnyOrder(COUNTS_ARRAY);
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java
deleted file mode 100644
index aec1557..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete;
-
-import com.google.cloud.dataflow.examples.complete.AutoComplete.CompletionCandidate;
-import com.google.cloud.dataflow.examples.complete.AutoComplete.ComputeTopCompletions;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.Filter;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TimestampedValue;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-
-/**
- * Tests of AutoComplete.
- */
-@RunWith(Parameterized.class)
-public class AutoCompleteTest implements Serializable {
-  private boolean recursive;
-
-  public AutoCompleteTest(Boolean recursive) {
-    this.recursive = recursive;
-  }
-
-  @Parameterized.Parameters
-  public static Collection<Object[]> testRecursive() {
-    return Arrays.asList(new Object[][] {
-        { true },
-        { false }
-      });
-  }
-
-  @Test
-  public void testAutoComplete() {
-    List<String> words = Arrays.asList(
-        "apple",
-        "apple",
-        "apricot",
-        "banana",
-        "blackberry",
-        "blackberry",
-        "blackberry",
-        "blueberry",
-        "blueberry",
-        "cherry");
-
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> input = p.apply(Create.of(words));
-
-    PCollection<KV<String, List<CompletionCandidate>>> output =
-      input.apply(new ComputeTopCompletions(2, recursive))
-           .apply(Filter.byPredicate(
-                        new SerializableFunction<KV<String, List<CompletionCandidate>>, Boolean>() {
-                          @Override
-                          public Boolean apply(KV<String, List<CompletionCandidate>> element) {
-                            return element.getKey().length() <= 2;
-                          }
-                      }));
-
-    DataflowAssert.that(output).containsInAnyOrder(
-        KV.of("a", parseList("apple:2", "apricot:1")),
-        KV.of("ap", parseList("apple:2", "apricot:1")),
-        KV.of("b", parseList("blackberry:3", "blueberry:2")),
-        KV.of("ba", parseList("banana:1")),
-        KV.of("bl", parseList("blackberry:3", "blueberry:2")),
-        KV.of("c", parseList("cherry:1")),
-        KV.of("ch", parseList("cherry:1")));
-    p.run();
-  }
-
-  @Test
-  public void testTinyAutoComplete() {
-    List<String> words = Arrays.asList("x", "x", "x", "xy", "xy", "xyz");
-
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> input = p.apply(Create.of(words));
-
-    PCollection<KV<String, List<CompletionCandidate>>> output =
-      input.apply(new ComputeTopCompletions(2, recursive));
-
-    DataflowAssert.that(output).containsInAnyOrder(
-        KV.of("x", parseList("x:3", "xy:2")),
-        KV.of("xy", parseList("xy:2", "xyz:1")),
-        KV.of("xyz", parseList("xyz:1")));
-    p.run();
-  }
-
-  @Test
-  public void testWindowedAutoComplete() {
-    List<TimestampedValue<String>> words = Arrays.asList(
-        TimestampedValue.of("xA", new Instant(1)),
-        TimestampedValue.of("xA", new Instant(1)),
-        TimestampedValue.of("xB", new Instant(1)),
-        TimestampedValue.of("xB", new Instant(2)),
-        TimestampedValue.of("xB", new Instant(2)));
-
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> input = p
-      .apply(Create.of(words))
-      .apply(new ReifyTimestamps<String>());
-
-    PCollection<KV<String, List<CompletionCandidate>>> output =
-      input.apply(Window.<String>into(SlidingWindows.of(new Duration(2))))
-           .apply(new ComputeTopCompletions(2, recursive));
-
-    DataflowAssert.that(output).containsInAnyOrder(
-        // Window [0, 2)
-        KV.of("x", parseList("xA:2", "xB:1")),
-        KV.of("xA", parseList("xA:2")),
-        KV.of("xB", parseList("xB:1")),
-
-        // Window [1, 3)
-        KV.of("x", parseList("xB:3", "xA:2")),
-        KV.of("xA", parseList("xA:2")),
-        KV.of("xB", parseList("xB:3")),
-
-        // Window [2, 3)
-        KV.of("x", parseList("xB:2")),
-        KV.of("xB", parseList("xB:2")));
-    p.run();
-  }
-
-  private static List<CompletionCandidate> parseList(String... entries) {
-    List<CompletionCandidate> all = new ArrayList<>();
-    for (String s : entries) {
-      String[] countValue = s.split(":");
-      all.add(new CompletionCandidate(countValue[0], Integer.valueOf(countValue[1])));
-    }
-    return all;
-  }
-
-  private static class ReifyTimestamps<T>
-      extends PTransform<PCollection<TimestampedValue<T>>, PCollection<T>> {
-    @Override
-    public PCollection<T> apply(PCollection<TimestampedValue<T>> input) {
-      return input.apply(ParDo.of(new DoFn<TimestampedValue<T>, T>() {
-        @Override
-        public void processElement(ProcessContext c) {
-          c.outputWithTimestamp(c.element().getValue(), c.element().getTimestamp());
-        }
-      }));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/complete/TfIdfTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/complete/TfIdfTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/complete/TfIdfTest.java
deleted file mode 100644
index 5ee136c..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/complete/TfIdfTest.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.StringDelegateCoder;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.Keys;
-import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.net.URI;
-import java.util.Arrays;
-
-/**
- * Tests of {@link TfIdf}.
- */
-@RunWith(JUnit4.class)
-public class TfIdfTest {
-
-  /** Test that the example runs. */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testTfIdf() throws Exception {
-    Pipeline pipeline = TestPipeline.create();
-
-    pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));
-
-    PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf = pipeline
-        .apply(Create.of(
-            KV.of(new URI("x"), "a b c d"),
-            KV.of(new URI("y"), "a b c"),
-            KV.of(new URI("z"), "a m n")))
-        .apply(new TfIdf.ComputeTfIdf());
-
-    PCollection<String> words = wordToUriAndTfIdf
-        .apply(Keys.<String>create())
-        .apply(RemoveDuplicates.<String>create());
-
-    DataflowAssert.that(words).containsInAnyOrder(Arrays.asList("a", "m", "n", "b", "c", "d"));
-
-    pipeline.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessionsTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessionsTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessionsTest.java
deleted file mode 100644
index ce9de51..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessionsTest.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.complete;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-
-/** Unit tests for {@link TopWikipediaSessions}. */
-@RunWith(JUnit4.class)
-public class TopWikipediaSessionsTest {
-  @Test
-  @Category(RunnableOnService.class)
-  public void testComputeTopUsers() {
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> output =
-        p.apply(Create.of(Arrays.asList(
-            new TableRow().set("timestamp", 0).set("contributor_username", "user1"),
-            new TableRow().set("timestamp", 1).set("contributor_username", "user1"),
-            new TableRow().set("timestamp", 2).set("contributor_username", "user1"),
-            new TableRow().set("timestamp", 0).set("contributor_username", "user2"),
-            new TableRow().set("timestamp", 1).set("contributor_username", "user2"),
-            new TableRow().set("timestamp", 3601).set("contributor_username", "user2"),
-            new TableRow().set("timestamp", 3602).set("contributor_username", "user2"),
-            new TableRow().set("timestamp", 35 * 24 * 3600).set("contributor_username", "user3"))))
-        .apply(new TopWikipediaSessions.ComputeTopSessions(1.0));
-
-    DataflowAssert.that(output).containsInAnyOrder(Arrays.asList(
-        "user1 : [1970-01-01T00:00:00.000Z..1970-01-01T01:00:02.000Z)"
-        + " : 3 : 1970-01-01T00:00:00.000Z",
-        "user3 : [1970-02-05T00:00:00.000Z..1970-02-05T01:00:00.000Z)"
-        + " : 1 : 1970-02-01T00:00:00.000Z"));
-
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoesTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoesTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoesTest.java
deleted file mode 100644
index 6dce4ed..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoesTest.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.examples.cookbook.BigQueryTornadoes.ExtractTornadoesFn;
-import com.google.cloud.dataflow.examples.cookbook.BigQueryTornadoes.FormatCountsFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
-import com.google.cloud.dataflow.sdk.values.KV;
-
-import org.hamcrest.CoreMatchers;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.List;
-
-/**
- * Test case for {@link BigQueryTornadoes}.
- */
-@RunWith(JUnit4.class)
-public class BigQueryTornadoesTest {
-
-  @Test
-  public void testExtractTornadoes() throws Exception {
-    TableRow row = new TableRow()
-          .set("month", "6")
-          .set("tornado", true);
-    DoFnTester<TableRow, Integer> extractWordsFn =
-        DoFnTester.of(new ExtractTornadoesFn());
-    Assert.assertThat(extractWordsFn.processBatch(row),
-                      CoreMatchers.hasItems(6));
-  }
-
-  @Test
-  public void testNoTornadoes() throws Exception {
-    TableRow row = new TableRow()
-          .set("month", 6)
-          .set("tornado", false);
-    DoFnTester<TableRow, Integer> extractWordsFn =
-        DoFnTester.of(new ExtractTornadoesFn());
-    Assert.assertTrue(extractWordsFn.processBatch(row).isEmpty());
-  }
-
-  @Test
-  @SuppressWarnings({"rawtypes", "unchecked"})
-  public void testFormatCounts() throws Exception {
-    DoFnTester<KV<Integer, Long>, TableRow> formatCountsFn =
-        DoFnTester.of(new FormatCountsFn());
-    KV empty[] = {};
-    List<TableRow> results = formatCountsFn.processBatch(empty);
-    Assert.assertTrue(results.size() == 0);
-    KV input[] = { KV.of(3, 0L),
-                   KV.of(4, Long.MAX_VALUE),
-                   KV.of(5, Long.MIN_VALUE) };
-    results = formatCountsFn.processBatch(input);
-    Assert.assertEquals(results.size(), 3);
-    Assert.assertEquals(results.get(0).get("month"), 3);
-    Assert.assertEquals(results.get(0).get("tornado_count"), 0L);
-    Assert.assertEquals(results.get(1).get("month"), 4);
-    Assert.assertEquals(results.get(1).get("tornado_count"), Long.MAX_VALUE);
-    Assert.assertEquals(results.get(2).get("month"), 5);
-    Assert.assertEquals(results.get(2).get("tornado_count"), Long.MIN_VALUE);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamplesTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamplesTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamplesTest.java
deleted file mode 100644
index fe4823d..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamplesTest.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.examples.cookbook.CombinePerKeyExamples.ExtractLargeWordsFn;
-import com.google.cloud.dataflow.examples.cookbook.CombinePerKeyExamples.FormatShakespeareOutputFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
-import com.google.cloud.dataflow.sdk.values.KV;
-
-import org.hamcrest.CoreMatchers;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.List;
-
-/** Unit tests for {@link CombinePerKeyExamples}. */
-@RunWith(JUnit4.class)
-public class CombinePerKeyExamplesTest {
-
-  private static final TableRow row1 = new TableRow()
-      .set("corpus", "king_lear").set("word", "snuffleupaguses");
-  private static final TableRow row2 = new TableRow()
-      .set("corpus", "macbeth").set("word", "antidisestablishmentarianism");
-  private static final TableRow row3 = new TableRow()
-      .set("corpus", "king_lear").set("word", "antidisestablishmentarianism");
-  private static final TableRow row4 = new TableRow()
-      .set("corpus", "macbeth").set("word", "bob");
-  private static final TableRow row5 = new TableRow()
-      .set("corpus", "king_lear").set("word", "hi");
-
-  static final TableRow[] ROWS_ARRAY = new TableRow[] {
-    row1, row2, row3, row4, row5
-  };
-
-  private static final KV<String, String> tuple1 = KV.of("snuffleupaguses", "king_lear");
-  private static final KV<String, String> tuple2 = KV.of("antidisestablishmentarianism", "macbeth");
-  private static final KV<String, String> tuple3 = KV.of("antidisestablishmentarianism",
-      "king_lear");
-
-  private static final KV<String, String> combinedTuple1 = KV.of("antidisestablishmentarianism",
-      "king_lear,macbeth");
-  private static final KV<String, String> combinedTuple2 = KV.of("snuffleupaguses", "king_lear");
-
-  @SuppressWarnings({"unchecked", "rawtypes"})
-  static final KV<String, String>[] COMBINED_TUPLES_ARRAY = new KV[] {
-    combinedTuple1, combinedTuple2
-  };
-
-  private static final TableRow resultRow1 = new TableRow()
-      .set("word", "snuffleupaguses").set("all_plays", "king_lear");
-  private static final TableRow resultRow2 = new TableRow()
-      .set("word", "antidisestablishmentarianism")
-      .set("all_plays", "king_lear,macbeth");
-
-  @Test
-  public void testExtractLargeWordsFn() {
-    DoFnTester<TableRow, KV<String, String>> extractLargeWordsFn =
-        DoFnTester.of(new ExtractLargeWordsFn());
-    List<KV<String, String>> results = extractLargeWordsFn.processBatch(ROWS_ARRAY);
-    Assert.assertThat(results, CoreMatchers.hasItem(tuple1));
-    Assert.assertThat(results, CoreMatchers.hasItem(tuple2));
-    Assert.assertThat(results, CoreMatchers.hasItem(tuple3));
-  }
-
-  @Test
-  public void testFormatShakespeareOutputFn() {
-    DoFnTester<KV<String, String>, TableRow> formatShakespeareOutputFn =
-        DoFnTester.of(new FormatShakespeareOutputFn());
-    List<TableRow> results = formatShakespeareOutputFn.processBatch(COMBINED_TUPLES_ARRAY);
-    Assert.assertThat(results, CoreMatchers.hasItem(resultRow1));
-    Assert.assertThat(results, CoreMatchers.hasItem(resultRow2));
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/DeDupExampleTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/DeDupExampleTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/DeDupExampleTest.java
deleted file mode 100644
index bce6b11..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/DeDupExampleTest.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.List;
-
-/** Unit tests for {@link DeDupExample}. */
-@RunWith(JUnit4.class)
-public class DeDupExampleTest {
-
-  @Test
-  @Category(RunnableOnService.class)
-  public void testRemoveDuplicates() {
-    List<String> strings = Arrays.asList(
-        "k1",
-        "k5",
-        "k5",
-        "k2",
-        "k1",
-        "k2",
-        "k3");
-
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> input =
-        p.apply(Create.of(strings)
-            .withCoder(StringUtf8Coder.of()));
-
-    PCollection<String> output =
-        input.apply(RemoveDuplicates.<String>create());
-
-    DataflowAssert.that(output)
-        .containsInAnyOrder("k1", "k5", "k2", "k3");
-    p.run();
-  }
-
-  @Test
-  @Category(RunnableOnService.class)
-  public void testRemoveDuplicatesEmpty() {
-    List<String> strings = Arrays.asList();
-
-    Pipeline p = TestPipeline.create();
-
-    PCollection<String> input =
-        p.apply(Create.of(strings)
-            .withCoder(StringUtf8Coder.of()));
-
-    PCollection<String> output =
-        input.apply(RemoveDuplicates.<String>create());
-
-    DataflowAssert.that(output).empty();
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/FilterExamplesTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/FilterExamplesTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/FilterExamplesTest.java
deleted file mode 100644
index 6d822f9..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/FilterExamplesTest.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.examples.cookbook.FilterExamples.FilterSingleMonthDataFn;
-import com.google.cloud.dataflow.examples.cookbook.FilterExamples.ProjectionFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
-
-import org.hamcrest.CoreMatchers;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.List;
-
-/** Unit tests for {@link FilterExamples}. */
-@RunWith(JUnit4.class)
-public class FilterExamplesTest {
-
-  private static final TableRow row1 = new TableRow()
-      .set("month", "6").set("day", "21")
-      .set("year", "2014").set("mean_temp", "85.3")
-      .set("tornado", true);
-  private static final TableRow row2 = new TableRow()
-      .set("month", "7").set("day", "20")
-      .set("year", "2014").set("mean_temp", "75.4")
-      .set("tornado", false);
-  private static final TableRow row3 = new TableRow()
-      .set("month", "6").set("day", "18")
-      .set("year", "2014").set("mean_temp", "45.3")
-      .set("tornado", true);
-  static final TableRow[] ROWS_ARRAY = new TableRow[] {
-    row1, row2, row3
-  };
-  static final List<TableRow> ROWS = Arrays.asList(ROWS_ARRAY);
-
-  private static final TableRow outRow1 = new TableRow()
-      .set("year", 2014).set("month", 6)
-      .set("day", 21).set("mean_temp", 85.3);
-  private static final TableRow outRow2 = new TableRow()
-      .set("year", 2014).set("month", 7)
-      .set("day", 20).set("mean_temp", 75.4);
-  private static final TableRow outRow3 = new TableRow()
-      .set("year", 2014).set("month", 6)
-      .set("day", 18).set("mean_temp", 45.3);
-  private static final TableRow[] PROJROWS_ARRAY = new TableRow[] {
-    outRow1, outRow2, outRow3
-  };
-
-
-  @Test
-  public void testProjectionFn() {
-    DoFnTester<TableRow, TableRow> projectionFn =
-        DoFnTester.of(new ProjectionFn());
-    List<TableRow> results = projectionFn.processBatch(ROWS_ARRAY);
-    Assert.assertThat(results, CoreMatchers.hasItem(outRow1));
-    Assert.assertThat(results, CoreMatchers.hasItem(outRow2));
-    Assert.assertThat(results, CoreMatchers.hasItem(outRow3));
-  }
-
-  @Test
-  public void testFilterSingleMonthDataFn() {
-    DoFnTester<TableRow, TableRow> filterSingleMonthDataFn =
-        DoFnTester.of(new FilterSingleMonthDataFn(7));
-    List<TableRow> results = filterSingleMonthDataFn.processBatch(PROJROWS_ARRAY);
-    Assert.assertThat(results, CoreMatchers.hasItem(outRow2));
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/JoinExamplesTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/JoinExamplesTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/JoinExamplesTest.java
deleted file mode 100644
index db3ae34..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/JoinExamplesTest.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.examples.cookbook.JoinExamples.ExtractCountryInfoFn;
-import com.google.cloud.dataflow.examples.cookbook.JoinExamples.ExtractEventDataFn;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.hamcrest.CoreMatchers;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.List;
-
-/** Unit tests for {@link JoinExamples}. */
-@RunWith(JUnit4.class)
-public class JoinExamplesTest {
-
-  private static final TableRow row1 = new TableRow()
-        .set("ActionGeo_CountryCode", "VM").set("SQLDATE", "20141212")
-        .set("Actor1Name", "BANGKOK").set("SOURCEURL", "http://cnn.com");
-  private static final TableRow row2 = new TableRow()
-        .set("ActionGeo_CountryCode", "VM").set("SQLDATE", "20141212")
-        .set("Actor1Name", "LAOS").set("SOURCEURL", "http://www.chicagotribune.com");
-  private static final TableRow row3 = new TableRow()
-        .set("ActionGeo_CountryCode", "BE").set("SQLDATE", "20141213")
-        .set("Actor1Name", "AFGHANISTAN").set("SOURCEURL", "http://cnn.com");
-  static final TableRow[] EVENTS = new TableRow[] {
-    row1, row2, row3
-  };
-  static final List<TableRow> EVENT_ARRAY = Arrays.asList(EVENTS);
-
-  private static final KV<String, String> kv1 = KV.of("VM",
-      "Date: 20141212, Actor1: LAOS, url: http://www.chicagotribune.com");
-  private static final KV<String, String> kv2 = KV.of("BE",
-      "Date: 20141213, Actor1: AFGHANISTAN, url: http://cnn.com");
-  private static final KV<String, String> kv3 = KV.of("BE", "Belgium");
-  private static final KV<String, String> kv4 = KV.of("VM", "Vietnam");
-
-  private static final TableRow cc1 = new TableRow()
-        .set("FIPSCC", "VM").set("HumanName", "Vietnam");
-  private static final TableRow cc2 = new TableRow()
-        .set("FIPSCC", "BE").set("HumanName", "Belgium");
-  static final TableRow[] CCS = new TableRow[] {
-    cc1, cc2
-  };
-  static final List<TableRow> CC_ARRAY = Arrays.asList(CCS);
-
-  static final String[] JOINED_EVENTS = new String[] {
-      "Country code: VM, Country name: Vietnam, Event info: Date: 20141212, Actor1: LAOS, "
-          + "url: http://www.chicagotribune.com",
-      "Country code: VM, Country name: Vietnam, Event info: Date: 20141212, Actor1: BANGKOK, "
-          + "url: http://cnn.com",
-      "Country code: BE, Country name: Belgium, Event info: Date: 20141213, Actor1: AFGHANISTAN, "
-          + "url: http://cnn.com"
-    };
-
-  @Test
-  public void testExtractEventDataFn() {
-    DoFnTester<TableRow, KV<String, String>> extractEventDataFn =
-        DoFnTester.of(new ExtractEventDataFn());
-    List<KV<String, String>> results = extractEventDataFn.processBatch(EVENTS);
-    Assert.assertThat(results, CoreMatchers.hasItem(kv1));
-    Assert.assertThat(results, CoreMatchers.hasItem(kv2));
-  }
-
-  @Test
-  public void testExtractCountryInfoFn() {
-    DoFnTester<TableRow, KV<String, String>> extractCountryInfoFn =
-        DoFnTester.of(new ExtractCountryInfoFn());
-    List<KV<String, String>> results = extractCountryInfoFn.processBatch(CCS);
-    Assert.assertThat(results, CoreMatchers.hasItem(kv3));
-    Assert.assertThat(results, CoreMatchers.hasItem(kv4));
-  }
-
-
-  @Test
-  @Category(RunnableOnService.class)
-  public void testJoin() throws java.lang.Exception {
-    Pipeline p = TestPipeline.create();
-    PCollection<TableRow> input1 = p.apply("CreateEvent", Create.of(EVENT_ARRAY));
-    PCollection<TableRow> input2 = p.apply("CreateCC", Create.of(CC_ARRAY));
-
-    PCollection<String> output = JoinExamples.joinEvents(input1, input2);
-    DataflowAssert.that(output).containsInAnyOrder(JOINED_EVENTS);
-    p.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java
deleted file mode 100644
index 3deff2a..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamplesTest.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.examples.cookbook.MaxPerKeyExamples.ExtractTempFn;
-import com.google.cloud.dataflow.examples.cookbook.MaxPerKeyExamples.FormatMaxesFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.common.collect.ImmutableList;
-
-import org.hamcrest.CoreMatchers;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.List;
-
-/** Unit tests for {@link MaxPerKeyExamples}. */
-@RunWith(JUnit4.class)
-public class MaxPerKeyExamplesTest {
-
-  private static final TableRow row1 = new TableRow()
-        .set("month", "6").set("day", "21")
-        .set("year", "2014").set("mean_temp", "85.3")
-        .set("tornado", true);
-  private static final TableRow row2 = new TableRow()
-        .set("month", "7").set("day", "20")
-        .set("year", "2014").set("mean_temp", "75.4")
-        .set("tornado", false);
-  private static final TableRow row3 = new TableRow()
-        .set("month", "6").set("day", "18")
-        .set("year", "2014").set("mean_temp", "45.3")
-        .set("tornado", true);
-  private static final List<TableRow> TEST_ROWS = ImmutableList.of(row1, row2, row3);
-
-  private static final KV<Integer, Double> kv1 = KV.of(6, 85.3);
-  private static final KV<Integer, Double> kv2 = KV.of(6, 45.3);
-  private static final KV<Integer, Double> kv3 = KV.of(7, 75.4);
-
-  private static final List<KV<Integer, Double>> TEST_KVS = ImmutableList.of(kv1, kv2, kv3);
-
-  private static final TableRow resultRow1 = new TableRow()
-      .set("month", 6)
-      .set("max_mean_temp", 85.3);
-  private static final TableRow resultRow2 = new TableRow()
-      .set("month", 7)
-      .set("max_mean_temp", 75.4);
-
-
-  @Test
-  public void testExtractTempFn() {
-    DoFnTester<TableRow, KV<Integer, Double>> extractTempFn =
-        DoFnTester.of(new ExtractTempFn());
-    List<KV<Integer, Double>> results = extractTempFn.processBatch(TEST_ROWS);
-    Assert.assertThat(results, CoreMatchers.hasItem(kv1));
-    Assert.assertThat(results, CoreMatchers.hasItem(kv2));
-    Assert.assertThat(results, CoreMatchers.hasItem(kv3));
-  }
-
-  @Test
-  public void testFormatMaxesFn() {
-    DoFnTester<KV<Integer, Double>, TableRow> formatMaxesFnFn =
-        DoFnTester.of(new FormatMaxesFn());
-    List<TableRow> results = formatMaxesFnFn.processBatch(TEST_KVS);
-    Assert.assertThat(results, CoreMatchers.hasItem(resultRow1));
-    Assert.assertThat(results, CoreMatchers.hasItem(resultRow2));
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/TriggerExampleTest.java
----------------------------------------------------------------------
diff --git a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/TriggerExampleTest.java b/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/TriggerExampleTest.java
deleted file mode 100644
index 209ea52..0000000
--- a/examples/src/test/java/com/google/cloud/dataflow/examples/cookbook/TriggerExampleTest.java
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.examples.cookbook;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.cloud.dataflow.examples.cookbook.TriggerExample.ExtractFlowInfo;
-import com.google.cloud.dataflow.examples.cookbook.TriggerExample.TotalFlow;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.TimestampedValue;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Unit Tests for {@link TriggerExample}.
- * The results generated by triggers are by definition non-deterministic and hence hard to test.
- * The unit test does not test all aspects of the example.
- */
-@RunWith(JUnit4.class)
-public class TriggerExampleTest {
-
-  private static final String[] INPUT =
-    {"01/01/2010 00:00:00,1108302,94,E,ML,36,100,29,0.0065,66,9,1,0.001,74.8,1,9,3,0.0028,71,1,9,"
-        + "12,0.0099,67.4,1,9,13,0.0121,99.0,1,,,,,0,,,,,0,,,,,0,,,,,0", "01/01/2010 00:00:00,"
-            + "1100333,5,N,FR,9,0,39,,,9,,,,0,,,,,0,,,,,0,,,,,0,,,,,0,,,,,0,,,,,0,,,,"};
-
-  private static final List<TimestampedValue<String>> TIME_STAMPED_INPUT = Arrays.asList(
-      TimestampedValue.of("01/01/2010 00:00:00,1108302,5,W,ML,36,100,30,0.0065,66,9,1,0.001,"
-          + "74.8,1,9,3,0.0028,71,1,9,12,0.0099,87.4,1,9,13,0.0121,99.0,1,,,,,0,,,,,0,,,,,0,,,"
-          + ",,0", new Instant(60000)),
-      TimestampedValue.of("01/01/2010 00:00:00,1108302,110,E,ML,36,100,40,0.0065,66,9,1,0.001,"
-          + "74.8,1,9,3,0.0028,71,1,9,12,0.0099,67.4,1,9,13,0.0121,99.0,1,,,,,0,,,,,0,,,,,0,,,"
-          + ",,0", new Instant(1)),
-      TimestampedValue.of("01/01/2010 00:00:00,1108302,110,E,ML,36,100,50,0.0065,66,9,1,"
-          + "0.001,74.8,1,9,3,0.0028,71,1,9,12,0.0099,97.4,1,9,13,0.0121,50.0,1,,,,,0,,,,,0"
-          + ",,,,,0,,,,,0", new Instant(1)));
-
-  private static final TableRow OUT_ROW_1 = new TableRow()
-      .set("trigger_type", "default")
-      .set("freeway", "5").set("total_flow", 30)
-      .set("number_of_records", 1)
-      .set("isFirst", true).set("isLast", true)
-      .set("timing", "ON_TIME")
-      .set("window", "[1970-01-01T00:01:00.000Z..1970-01-01T00:02:00.000Z)");
-
-  private static final TableRow OUT_ROW_2 = new TableRow()
-      .set("trigger_type", "default")
-      .set("freeway", "110").set("total_flow", 90)
-      .set("number_of_records", 2)
-      .set("isFirst", true).set("isLast", true)
-      .set("timing", "ON_TIME")
-      .set("window", "[1970-01-01T00:00:00.000Z..1970-01-01T00:01:00.000Z)");
-
-  @Test
-  public void testExtractTotalFlow() {
-    DoFnTester<String, KV<String, Integer>> extractFlowInfow = DoFnTester
-        .of(new ExtractFlowInfo());
-
-    List<KV<String, Integer>> results = extractFlowInfow.processBatch(INPUT);
-    Assert.assertEquals(results.size(), 1);
-    Assert.assertEquals(results.get(0).getKey(), "94");
-    Assert.assertEquals(results.get(0).getValue(), new Integer(29));
-
-    List<KV<String, Integer>> output = extractFlowInfow.processBatch("");
-    Assert.assertEquals(output.size(), 0);
-  }
-
-  @Test
-  @Category(RunnableOnService.class)
-  public void testTotalFlow () {
-    Pipeline pipeline = TestPipeline.create();
-    PCollection<KV<String, Integer>> flow = pipeline
-        .apply(Create.timestamped(TIME_STAMPED_INPUT))
-        .apply(ParDo.of(new ExtractFlowInfo()));
-
-    PCollection<TableRow> totalFlow = flow
-        .apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(1))))
-        .apply(new TotalFlow("default"));
-
-    PCollection<TableRow> results =  totalFlow.apply(ParDo.of(new FormatResults()));
-
-
-    DataflowAssert.that(results).containsInAnyOrder(OUT_ROW_1, OUT_ROW_2);
-    pipeline.run();
-
-  }
-
-  static class FormatResults extends DoFn<TableRow, TableRow> {
-    @Override
-    public void processElement(ProcessContext c) throws Exception {
-      TableRow element = c.element();
-      TableRow row = new TableRow()
-          .set("trigger_type", element.get("trigger_type"))
-          .set("freeway", element.get("freeway"))
-          .set("total_flow", element.get("total_flow"))
-          .set("number_of_records", element.get("number_of_records"))
-          .set("isFirst", element.get("isFirst"))
-          .set("isLast", element.get("isLast"))
-          .set("timing", element.get("timing"))
-          .set("window", element.get("window"));
-      c.output(row);
-    }
-  }
-}
-
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 3145c40..2d0a3e1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -127,7 +127,7 @@
   <modules>
     <module>sdks/java/core</module>
     <module>runners</module>
-    <module>examples</module>
+    <module>examples/java</module>
     <module>maven-archetypes</module>
   </modules>
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/travis/test_wordcount.sh
----------------------------------------------------------------------
diff --git a/travis/test_wordcount.sh b/travis/test_wordcount.sh
index fdb9d10..fdd878d 100755
--- a/travis/test_wordcount.sh
+++ b/travis/test_wordcount.sh
@@ -19,7 +19,7 @@ set -o pipefail
 
 PASS=1
 VERSION=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -v '\[')
-JAR_FILE=examples/target/google-cloud-dataflow-java-examples-all-bundled-${VERSION}.jar
+JAR_FILE=examples/java/target/java-examples-all-bundled-${VERSION}.jar
 
 function check_result_hash {
   local name=$1
@@ -52,7 +52,7 @@ function run_via_mvn {
   local expected_hash=$3
 
   local outfile_prefix="$(get_outfile_prefix "$name")" || exit 2
-  local cmd='mvn exec:java -f pom.xml -pl examples \
+  local cmd='mvn exec:java -f pom.xml -pl examples/java \
     -Dexec.mainClass=com.google.cloud.dataflow.examples.WordCount \
     -Dexec.args="--runner=DirectPipelineRunner --inputFile='"$input"' --output='"$outfile_prefix"'"'
   echo "$name: Running $cmd" >&2

[29/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutorServices.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutorServices.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutorServices.java
deleted file mode 100644
index 34efdf6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/TransformExecutorServices.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.common.base.MoreObjects;
-
-import java.util.Map;
-import java.util.Queue;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.atomic.AtomicReference;
-
-/**
- * Static factory methods for constructing instances of {@link TransformExecutorService}.
- */
-final class TransformExecutorServices {
-  private TransformExecutorServices() {
-    // Do not instantiate
-  }
-
-  /**
-   * Returns an EvaluationState that evaluates {@link TransformExecutor TransformExecutors} in
-   * parallel.
-   */
-  public static TransformExecutorService parallel(
-      ExecutorService executor, Map<TransformExecutor<?>, Boolean> scheduled) {
-    return new ParallelEvaluationState(executor, scheduled);
-  }
-
-  /**
-   * Returns an EvaluationState that evaluates {@link TransformExecutor TransformExecutors} in
-   * serial.
-   */
-  public static TransformExecutorService serial(
-      ExecutorService executor, Map<TransformExecutor<?>, Boolean> scheduled) {
-    return new SerialEvaluationState(executor, scheduled);
-  }
-
-  /**
-   * A {@link TransformExecutorService} with unlimited parallelism. Any {@link TransformExecutor}
-   * scheduled will be immediately submitted to the {@link ExecutorService}.
-   *
-   * <p>A principal use of this is for the evaluation of an unkeyed Step. Unkeyed computations are
-   * processed in parallel.
-   */
-  private static class ParallelEvaluationState implements TransformExecutorService {
-    private final ExecutorService executor;
-    private final Map<TransformExecutor<?>, Boolean> scheduled;
-
-    private ParallelEvaluationState(
-        ExecutorService executor, Map<TransformExecutor<?>, Boolean> scheduled) {
-      this.executor = executor;
-      this.scheduled = scheduled;
-    }
-
-    @Override
-    public void schedule(TransformExecutor<?> work) {
-      executor.submit(work);
-      scheduled.put(work, true);
-    }
-
-    @Override
-    public void complete(TransformExecutor<?> completed) {
-      scheduled.remove(completed);
-    }
-  }
-
-  /**
-   * A {@link TransformExecutorService} with a single work queue. Any {@link TransformExecutor}
-   * scheduled will be placed on the work queue. Only one item of work will be submitted to the
-   * {@link ExecutorService} at any time.
-   *
-   * <p>A principal use of this is for the serial evaluation of a (Step, Key) pair.
-   * Keyed computations are processed serially per step.
-   */
-  private static class SerialEvaluationState implements TransformExecutorService {
-    private final ExecutorService executor;
-    private final Map<TransformExecutor<?>, Boolean> scheduled;
-
-    private AtomicReference<TransformExecutor<?>> currentlyEvaluating;
-    private final Queue<TransformExecutor<?>> workQueue;
-
-    private SerialEvaluationState(
-        ExecutorService executor, Map<TransformExecutor<?>, Boolean> scheduled) {
-      this.scheduled = scheduled;
-      this.executor = executor;
-      this.currentlyEvaluating = new AtomicReference<>();
-      this.workQueue = new ConcurrentLinkedQueue<>();
-    }
-
-    /**
-     * Schedules the work, adding it to the work queue if there is a bundle currently being
-     * evaluated and scheduling it immediately otherwise.
-     */
-    @Override
-    public void schedule(TransformExecutor<?> work) {
-      workQueue.offer(work);
-      updateCurrentlyEvaluating();
-    }
-
-    @Override
-    public void complete(TransformExecutor<?> completed) {
-      if (!currentlyEvaluating.compareAndSet(completed, null)) {
-        throw new IllegalStateException(
-            "Finished work "
-                + completed
-                + " but could not complete due to unexpected currently executing "
-                + currentlyEvaluating.get());
-      }
-      scheduled.remove(completed);
-      updateCurrentlyEvaluating();
-    }
-
-    private void updateCurrentlyEvaluating() {
-      if (currentlyEvaluating.get() == null) {
-        // Only synchronize if we need to update what's currently evaluating
-        synchronized (this) {
-          TransformExecutor<?> newWork = workQueue.poll();
-          if (newWork != null) {
-            if (currentlyEvaluating.compareAndSet(null, newWork)) {
-              scheduled.put(newWork, true);
-              executor.submit(newWork);
-            } else {
-              workQueue.offer(newWork);
-            }
-          }
-        }
-      }
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(SerialEvaluationState.class)
-          .add("currentlyEvaluating", currentlyEvaluating)
-          .add("workQueue", workQueue)
-          .toString();
-    }
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/UnboundedReadEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/UnboundedReadEvaluatorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/UnboundedReadEvaluatorFactory.java
deleted file mode 100644
index 549afab..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/UnboundedReadEvaluatorFactory.java
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.io.Read.Unbounded;
-import com.google.cloud.dataflow.sdk.io.UnboundedSource;
-import com.google.cloud.dataflow.sdk.io.UnboundedSource.CheckpointMark;
-import com.google.cloud.dataflow.sdk.io.UnboundedSource.UnboundedReader;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import java.io.IOException;
-import java.util.Queue;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ConcurrentMap;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link TransformEvaluatorFactory} that produces {@link TransformEvaluator TransformEvaluators}
- * for the {@link Unbounded Read.Unbounded} primitive {@link PTransform}.
- */
-class UnboundedReadEvaluatorFactory implements TransformEvaluatorFactory {
-  /*
-   * An evaluator for a Source is stateful, to ensure the CheckpointMark is properly persisted.
-   * Evaluators are cached here to ensure that the checkpoint mark is appropriately reused
-   * and any splits are honored.
-   */
-  private final ConcurrentMap<EvaluatorKey, Queue<? extends UnboundedReadEvaluator<?>>>
-      sourceEvaluators = new ConcurrentHashMap<>();
-
-  @SuppressWarnings({"unchecked", "rawtypes"})
-  @Override
-  public <InputT> TransformEvaluator<InputT> forApplication(AppliedPTransform<?, ?, ?> application,
-      @Nullable CommittedBundle<?> inputBundle, InProcessEvaluationContext evaluationContext) {
-    return getTransformEvaluator((AppliedPTransform) application, evaluationContext);
-  }
-
-  private <OutputT> TransformEvaluator<?> getTransformEvaluator(
-      final AppliedPTransform<?, PCollection<OutputT>, Unbounded<OutputT>> transform,
-      final InProcessEvaluationContext evaluationContext) {
-    UnboundedReadEvaluator<?> currentEvaluator =
-        getTransformEvaluatorQueue(transform, evaluationContext).poll();
-    if (currentEvaluator == null) {
-      return EmptyTransformEvaluator.create(transform);
-    }
-    return currentEvaluator;
-  }
-
-  /**
-   * Get the queue of {@link TransformEvaluator TransformEvaluators} that produce elements for the
-   * provided application of {@link Unbounded Read.Unbounded}, initializing it if required.
-   *
-   * <p>This method is thread-safe, and will only produce new evaluators if no other invocation has
-   * already done so.
-   */
-  @SuppressWarnings("unchecked")
-  private <OutputT> Queue<UnboundedReadEvaluator<OutputT>> getTransformEvaluatorQueue(
-      final AppliedPTransform<?, PCollection<OutputT>, Unbounded<OutputT>> transform,
-      final InProcessEvaluationContext evaluationContext) {
-    // Key by the application and the context the evaluation is occurring in (which call to
-    // Pipeline#run).
-    EvaluatorKey key = new EvaluatorKey(transform, evaluationContext);
-    @SuppressWarnings("unchecked")
-    Queue<UnboundedReadEvaluator<OutputT>> evaluatorQueue =
-        (Queue<UnboundedReadEvaluator<OutputT>>) sourceEvaluators.get(key);
-    if (evaluatorQueue == null) {
-      evaluatorQueue = new ConcurrentLinkedQueue<>();
-      if (sourceEvaluators.putIfAbsent(key, evaluatorQueue) == null) {
-        // If no queue existed in the evaluators, add an evaluator to initialize the evaluator
-        // factory for this transform
-        UnboundedReadEvaluator<OutputT> evaluator =
-            new UnboundedReadEvaluator<OutputT>(transform, evaluationContext, evaluatorQueue);
-        evaluatorQueue.offer(evaluator);
-      } else {
-        // otherwise return the existing Queue that arrived before us
-        evaluatorQueue = (Queue<UnboundedReadEvaluator<OutputT>>) sourceEvaluators.get(key);
-      }
-    }
-    return evaluatorQueue;
-  }
-
-  /**
-   * A {@link UnboundedReadEvaluator} produces elements from an underlying {@link UnboundedSource},
-   * discarding all input elements. Within the call to {@link #finishBundle()}, the evaluator
-   * creates the {@link UnboundedReader} and consumes some currently available input.
-   *
-   * <p>Calls to {@link UnboundedReadEvaluator} are not internally thread-safe, and should only be
-   * used by a single thread at a time. Each {@link UnboundedReadEvaluator} maintains its own
-   * checkpoint, and constructs its reader from the current checkpoint in each call to
-   * {@link #finishBundle()}.
-   */
-  private static class UnboundedReadEvaluator<OutputT> implements TransformEvaluator<Object> {
-    private static final int ARBITRARY_MAX_ELEMENTS = 10;
-    private final AppliedPTransform<?, PCollection<OutputT>, Unbounded<OutputT>> transform;
-    private final InProcessEvaluationContext evaluationContext;
-    private final Queue<UnboundedReadEvaluator<OutputT>> evaluatorQueue;
-    private CheckpointMark checkpointMark;
-
-    public UnboundedReadEvaluator(
-        AppliedPTransform<?, PCollection<OutputT>, Unbounded<OutputT>> transform,
-        InProcessEvaluationContext evaluationContext,
-        Queue<UnboundedReadEvaluator<OutputT>> evaluatorQueue) {
-      this.transform = transform;
-      this.evaluationContext = evaluationContext;
-      this.evaluatorQueue = evaluatorQueue;
-      this.checkpointMark = null;
-    }
-
-    @Override
-    public void processElement(WindowedValue<Object> element) {}
-
-    @Override
-    public InProcessTransformResult finishBundle() throws IOException {
-      UncommittedBundle<OutputT> output = evaluationContext.createRootBundle(transform.getOutput());
-      try (UnboundedReader<OutputT> reader =
-              createReader(
-                  transform.getTransform().getSource(), evaluationContext.getPipelineOptions());) {
-        int numElements = 0;
-        if (reader.start()) {
-          do {
-            output.add(
-                WindowedValue.timestampedValueInGlobalWindow(
-                    reader.getCurrent(), reader.getCurrentTimestamp()));
-            numElements++;
-          } while (numElements < ARBITRARY_MAX_ELEMENTS && reader.advance());
-        }
-        checkpointMark = reader.getCheckpointMark();
-        checkpointMark.finalizeCheckpoint();
-        // TODO: When exercising create initial splits, make this the minimum watermark across all
-        // existing readers
-        StepTransformResult result =
-            StepTransformResult.withHold(transform, reader.getWatermark())
-                .addOutput(output)
-                .build();
-        evaluatorQueue.offer(this);
-        return result;
-      }
-    }
-
-    private <CheckpointMarkT extends CheckpointMark> UnboundedReader<OutputT> createReader(
-        UnboundedSource<OutputT, CheckpointMarkT> source, PipelineOptions options) {
-      @SuppressWarnings("unchecked")
-      CheckpointMarkT mark = (CheckpointMarkT) checkpointMark;
-      return source.createReader(options, mark);
-    }
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ViewEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ViewEvaluatorFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ViewEvaluatorFactory.java
deleted file mode 100644
index dd2bfb1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/ViewEvaluatorFactory.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.PCollectionViewWriter;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.Values;
-import com.google.cloud.dataflow.sdk.transforms.View.CreatePCollectionView;
-import com.google.cloud.dataflow.sdk.transforms.WithKeys;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * The {@link TransformEvaluatorFactory} used by the {@link InProcessPipelineRunner} for the
- * {@link CreatePCollectionView} primitive {@link PTransform}.
- *
- * <p>Evaluators produced by this factory handle the {@link WriteView} {@link PTransform}, which
- * is the last step of the {@link InProcessCreatePCollectionView} composite transform. That
- * composite is an override for {@link CreatePCollectionView} which applies windowing and triggers
- * before the view is written.
- */
-class ViewEvaluatorFactory implements TransformEvaluatorFactory {
-  @Override
-  public <T> TransformEvaluator<T> forApplication(
-      AppliedPTransform<?, ?, ?> application,
-      InProcessPipelineRunner.CommittedBundle<?> inputBundle,
-      InProcessEvaluationContext evaluationContext) {
-    // The application is passed as a raw type so that the type parameters of createEvaluator can
-    // be inferred; the result is then cast to the evaluator type the caller asked for.
-    @SuppressWarnings({"cast", "unchecked", "rawtypes"})
-    TransformEvaluator<T> viewEvaluator = (TransformEvaluator<T>) createEvaluator(
-            (AppliedPTransform) application, evaluationContext);
-    return viewEvaluator;
-  }
-
-  /**
-   * Builds an evaluator that buffers every value of every incoming iterable and hands the whole
-   * buffer to the view writer when the bundle is finished.
-   */
-  private <InT, OuT> TransformEvaluator<Iterable<InT>> createEvaluator(
-      final AppliedPTransform<PCollection<Iterable<InT>>, PCollectionView<OuT>, WriteView<InT, OuT>>
-          application,
-      InProcessEvaluationContext context) {
-    PCollection<Iterable<InT>> groupedInput = application.getInput();
-    final PCollectionViewWriter<InT, OuT> viewWriter =
-        context.createPCollectionViewWriter(groupedInput, application.getOutput());
-    return new TransformEvaluator<Iterable<InT>>() {
-      private final List<WindowedValue<InT>> buffered = new ArrayList<>();
-
-      @Override
-      public void processElement(WindowedValue<Iterable<InT>> element) {
-        // Explode the iterable: each value is re-wrapped via withValue on the incoming element.
-        Iterable<InT> values = element.getValue();
-        for (InT value : values) {
-          buffered.add(element.withValue(value));
-        }
-      }
-
-      @Override
-      public InProcessTransformResult finishBundle() {
-        viewWriter.add(buffered);
-        return StepTransformResult.withoutHold(application).build();
-      }
-    };
-  }
-
-  /**
-   * An in-process override for {@link CreatePCollectionView}.
-   *
-   * <p>Keys every element with {@code null}, groups by that key, drops the key again, and passes
-   * the resulting iterable to {@link WriteView}.
-   */
-  public static class InProcessCreatePCollectionView<ElemT, ViewT>
-      extends PTransform<PCollection<ElemT>, PCollectionView<ViewT>> {
-    private final CreatePCollectionView<ElemT, ViewT> original;
-
-    private InProcessCreatePCollectionView(CreatePCollectionView<ElemT, ViewT> original) {
-      this.original = original;
-    }
-
-    @Override
-    public PCollectionView<ViewT> apply(PCollection<ElemT> input) {
-      return input.apply(WithKeys.<Void, ElemT>of((Void) null))
-          .setCoder(KvCoder.of(VoidCoder.of(), input.getCoder()))
-          .apply(GroupByKey.<Void, ElemT>create())
-          .apply(Values.<Iterable<ElemT>>create())
-          .apply(new WriteView<ElemT, ViewT>(original));
-    }
-  }
-
-  /**
-   * An in-process implementation of the {@link CreatePCollectionView} primitive.
-   *
-   * <p>This implementation requires the input {@link PCollection} to be an iterable, which is
-   * provided to {@link PCollectionView#fromIterableInternal(Iterable)}.
-   */
-  public static final class WriteView<ElemT, ViewT>
-      extends PTransform<PCollection<Iterable<ElemT>>, PCollectionView<ViewT>> {
-    private final CreatePCollectionView<ElemT, ViewT> original;
-
-    WriteView(CreatePCollectionView<ElemT, ViewT> original) {
-      this.original = original;
-    }
-
-    /** Returns the view of the wrapped {@link CreatePCollectionView}; the input is not read. */
-    @Override
-    public PCollectionView<ViewT> apply(PCollection<Iterable<ElemT>> input) {
-      return original.getView();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/WatermarkCallbackExecutor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/WatermarkCallbackExecutor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/WatermarkCallbackExecutor.java
deleted file mode 100644
index 27d59b9..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/WatermarkCallbackExecutor.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.common.collect.ComparisonChain;
-import com.google.common.collect.Ordering;
-
-import org.joda.time.Instant;
-
-import java.util.PriorityQueue;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentMap;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-/**
- * Runs callbacks whose execution depends on the per-step progression of the watermark.
- *
- * <p>A callback is registered with
- * {@link #callOnGuaranteedFiring(AppliedPTransform, BoundedWindow, WindowingStrategy, Runnable)}
- * and is executed after a call to {@link #fireForWatermark(AppliedPTransform, Instant)} for the
- * same {@link AppliedPTransform} with a watermark sufficient to ensure that the trigger for the
- * windowing strategy would have been produced.
- *
- * <p>NOTE: {@link WatermarkCallbackExecutor} does not track the latest observed watermark for any
- * {@link AppliedPTransform}. Any call to
- * {@link #callOnGuaranteedFiring(AppliedPTransform, BoundedWindow, WindowingStrategy, Runnable)}
- * that could have potentially already fired should be followed by a call to
- * {@link #fireForWatermark(AppliedPTransform, Instant)} for the same transform with the current
- * value of the watermark.
- */
-class WatermarkCallbackExecutor {
-  /**
-   * Create a new {@link WatermarkCallbackExecutor}.
-   */
-  public static WatermarkCallbackExecutor create() {
-    return new WatermarkCallbackExecutor();
-  }
-
-  // Pending callbacks per step. Each queue is ordered by CallbackOrdering and is only touched
-  // while holding its own monitor.
-  private final ConcurrentMap<AppliedPTransform<?, ?, ?>, PriorityQueue<WatermarkCallback>>
-      callbacks = new ConcurrentHashMap<>();
-  // Callbacks are handed to a single-threaded executor.
-  private final ExecutorService executor = Executors.newSingleThreadExecutor();
-
-  private WatermarkCallbackExecutor() {}
-
-  /**
-   * Execute the provided {@link Runnable} after the next call to
-   * {@link #fireForWatermark(AppliedPTransform, Instant)} where the window is guaranteed to have
-   * produced output.
-   */
-  public void callOnGuaranteedFiring(
-      AppliedPTransform<?, ?, ?> step,
-      BoundedWindow window,
-      WindowingStrategy<?, ?> windowingStrategy,
-      Runnable runnable) {
-    WatermarkCallback pending =
-        WatermarkCallback.onGuaranteedFiring(window, windowingStrategy, runnable);
-
-    PriorityQueue<WatermarkCallback> queue = callbacks.get(step);
-    if (queue == null) {
-      // Another thread may install a queue for this step concurrently; whichever queue ends up
-      // in the map is the one that receives the callback.
-      PriorityQueue<WatermarkCallback> fresh = new PriorityQueue<>(11, new CallbackOrdering());
-      PriorityQueue<WatermarkCallback> installed = callbacks.putIfAbsent(step, fresh);
-      queue = (installed == null) ? fresh : installed;
-    }
-
-    synchronized (queue) {
-      queue.offer(pending);
-    }
-  }
-
-  /**
-   * Schedule all pending callbacks that must have produced output by the time of the provided
-   * watermark.
-   */
-  public void fireForWatermark(AppliedPTransform<?, ?, ?> step, Instant watermark) {
-    PriorityQueue<WatermarkCallback> queue = callbacks.get(step);
-    if (queue != null) {
-      synchronized (queue) {
-        // The queue is ordered by firing time, so stop at the first callback that must wait.
-        WatermarkCallback head = queue.peek();
-        while (head != null && head.shouldFire(watermark)) {
-          executor.submit(queue.poll().getCallback());
-          head = queue.peek();
-        }
-      }
-    }
-  }
-
-  /** A {@link Runnable} paired with the instant the watermark must pass before it may run. */
-  private static class WatermarkCallback {
-    public static <W extends BoundedWindow> WatermarkCallback onGuaranteedFiring(
-        BoundedWindow window, WindowingStrategy<?, W> strategy, Runnable callback) {
-      @SuppressWarnings("unchecked")
-      Instant guaranteedFiring =
-          strategy.getTrigger().getSpec().getWatermarkThatGuaranteesFiring((W) window);
-      return new WatermarkCallback(guaranteedFiring, callback);
-    }
-
-    private final Instant fireAfter;
-    private final Runnable callback;
-
-    private WatermarkCallback(Instant fireAfter, Runnable callback) {
-      this.fireAfter = fireAfter;
-      this.callback = callback;
-    }
-
-    /**
-     * Returns true if the watermark is strictly after the firing instant, or if the watermark
-     * equals {@link BoundedWindow#TIMESTAMP_MAX_VALUE}.
-     */
-    public boolean shouldFire(Instant currentWatermark) {
-      if (currentWatermark.isAfter(fireAfter)) {
-        return true;
-      }
-      return currentWatermark.equals(BoundedWindow.TIMESTAMP_MAX_VALUE);
-    }
-
-    public Runnable getCallback() {
-      return callback;
-    }
-  }
-
-  /** Orders callbacks by firing instant, breaking ties with an arbitrary ordering. */
-  private static class CallbackOrdering extends Ordering<WatermarkCallback> {
-    @Override
-    public int compare(WatermarkCallback first, WatermarkCallback second) {
-      return ComparisonChain.start()
-          .compare(first.fireAfter, second.fireAfter)
-          .compare(first.callback, second.callback, Ordering.arbitrary())
-          .result();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java
deleted file mode 100644
index d1aa6af..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/package-info.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines runners for executing Pipelines in different modes, including
- * {@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner} and
- * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner}.
- *
- * <p>{@link com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner} executes a {@code Pipeline}
- * locally, without contacting the Dataflow service.
- * {@link com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner} submits a
- * {@code Pipeline} to the Dataflow service, which executes it on Dataflow-managed Compute Engine
- * instances. {@code DataflowPipelineRunner} returns
- * as soon as the {@code Pipeline} has been submitted. Use
- * {@link com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner} to have execution
- * updates printed to the console.
- *
- * <p>The runner is specified as part of {@link com.google.cloud.dataflow.sdk.options.PipelineOptions}.
- */
-package com.google.cloud.dataflow.sdk.runners;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/IsmFormat.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/IsmFormat.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/IsmFormat.java
deleted file mode 100644
index 318de9b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/IsmFormat.java
+++ /dev/null
@@ -1,946 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.worker;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addLong;
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.coders.AtomicCoder;
-import com.google.cloud.dataflow.sdk.coders.ByteArrayCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.ListCoder;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.coders.VarIntCoder;
-import com.google.cloud.dataflow.sdk.coders.VarLongCoder;
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.RandomAccessData;
-import com.google.cloud.dataflow.sdk.util.VarInt;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.MoreObjects.ToStringHelper;
-import com.google.common.base.Objects;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-import com.google.common.hash.HashFunction;
-import com.google.common.hash.Hashing;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import javax.annotation.Nullable;
-
-/**
- * An Ism file is a prefix encoded composite key value file broken into shards. Each composite
- * key is composed of a fixed number of component keys. A fixed number of those sub keys represent
- * the shard key portion; see {@link IsmRecord} and {@link IsmRecordCoder} for further details
- * around the data format. In addition to the data, there is a bloom filter,
- * and multiple indices to allow for efficient retrieval.
- *
- * <p>An Ism file is composed of these high level sections (in order):
- * <ul>
- *   <li>shard block</li>
- *   <li>bloom filter (See {@code ScalableBloomFilter} for details on encoding format)</li>
- *   <li>shard index</li>
- *   <li>footer (See {@link Footer} for details on encoding format)</li>
- * </ul>
- *
- * <p>The shard block is composed of multiple copies of the following:
- * <ul>
- *   <li>data block</li>
- *   <li>data index</li>
- * </ul>
- *
- * <p>The data block is composed of multiple copies of the following:
- * <ul>
- *   <li>key prefix (See {@link KeyPrefix} for details on encoding format)</li>
- *   <li>unshared key bytes</li>
- *   <li>value bytes</li>
- *   <li>optional 0x00 0x00 bytes followed by metadata bytes
- *       (if the following 0x00 0x00 bytes are not present, then there are no metadata bytes)</li>
- * </ul>
- * Each key written into the data block must be in unsigned lexicographically increasing order
- * and also its shard portion of the key must hash to the same shard id as all other keys
- * within the same data block. The hashing function used is the
- * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
- * 32-bit murmur3 algorithm, x86 variant</a> (little-endian variant),
- * using {@code 1225801234} as the seed value.
- *
- * <p>The data index is composed of {@code N} copies of the following:
- * <ul>
- *   <li>key prefix (See {@link KeyPrefix} for details on encoding format)</li>
- *   <li>unshared key bytes</li>
- *   <li>byte offset to key prefix in data block (variable length long coding)</li>
- * </ul>
- *
- * <p>The shard index is composed of a {@link VarInt variable length integer} encoding representing
- * the number of shard index records followed by that many shard index records.
- * See {@link IsmShardCoder} for further details as to its encoding scheme.
- */
-public class IsmFormat {
-  // Seed for the murmur3 hash that assigns keys to shards; the same value is quoted in the
-  // format description in the class Javadoc.
-  private static final int HASH_SEED = 1225801234;
-  // 32-bit murmur3 hash function seeded with HASH_SEED.
-  private static final HashFunction HASH_FUNCTION = Hashing.murmur3_32(HASH_SEED);
-  // Mask applied to the hash to obtain a shard id; metadata keys are additionally offset by
-  // SHARD_BITS + 1 so that their shard ids do not overlap those of normal keys.
-  static final int SHARD_BITS = 0x7F; // [0-127] shards + [128-255] metadata shards
-
-  /**
-   * A record that pairs a composite key with either a value or a block of metadata. The composite
-   * key of a value record must not contain the metadata key component place holder, and the
-   * composite key of a metadata record must contain it.
-   *
-   * <p>The composite key is a fixed number of component keys where the first {@code N} component
-   * keys are used to create a shard id via hashing. See {@link IsmRecordCoder#hash(List)} for
-   * further details.
-   */
-  public static class IsmRecord<V> {
-    /** Returns an IsmRecord with the specified key components and value. */
-    public static <V> IsmRecord<V> of(List<?> keyComponents, V value) {
-      checkNotNull(keyComponents);
-      checkArgument(!keyComponents.isEmpty(), "Expected non-empty list of key components.");
-      boolean hasMetadataKey = isMetadataKey(keyComponents);
-      checkArgument(!hasMetadataKey, "Expected key components to not contain metadata key.");
-      return new IsmRecord<>(keyComponents, value, null);
-    }
-
-    /** Returns a metadata IsmRecord with the specified key components and metadata bytes. */
-    public static <V> IsmRecord<V> meta(List<?> keyComponents, byte[] metadata) {
-      checkNotNull(keyComponents);
-      checkNotNull(metadata);
-      checkArgument(!keyComponents.isEmpty(), "Expected non-empty list of key components.");
-      boolean hasMetadataKey = isMetadataKey(keyComponents);
-      checkArgument(hasMetadataKey, "Expected key components to contain metadata key.");
-      return new IsmRecord<V>(keyComponents, null, metadata);
-    }
-
-    private final List<?> keyComponents;
-    // Null for metadata records.
-    @Nullable
-    private final V value;
-    // Null for value records.
-    @Nullable
-    private final byte[] metadata;
-
-    private IsmRecord(List<?> keyComponents, V value, byte[] metadata) {
-      this.keyComponents = keyComponents;
-      this.value = value;
-      this.metadata = metadata;
-    }
-
-    /** Returns the list of key components. */
-    public List<?> getKeyComponents() {
-      return keyComponents;
-    }
-
-    /** Returns the key component at the specified index. */
-    public Object getKeyComponent(int index) {
-      return keyComponents.get(index);
-    }
-
-    /**
-     * Returns the value.
-     *
-     * @throws IllegalStateException if this is not a value record
-     */
-    public V getValue() {
-      boolean isMetadataRecord = isMetadataKey(keyComponents);
-      checkState(!isMetadataRecord, "This is a metadata record and not a value record.");
-      return value;
-    }
-
-    /**
-     * Returns the metadata.
-     *
-     * @throws IllegalStateException if this is not a metadata record
-     */
-    public byte[] getMetadata() {
-      boolean isMetadataRecord = isMetadataKey(keyComponents);
-      checkState(isMetadataRecord, "This is a value record and not a metadata record.");
-      return metadata;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj instanceof IsmRecord) {
-        IsmRecord<?> that = (IsmRecord<?>) obj;
-        return Objects.equal(keyComponents, that.keyComponents)
-            && Objects.equal(value, that.value)
-            && Arrays.equals(metadata, that.metadata);
-      }
-      return false;
-    }
-
-    @Override
-    public int hashCode() {
-      // The metadata array is hashed by content so that hashCode agrees with equals.
-      int metadataHash = Arrays.hashCode(metadata);
-      return Objects.hashCode(keyComponents, value, metadataHash);
-    }
-
-    @Override
-    public String toString() {
-      ToStringHelper helper = MoreObjects.toStringHelper(IsmRecord.class)
-          .add("keyComponents", keyComponents);
-      // Only the payload that matches the kind of record is printed.
-      if (!isMetadataKey(keyComponents)) {
-        helper.add("value", value);
-      } else {
-        helper.add("metadata", metadata);
-      }
-      return helper.toString();
-    }
-  }
-
-  /**
-   * A {@link Coder} for {@link IsmRecord}s.
-   *
-   * <p>Note that this coder standalone will not produce an Ism file. This coder can be used
-   * to materialize a {@link PCollection} of {@link IsmRecord}s. Only when this coder
-   * is combined with an {@link IsmSink} will one produce an Ism file.
-   *
-   * <p>The {@link IsmRecord} encoded format is:
-   * <ul>
-   *   <li>encoded key component 1 using key component coder 1</li>
-   *   <li>...</li>
-   *   <li>encoded key component N using key component coder N</li>
-   *   <li>encoded value using value coder (for a metadata record, the metadata bytes encoded
-   *       with {@link ByteArrayCoder} instead)</li>
-   * </ul>
-   */
-  public static class IsmRecordCoder<V>
-      extends StandardCoder<IsmRecord<V>> {
-    /** Returns an IsmRecordCoder with the specified key component coders, value coder. */
-    public static <V> IsmRecordCoder<V> of(
-        int numberOfShardKeyCoders,
-        int numberOfMetadataShardKeyCoders,
-        List<Coder<?>> keyComponentCoders,
-        Coder<V> valueCoder) {
-      checkNotNull(keyComponentCoders);
-      checkArgument(keyComponentCoders.size() > 0);
-      checkArgument(numberOfShardKeyCoders > 0);
-      checkArgument(numberOfShardKeyCoders <= keyComponentCoders.size());
-      checkArgument(numberOfMetadataShardKeyCoders <= keyComponentCoders.size());
-      return new IsmRecordCoder<>(
-          numberOfShardKeyCoders,
-          numberOfMetadataShardKeyCoders,
-          keyComponentCoders,
-          valueCoder);
-    }
-
-    /**
-     * Returns an IsmRecordCoder with the specified coders. Note that this method is not meant
-     * to be called by users but used by Jackson when decoding this coder.
-     *
-     * <p>The last entry of {@code components} is the value coder; all preceding entries are the
-     * key component coders.
-     */
-    @JsonCreator
-    public static IsmRecordCoder<?> of(
-        @JsonProperty(PropertyNames.NUM_SHARD_CODERS) int numberOfShardCoders,
-        @JsonProperty(PropertyNames.NUM_METADATA_SHARD_CODERS) int numberOfMetadataShardCoders,
-        @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) List<Coder<?>> components) {
-      Preconditions.checkArgument(components.size() >= 2,
-          "Expecting at least 2 components, got " + components.size());
-      return of(
-          numberOfShardCoders,
-          numberOfMetadataShardCoders,
-          components.subList(0, components.size() - 1),
-          components.get(components.size() - 1));
-    }
-
-    private final int numberOfShardKeyCoders;
-    private final int numberOfMetadataShardKeyCoders;
-    private final List<Coder<?>> keyComponentCoders;
-    private final Coder<V> valueCoder;
-
-    private IsmRecordCoder(
-        int numberOfShardKeyCoders,
-        int numberOfMetadataShardKeyCoders,
-        List<Coder<?>> keyComponentCoders, Coder<V> valueCoder) {
-      this.numberOfShardKeyCoders = numberOfShardKeyCoders;
-      this.numberOfMetadataShardKeyCoders = numberOfMetadataShardKeyCoders;
-      this.keyComponentCoders = keyComponentCoders;
-      this.valueCoder = valueCoder;
-    }
-
-    /** Returns the list of key component coders. */
-    public List<Coder<?>> getKeyComponentCoders() {
-      return keyComponentCoders;
-    }
-
-    /**
-     * Returns the key coder at the specified index.
-     *
-     * <p>The return type is raw on purpose: callers in this class use it to encode key
-     * components that are only known as {@code Object}.
-     */
-    public Coder getKeyComponentCoder(int index) {
-      return keyComponentCoders.get(index);
-    }
-
-    /** Returns the value coder. */
-    public Coder<V> getValueCoder() {
-      return valueCoder;
-    }
-
-    /**
-     * Encodes every key component with its coder, followed by the metadata bytes (metadata
-     * record) or the value (value record). Everything is written in the nested context.
-     *
-     * @throws CoderException if the record does not have exactly one key component per key
-     *     component coder
-     */
-    @Override
-    public void encode(IsmRecord<V> value, OutputStream outStream,
-        Coder.Context context) throws CoderException, IOException {
-      if (value.getKeyComponents().size() != keyComponentCoders.size()) {
-        throw new CoderException(String.format(
-            "Expected %s key component(s) but received key component(s) %s.",
-            keyComponentCoders.size(), value.getKeyComponents()));
-      }
-      for (int i = 0; i < keyComponentCoders.size(); ++i) {
-        getKeyComponentCoder(i).encode(value.getKeyComponent(i), outStream, context.nested());
-      }
-      if (isMetadataKey(value.getKeyComponents())) {
-        ByteArrayCoder.of().encode(value.getMetadata(), outStream, context.nested());
-      } else {
-        valueCoder.encode(value.getValue(), outStream, context.nested());
-      }
-    }
-
-    /**
-     * Decodes the key components first; whether they contain the metadata key decides if the
-     * remaining bytes are read as metadata or as a value.
-     */
-    @Override
-    public IsmRecord<V> decode(InputStream inStream, Coder.Context context)
-        throws CoderException, IOException {
-      List<Object> keyComponents = new ArrayList<>(keyComponentCoders.size());
-      for (Coder<?> keyCoder : keyComponentCoders) {
-        keyComponents.add(keyCoder.decode(inStream, context.nested()));
-      }
-      if (isMetadataKey(keyComponents)) {
-        return IsmRecord.<V>meta(
-            keyComponents, ByteArrayCoder.of().decode(inStream, context.nested()));
-      } else {
-        return IsmRecord.<V>of(keyComponents, valueCoder.decode(inStream, context.nested()));
-      }
-    }
-
-    /**
-     * Returns how many leading key components form the shard key for the given key: the metadata
-     * shard key count for metadata keys, the regular shard key count otherwise.
-     */
-    int getNumberOfShardKeyCoders(List<?> keyComponents) {
-      if (isMetadataKey(keyComponents)) {
-        return numberOfMetadataShardKeyCoders;
-      } else {
-        return numberOfShardKeyCoders;
-      }
-    }
-
-    /**
-     * Computes the shard id for the given key component(s).
-     *
-     * <p>The shard keys are encoded into their byte representations and hashed using the
-     * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
-     * 32-bit murmur3 algorithm, x86 variant</a> (little-endian variant),
-     * using {@code 1225801234} as the seed value. We ensure that shard ids for
-     * metadata keys and normal keys do not overlap.
-     */
-    public <V, T> int hash(List<?> keyComponents) {
-      return encodeAndHash(keyComponents, new RandomAccessData(), new ArrayList<Integer>());
-    }
-
-    /**
-     * Computes the shard id for the given key component(s).
-     *
-     * <p>Mutates {@code keyBytesToMutate} such that when returned, contains the encoded
-     * version of the key components.
-     */
-    <V, T> int encodeAndHash(List<?> keyComponents, RandomAccessData keyBytesToMutate) {
-      return encodeAndHash(keyComponents, keyBytesToMutate, new ArrayList<Integer>());
-    }
-
-    /**
-     * Computes the shard id for the given key component(s).
-     *
-     * <p>Mutates {@code keyBytesToMutate} such that when returned, contains the encoded
-     * version of the key components. Also, mutates {@code keyComponentByteOffsetsToMutate} to
-     * store the location where each key component's encoded byte representation ends within
-     * {@code keyBytesToMutate}.
-     *
-     * @throws IllegalArgumentException if more key components are supplied than there are key
-     *     component coders, or fewer than the shard key portion requires
-     * @throws IllegalStateException if encoding a key component fails with an
-     *     {@link IOException}
-     */
-    <V, T> int encodeAndHash(
-        List<?> keyComponents,
-        RandomAccessData keyBytesToMutate,
-        List<Integer> keyComponentByteOffsetsToMutate) {
-      checkNotNull(keyComponents);
-      checkArgument(keyComponents.size() <= keyComponentCoders.size(),
-          "Expected at most %s key component(s) but received %s.",
-          keyComponentCoders.size(), keyComponents);
-
-      final int numberOfKeyCodersToUse;
-      final int shardOffset;
-      if (isMetadataKey(keyComponents)) {
-        numberOfKeyCodersToUse = numberOfMetadataShardKeyCoders;
-        // Metadata shard ids live directly above the range used by normal keys.
-        shardOffset = SHARD_BITS + 1;
-      } else {
-        numberOfKeyCodersToUse = numberOfShardKeyCoders;
-        shardOffset = 0;
-      }
-
-      // Report the count that was actually checked; for metadata keys this is the metadata
-      // shard key count rather than the regular shard key count.
-      checkArgument(numberOfKeyCodersToUse <= keyComponents.size(),
-          "Expected at least %s key component(s) but received %s.",
-          numberOfKeyCodersToUse, keyComponents);
-
-      try {
-        // Encode the shard portion
-        for (int i = 0; i < numberOfKeyCodersToUse; ++i) {
-          getKeyComponentCoder(i).encode(
-              keyComponents.get(i), keyBytesToMutate.asOutputStream(), Context.NESTED);
-          keyComponentByteOffsetsToMutate.add(keyBytesToMutate.size());
-        }
-        // Only the bytes written so far, i.e. the shard portion, take part in the hash.
-        int rval = HASH_FUNCTION.hashBytes(
-            keyBytesToMutate.array(), 0, keyBytesToMutate.size()).asInt() & SHARD_BITS;
-        rval += shardOffset;
-
-        // Encode the remainder
-        for (int i = numberOfKeyCodersToUse; i < keyComponents.size(); ++i) {
-          getKeyComponentCoder(i).encode(
-              keyComponents.get(i), keyBytesToMutate.asOutputStream(), Context.NESTED);
-          keyComponentByteOffsetsToMutate.add(keyBytesToMutate.size());
-        }
-        return rval;
-      } catch (IOException e) {
-        throw new IllegalStateException(
-            String.format("Failed to hash %s with coder %s", keyComponents, this), e);
-      }
-    }
-
-    /** Returns the key component coders followed by the value coder. */
-    @Override
-    public List<Coder<?>> getCoderArguments() {
-      return ImmutableList.<Coder<?>>builder()
-          .addAll(keyComponentCoders)
-          .add(valueCoder)
-          .build();
-    }
-
-    /** Adds both shard key counts to the cloud object produced by the superclass. */
-    @Override
-    public CloudObject asCloudObject() {
-      CloudObject cloudObject = super.asCloudObject();
-      addLong(cloudObject, PropertyNames.NUM_SHARD_CODERS, numberOfShardKeyCoders);
-      addLong(cloudObject, PropertyNames.NUM_METADATA_SHARD_CODERS, numberOfMetadataShardKeyCoders);
-      return cloudObject;
-    }
-
-    @Override
-    public void verifyDeterministic() throws Coder.NonDeterministicException {
-      verifyDeterministic("Key component coders expected to be deterministic.", keyComponentCoders);
-      verifyDeterministic("Value coder expected to be deterministic.", valueCoder);
-    }
-
-    /** Returns true only if every key component coder and the value coder report true. */
-    @Override
-    public boolean consistentWithEquals() {
-      for (Coder<?> keyComponentCoder : keyComponentCoders) {
-        if (!keyComponentCoder.consistentWithEquals()) {
-          return false;
-        }
-      }
-      return valueCoder.consistentWithEquals();
-    }
-
-    /**
-     * Returns an {@link IsmRecord} built from the structural values of the record's components
-     * when this coder is consistent with equals; otherwise defers to the superclass.
-     *
-     * @throws IllegalStateException if a non-null record does not have exactly one key component
-     *     per key component coder
-     */
-    @Override
-    public Object structuralValue(IsmRecord<V> record) throws Exception {
-      // A null record goes to the superclass, as the null guard was evidently meant to do;
-      // previously the precondition below dereferenced the record before that guard ran.
-      if (record == null) {
-        return super.structuralValue(record);
-      }
-      checkState(record.getKeyComponents().size() == keyComponentCoders.size(),
-          "Expected the number of key component coders %s "
-          + "to match the number of key components %s.",
-          keyComponentCoders.size(), record.getKeyComponents());
-
-      if (!consistentWithEquals()) {
-        return super.structuralValue(record);
-      }
-
-      ArrayList<Object> keyComponentStructuralValues = new ArrayList<>();
-      for (int i = 0; i < keyComponentCoders.size(); ++i) {
-        keyComponentStructuralValues.add(
-            getKeyComponentCoder(i).structuralValue(record.getKeyComponent(i)));
-      }
-      if (isMetadataKey(record.getKeyComponents())) {
-        return IsmRecord.meta(keyComponentStructuralValues, record.getMetadata());
-      } else {
-        return IsmRecord.of(keyComponentStructuralValues,
-            valueCoder.structuralValue(record.getValue()));
-      }
-    }
-  }
-
-  /**
-   * Validates that the key portion of the given coder is deterministic.
-   */
-  static void validateCoderIsCompatible(IsmRecordCoder<?> coder) {
-    for (Coder<?> keyComponentCoder : coder.getKeyComponentCoders()) {
-      try {
-          keyComponentCoder.verifyDeterministic();
-      } catch (NonDeterministicException e) {
-        throw new IllegalArgumentException(
-            String.format("Key component coder %s is expected to be deterministic.",
-                keyComponentCoder), e);
-      }
-    }
-  }
-
-  /** Returns true if and only if any of the passed in key components represent a metadata key. */
-  public static boolean isMetadataKey(List<?> keyComponents) {
-    for (Object keyComponent : keyComponents) {
-      if (keyComponent == METADATA_KEY) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /** A marker object representing the wildcard metadata key component. */
-  private static final Object METADATA_KEY = new Object() {
-    @Override
-    public String toString() {
-      return "META";
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      return this == obj;
-    }
-
-    @Override
-    public int hashCode() {
-      return -1248902349;
-    }
-  };
-
-  /**
-   * An object representing a wild card for a key component.
-   * Encoded using {@link MetadataKeyCoder}.
-   */
-  public static Object getMetadataKey() {
-    return METADATA_KEY;
-  }
-
-  /**
-   * A coder for metadata key component. Can be used to wrap key component coder allowing for
-   * the metadata key component to be used as a place holder instead of an actual key.
-   */
-  public static class MetadataKeyCoder<K> extends StandardCoder<K> {
-    public static <K> MetadataKeyCoder<K> of(Coder<K> keyCoder) {
-      checkNotNull(keyCoder);
-      return new MetadataKeyCoder<>(keyCoder);
-    }
-
-    /**
-     * Returns an IsmRecordCoder with the specified coders. Note that this method is not meant
-     * to be called by users but used by Jackson when decoding this coder.
-     */
-    @JsonCreator
-    public static MetadataKeyCoder<?> of(
-        @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) List<Coder<?>> components) {
-      Preconditions.checkArgument(components.size() == 1,
-          "Expecting one component, got " + components.size());
-      return of(components.get(0));
-    }
-
-    private final Coder<K> keyCoder;
-
-    private MetadataKeyCoder(Coder<K> keyCoder) {
-      this.keyCoder = keyCoder;
-    }
-
-    public Coder<K> getKeyCoder() {
-      return keyCoder;
-    }
-
-    @Override
-    public void encode(K value, OutputStream outStream, Coder.Context context)
-        throws CoderException, IOException {
-      if (value == METADATA_KEY) {
-        outStream.write(0);
-      } else {
-        outStream.write(1);
-        keyCoder.encode(value, outStream, context.nested());
-      }
-    }
-
-    @Override
-    public K decode(InputStream inStream, Coder.Context context)
-        throws CoderException, IOException {
-      int marker = inStream.read();
-      if (marker == 0) {
-        return (K) getMetadataKey();
-      } else if (marker == 1) {
-        return keyCoder.decode(inStream, context.nested());
-      } else {
-        throw new CoderException(String.format("Expected marker but got %s.", marker));
-      }
-    }
-
-    @Override
-    public List<Coder<?>> getCoderArguments() {
-      return ImmutableList.<Coder<?>>of(keyCoder);
-    }
-
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      verifyDeterministic("Expected key coder to be deterministic", keyCoder);
-    }
-  }
-
-  /**
-   * A shard descriptor containing shard id, the data block offset, and the index offset for the
-   * given shard.
-   */
-  public static class IsmShard {
-    private final int id;
-    private final long blockOffset;
-    private final long indexOffset;
-
-    /** Returns an IsmShard with the given id, block offset and no index offset. */
-    public static IsmShard of(int id, long blockOffset) {
-      IsmShard ismShard = new IsmShard(id, blockOffset, -1);
-      checkState(id >= 0,
-          "%s attempting to be written with negative shard id.",
-          ismShard);
-      checkState(blockOffset >= 0,
-          "%s attempting to be written with negative block offset.",
-          ismShard);
-      return ismShard;
-    }
-
-    /** Returns an IsmShard with the given id, block offset, and index offset. */
-    public static IsmShard of(int id, long blockOffset, long indexOffset) {
-      IsmShard ismShard = new IsmShard(id, blockOffset, indexOffset);
-      checkState(id >= 0,
-          "%s attempting to be written with negative shard id.",
-          ismShard);
-      checkState(blockOffset >= 0,
-          "%s attempting to be written with negative block offset.",
-          ismShard);
-      checkState(indexOffset >= 0,
-          "%s attempting to be written with negative index offset.",
-          ismShard);
-      return ismShard;
-    }
-
-    private IsmShard(int id, long blockOffset, long indexOffset) {
-      this.id = id;
-      this.blockOffset = blockOffset;
-      this.indexOffset = indexOffset;
-    }
-
-    /** Return the shard id. */
-    public int getId() {
-      return id;
-    }
-
-    /** Return the absolute position within the Ism file where the data block begins. */
-    public long getBlockOffset() {
-      return blockOffset;
-    }
-
-    /**
-     * Return the absolute position within the Ism file where the index block begins.
-     * Throws {@link IllegalStateException} if the index offset was never specified.
-     */
-    public long getIndexOffset() {
-      checkState(indexOffset >= 0,
-            "Unable to fetch index offset because it was never specified.");
-      return indexOffset;
-    }
-
-    /** Returns a new IsmShard like this one with the specified index offset. */
-    public IsmShard withIndexOffset(long indexOffset) {
-      return of(id, blockOffset, indexOffset);
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(IsmShard.class)
-          .add("id", id)
-          .add("blockOffset", blockOffset)
-          .add("indexOffset", indexOffset)
-          .toString();
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (!(obj instanceof IsmShard)) {
-        return false;
-      }
-      IsmShard other = (IsmShard) obj;
-      return Objects.equal(id, other.id)
-          && Objects.equal(blockOffset, other.blockOffset)
-          && Objects.equal(indexOffset, other.indexOffset);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hashCode(id, blockOffset, indexOffset);
-    }
-  }
-
-  /**
-   * A {@link ListCoder} wrapping a {@link IsmShardCoder} used to encode the shard index.
-   * See {@link ListCoder} for its encoding specification and {@link IsmShardCoder} for its
-   * encoding specification.
-   */
-  public static final Coder<List<IsmShard>> ISM_SHARD_INDEX_CODER =
-      ListCoder.of(IsmShardCoder.of());
-
-  /**
-   * A coder for {@link IsmShard}s.
-   *
-   * The shard descriptor is encoded as:
-   * <ul>
-   *   <li>id (variable length integer encoding)</li>
-   *   <li>blockOffset (variable length long encoding)</li>
-   *   <li>indexOffset (variable length long encoding)</li>
-   * </ul>
-   */
-  public static class IsmShardCoder extends AtomicCoder<IsmShard> {
-    private static final IsmShardCoder INSTANCE = new IsmShardCoder();
-
-    /** Returns an IsmShardCoder. */
-    @JsonCreator
-    public static IsmShardCoder of() {
-      return INSTANCE;
-    }
-
-    private IsmShardCoder() {
-    }
-
-    @Override
-    public void encode(IsmShard value, OutputStream outStream, Coder.Context context)
-        throws CoderException, IOException {
-      checkState(value.getIndexOffset() >= 0,
-          "%s attempting to be written without index offset.",
-          value);
-      VarIntCoder.of().encode(value.getId(), outStream, context.nested());
-      VarLongCoder.of().encode(value.getBlockOffset(), outStream, context.nested());
-      VarLongCoder.of().encode(value.getIndexOffset(), outStream, context.nested());
-    }
-
-    @Override
-    public IsmShard decode(
-        InputStream inStream, Coder.Context context) throws CoderException, IOException {
-      return IsmShard.of(
-          VarIntCoder.of().decode(inStream, context),
-          VarLongCoder.of().decode(inStream, context),
-          VarLongCoder.of().decode(inStream, context));
-    }
-
-    @Override
-    public boolean consistentWithEquals() {
-      return true;
-    }
-  }
-
-  /**
-   * The prefix used before each key which contains the number of shared and unshared
-   * bytes from the previous key that was read. The key prefix along with the previous key
-   * and the unshared key bytes allows one to construct the current key by doing the following
-   * {@code currentKey = previousKey[0 : sharedBytes] + read(unsharedBytes)}.
-   *
-   * <p>The key prefix is encoded as:
-   * <ul>
-   *   <li>number of shared key bytes (variable length integer coding)</li>
-   *   <li>number of unshared key bytes (variable length integer coding)</li>
-   * </ul>
-   */
-  static class KeyPrefix {
-    private final int sharedKeySize;
-    private final int unsharedKeySize;
-
-    KeyPrefix(int sharedBytes, int unsharedBytes) {
-      this.sharedKeySize = sharedBytes;
-      this.unsharedKeySize = unsharedBytes;
-    }
-
-    public int getSharedKeySize() {
-      return sharedKeySize;
-    }
-
-    public int getUnsharedKeySize() {
-      return unsharedKeySize;
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hashCode(sharedKeySize, unsharedKeySize);
-    }
-
-    @Override
-    public boolean equals(Object other) {
-      if (other == this) {
-        return true;
-      }
-      if (!(other instanceof KeyPrefix)) {
-        return false;
-      }
-      KeyPrefix keyPrefix = (KeyPrefix) other;
-      return sharedKeySize == keyPrefix.sharedKeySize
-          && unsharedKeySize == keyPrefix.unsharedKeySize;
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(this)
-          .add("sharedKeySize", sharedKeySize)
-          .add("unsharedKeySize", unsharedKeySize)
-          .toString();
-    }
-  }
-
-  /** A {@link Coder} for {@link KeyPrefix}. */
-  static final class KeyPrefixCoder extends AtomicCoder<KeyPrefix> {
-    private static final KeyPrefixCoder INSTANCE = new KeyPrefixCoder();
-
-    @JsonCreator
-    public static KeyPrefixCoder of() {
-      return INSTANCE;
-    }
-
-    @Override
-    public void encode(KeyPrefix value, OutputStream outStream, Coder.Context context)
-        throws CoderException, IOException {
-      VarInt.encode(value.sharedKeySize, outStream);
-      VarInt.encode(value.unsharedKeySize, outStream);
-    }
-
-    @Override
-    public KeyPrefix decode(InputStream inStream, Coder.Context context)
-        throws CoderException, IOException {
-      return new KeyPrefix(VarInt.decodeInt(inStream), VarInt.decodeInt(inStream));
-    }
-
-    @Override
-    public boolean consistentWithEquals() {
-      return true;
-    }
-
-    @Override
-    public boolean isRegisterByteSizeObserverCheap(KeyPrefix value, Coder.Context context) {
-      return true;
-    }
-
-    @Override
-    protected long getEncodedElementByteSize(KeyPrefix value, Coder.Context context)
-        throws Exception {
-      Preconditions.checkNotNull(value);
-      return VarInt.getLength(value.sharedKeySize) + VarInt.getLength(value.unsharedKeySize);
-    }
-  }
-
-  /**
-   * The footer stores the relevant information required to locate the index and bloom filter.
-   * It also stores a version byte and the number of keys stored.
-   *
-   * <p>The footer is encoded as the value containing:
-   * <ul>
-   *   <li>start of bloom filter offset (big endian long coding)</li>
-   *   <li>start of shard index position offset (big endian long coding)</li>
-   *   <li>number of keys in file (big endian long coding)</li>
-   *   <li>0x01 (version key as a single byte)</li>
-   * </ul>
-   */
-  static class Footer {
-    static final int LONG_BYTES = 8;
-    static final int FIXED_LENGTH = 3 * LONG_BYTES + 1;
-    static final byte VERSION = 2;
-
-    private final long indexPosition;
-    private final long bloomFilterPosition;
-    private final long numberOfKeys;
-
-    Footer(long indexPosition, long bloomFilterPosition, long numberOfKeys) {
-      this.indexPosition = indexPosition;
-      this.bloomFilterPosition = bloomFilterPosition;
-      this.numberOfKeys = numberOfKeys;
-    }
-
-    public long getIndexPosition() {
-      return indexPosition;
-    }
-
-    public long getBloomFilterPosition() {
-      return bloomFilterPosition;
-    }
-
-    public long getNumberOfKeys() {
-      return numberOfKeys;
-    }
-
-    @Override
-    public boolean equals(Object other) {
-      if (other == this) {
-        return true;
-      }
-      if (!(other instanceof Footer)) {
-        return false;
-      }
-      Footer footer = (Footer) other;
-      return indexPosition == footer.indexPosition
-          && bloomFilterPosition == footer.bloomFilterPosition
-          && numberOfKeys == footer.numberOfKeys;
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hashCode(indexPosition, bloomFilterPosition, numberOfKeys);
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(this)
-          .add("version", Footer.VERSION)
-          .add("indexPosition", indexPosition)
-          .add("bloomFilterPosition", bloomFilterPosition)
-          .add("numberOfKeys", numberOfKeys)
-          .toString();
-    }
-  }
-
-  /** A {@link Coder} for {@link Footer}. */
-  static final class FooterCoder extends AtomicCoder<Footer> {
-    private static final FooterCoder INSTANCE = new FooterCoder();
-
-    @JsonCreator
-    public static FooterCoder of() {
-      return INSTANCE;
-    }
-
-    @Override
-    public void encode(Footer value, OutputStream outStream, Coder.Context context)
-        throws CoderException, IOException {
-      DataOutputStream dataOut = new DataOutputStream(outStream);
-      dataOut.writeLong(value.indexPosition);
-      dataOut.writeLong(value.bloomFilterPosition);
-      dataOut.writeLong(value.numberOfKeys);
-      dataOut.write(Footer.VERSION);
-    }
-
-    @Override
-    public Footer decode(InputStream inStream, Coder.Context context)
-        throws CoderException, IOException {
-      DataInputStream dataIn = new DataInputStream(inStream);
-      Footer footer = new Footer(dataIn.readLong(), dataIn.readLong(), dataIn.readLong());
-      int version = dataIn.read();
-      if (version != Footer.VERSION) {
-        throw new IOException("Unknown version " + version + ". "
-            + "Only version 2 is currently supported.");
-      }
-      return footer;
-    }
-
-    @Override
-    public boolean consistentWithEquals() {
-      return true;
-    }
-
-    @Override
-    public boolean isRegisterByteSizeObserverCheap(Footer value, Coder.Context context) {
-      return true;
-    }
-
-    @Override
-    protected long getEncodedElementByteSize(Footer value, Coder.Context context)
-        throws Exception {
-      return Footer.FIXED_LENGTH;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java
deleted file mode 100644
index af0a345..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/worker/package-info.java
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Implementation of the harness that runs on each Google Compute Engine instance to coordinate
- * execution of Pipeline code.
- */
-@ParametersAreNonnullByDefault
-package com.google.cloud.dataflow.sdk.runners.worker;
-
-import javax.annotation.ParametersAreNonnullByDefault;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/CoderProperties.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/CoderProperties.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/CoderProperties.java
deleted file mode 100644
index 5705dc4..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/testing/CoderProperties.java
+++ /dev/null
@@ -1,349 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.testing;
-
-import static org.hamcrest.Matchers.contains;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.hamcrest.Matchers.emptyIterable;
-import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.hasItem;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.fail;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.util.Serializer;
-import com.google.cloud.dataflow.sdk.util.Structs;
-import com.google.cloud.dataflow.sdk.util.UnownedInputStream;
-import com.google.cloud.dataflow.sdk.util.UnownedOutputStream;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Iterables;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * Properties for use in {@link Coder} tests. These are implemented with junit assertions
- * rather than as predicates for the sake of error messages.
- *
- * <p>We serialize and deserialize the coder to make sure that any state information required by
- * the coder is preserved. This causes tests written such that coders that lose information during
- * serialization or change state during encoding/decoding will fail.
- */
-public class CoderProperties {
-
-  /**
-   * All the contexts, for use in test cases.
-   */
-   public static final List<Coder.Context> ALL_CONTEXTS = Arrays.asList(
-       Coder.Context.OUTER, Coder.Context.NESTED);
-
-  /**
-   * Verifies that for the given {@code Coder<T>}, and values of
-   * type {@code T}, if the values are equal then the encoded bytes are equal, in any
-   * {@code Coder.Context}.
-   */
-  public static <T> void coderDeterministic(
-      Coder<T> coder, T value1, T value2)
-      throws Exception {
-    for (Coder.Context context : ALL_CONTEXTS) {
-      coderDeterministicInContext(coder, context, value1, value2);
-    }
-  }
-
-  /**
-   * Verifies that for the given {@code Coder<T>}, {@code Coder.Context}, and values of
-   * type {@code T}, if the values are equal then the encoded bytes are equal.
-   */
-  public static <T> void coderDeterministicInContext(
-      Coder<T> coder, Coder.Context context, T value1, T value2)
-      throws Exception {
-
-    try {
-      coder.verifyDeterministic();
-    } catch (NonDeterministicException e) {
-      fail("Expected that the coder is deterministic");
-    }
-    assertThat("Expected that the passed in values are equal()", value1, equalTo(value2));
-    assertThat(
-        encode(coder, context, value1),
-        equalTo(encode(coder, context, value2)));
-  }
-
-  /**
-   * Verifies that for the given {@code Coder<T>},
-   * and value of type {@code T}, encoding followed by decoding yields an
-   * equal value of type {@code T}, in any {@code Coder.Context}.
-   */
-  public static <T> void coderDecodeEncodeEqual(
-      Coder<T> coder, T value)
-      throws Exception {
-    for (Coder.Context context : ALL_CONTEXTS) {
-      coderDecodeEncodeEqualInContext(coder, context, value);
-    }
-  }
-
-  /**
-   * Verifies that for the given {@code Coder<T>}, {@code Coder.Context},
-   * and value of type {@code T}, encoding followed by decoding yields an
-   * equal value of type {@code T}.
-   */
-  public static <T> void coderDecodeEncodeEqualInContext(
-      Coder<T> coder, Coder.Context context, T value)
-      throws Exception {
-    assertThat(decodeEncode(coder, context, value), equalTo(value));
-  }
-
-  /**
-   * Verifies that for the given {@code Coder<Collection<T>>},
-   * and value of type {@code Collection<T>}, encoding followed by decoding yields an
-   * equal value of type {@code Collection<T>}, in any {@code Coder.Context}.
-   */
-  public static <T, CollectionT extends Collection<T>> void coderDecodeEncodeContentsEqual(
-      Coder<CollectionT> coder, CollectionT value)
-      throws Exception {
-    for (Coder.Context context : ALL_CONTEXTS) {
-      coderDecodeEncodeContentsEqualInContext(coder, context, value);
-    }
-  }
-
-  /**
-   * Verifies that for the given {@code Coder<Collection<T>>},
-   * and value of type {@code Collection<T>}, encoding followed by decoding yields an
-   * equal value of type {@code Collection<T>}, in the given {@code Coder.Context}.
-   */
-  @SuppressWarnings("unchecked")
-  public static <T, CollectionT extends Collection<T>> void coderDecodeEncodeContentsEqualInContext(
-      Coder<CollectionT> coder, Coder.Context context, CollectionT value)
-      throws Exception {
-    // Matchers.containsInAnyOrder() requires at least one element
-    Collection<T> result = decodeEncode(coder, context, value);
-    if (value.isEmpty()) {
-      assertThat(result, emptyIterable());
-    } else {
-      // This is the only Matchers.containInAnyOrder() overload that takes literal values
-      assertThat(result, containsInAnyOrder((T[]) value.toArray()));
-    }
-  }
-
-  /**
-   * Verifies that for the given {@code Coder<Collection<T>>},
-   * and value of type {@code Collection<T>}, encoding followed by decoding yields an
-   * equal value of type {@code Collection<T>}, in any {@code Coder.Context}.
-   */
-  public static <T, IterableT extends Iterable<T>> void coderDecodeEncodeContentsInSameOrder(
-      Coder<IterableT> coder, IterableT value)
-      throws Exception {
-    for (Coder.Context context : ALL_CONTEXTS) {
-      CoderProperties.<T, IterableT>coderDecodeEncodeContentsInSameOrderInContext(
-          coder, context, value);
-    }
-  }
-
-  /**
-   * Verifies that for the given {@code Coder<Iterable<T>>},
-   * and value of type {@code Iterable<T>}, encoding followed by decoding yields an
-   * equal value of type {@code Collection<T>}, in the given {@code Coder.Context}.
-   */
-  @SuppressWarnings("unchecked")
-  public static <T, IterableT extends Iterable<T>> void
-      coderDecodeEncodeContentsInSameOrderInContext(
-          Coder<IterableT> coder, Coder.Context context, IterableT value)
-      throws Exception {
-    Iterable<T> result = decodeEncode(coder, context, value);
-    // Matchers.contains() requires at least one element
-    if (Iterables.isEmpty(value)) {
-      assertThat(result, emptyIterable());
-    } else {
-      // This is the only Matchers.contains() overload that takes literal values
-      assertThat(result, contains((T[]) Iterables.toArray(value, Object.class)));
-    }
-  }
-
-  public static <T> void coderSerializable(Coder<T> coder) {
-    SerializableUtils.ensureSerializable(coder);
-  }
-
-  public static <T> void coderConsistentWithEquals(
-      Coder<T> coder, T value1, T value2)
-      throws Exception {
-
-    for (Coder.Context context : ALL_CONTEXTS) {
-      CoderProperties.<T>coderConsistentWithEqualsInContext(coder, context, value1, value2);
-    }
-  }
-
-  public static <T> void coderConsistentWithEqualsInContext(
-      Coder<T> coder, Coder.Context context, T value1, T value2) throws Exception {
-
-    assertEquals(
-        value1.equals(value2),
-        Arrays.equals(
-            encode(coder, context, value1),
-            encode(coder, context, value2)));
-  }
-
-  public static <T> void coderHasEncodingId(Coder<T> coder, String encodingId) throws Exception {
-    assertThat(coder.getEncodingId(), equalTo(encodingId));
-    assertThat(Structs.getString(coder.asCloudObject(), PropertyNames.ENCODING_ID, ""),
-        equalTo(encodingId));
-  }
-
-  public static <T> void coderAllowsEncoding(Coder<T> coder, String encodingId) throws Exception {
-    assertThat(coder.getAllowedEncodings(), hasItem(encodingId));
-    assertThat(
-        String.format("Expected to find \"%s\" in property \"%s\" of %s",
-            encodingId, PropertyNames.ALLOWED_ENCODINGS, coder.asCloudObject()),
-        Structs.getStrings(
-            coder.asCloudObject(),
-            PropertyNames.ALLOWED_ENCODINGS,
-            Collections.<String>emptyList()),
-        hasItem(encodingId));
-  }
-
-  public static <T> void structuralValueConsistentWithEquals(
-      Coder<T> coder, T value1, T value2)
-      throws Exception {
-
-    for (Coder.Context context : ALL_CONTEXTS) {
-      CoderProperties.<T>structuralValueConsistentWithEqualsInContext(
-          coder, context, value1, value2);
-    }
-  }
-
-  public static <T> void structuralValueConsistentWithEqualsInContext(
-      Coder<T> coder, Coder.Context context, T value1, T value2) throws Exception {
-
-    assertEquals(
-        coder.structuralValue(value1).equals(coder.structuralValue(value2)),
-        Arrays.equals(
-            encode(coder, context, value1),
-            encode(coder, context, value2)));
-  }
-
-
-  private static final String DECODING_WIRE_FORMAT_MESSAGE =
-      "Decoded value from known wire format does not match expected value."
-      + " This probably means that this Coder no longer correctly decodes"
-      + " a prior wire format. Changing the wire formats this Coder can read"
-      + " should be avoided, as it is likely to cause breakage."
-      + " If you truly intend to change the backwards compatibility for this Coder "
-      + " then you must remove any now-unsupported encodings from getAllowedEncodings().";
-
-  public static <T> void coderDecodesBase64(Coder<T> coder, String base64Encoding, T value)
-      throws Exception {
-    assertThat(DECODING_WIRE_FORMAT_MESSAGE, CoderUtils.decodeFromBase64(coder, base64Encoding),
-        equalTo(value));
-  }
-
-  public static <T> void coderDecodesBase64(
-      Coder<T> coder, List<String> base64Encodings, List<T> values) throws Exception {
-    assertThat("List of base64 encodings has different size than List of values",
-        base64Encodings.size(), equalTo(values.size()));
-
-    for (int i = 0; i < base64Encodings.size(); i++) {
-      coderDecodesBase64(coder, base64Encodings.get(i), values.get(i));
-    }
-  }
-
-  private static final String ENCODING_WIRE_FORMAT_MESSAGE =
-      "Encoded value does not match expected wire format."
-      + " Changing the wire format should be avoided, as it is likely to cause breakage."
-      + " If you truly intend to change the wire format for this Coder "
-      + " then you must update getEncodingId() to a new value and add any supported"
-      + " prior formats to getAllowedEncodings()."
-      + " See com.google.cloud.dataflow.sdk.coders.PrintBase64Encoding for how to generate"
-      + " new test data.";
-
-  public static <T> void coderEncodesBase64(Coder<T> coder, T value, String base64Encoding)
-      throws Exception {
-    assertThat(ENCODING_WIRE_FORMAT_MESSAGE, CoderUtils.encodeToBase64(coder, value),
-        equalTo(base64Encoding));
-  }
-
-  public static <T> void coderEncodesBase64(
-      Coder<T> coder, List<T> values, List<String> base64Encodings) throws Exception {
-    assertThat("List of base64 encodings has different size than List of values",
-        base64Encodings.size(), equalTo(values.size()));
-
-    for (int i = 0; i < base64Encodings.size(); i++) {
-      coderEncodesBase64(coder, values.get(i), base64Encodings.get(i));
-    }
-  }
-
-  @SuppressWarnings("unchecked")
-  public static <T, IterableT extends Iterable<T>> void coderDecodesBase64ContentsEqual(
-      Coder<IterableT> coder, String base64Encoding, IterableT expected) throws Exception {
-
-    IterableT result = CoderUtils.decodeFromBase64(coder, base64Encoding);
-    if (Iterables.isEmpty(expected)) {
-      assertThat(ENCODING_WIRE_FORMAT_MESSAGE, result, emptyIterable());
-    } else {
-      assertThat(ENCODING_WIRE_FORMAT_MESSAGE, result,
-          containsInAnyOrder((T[]) Iterables.toArray(expected, Object.class)));
-    }
-  }
-
-  public static <T, IterableT extends Iterable<T>> void coderDecodesBase64ContentsEqual(
-      Coder<IterableT> coder, List<String> base64Encodings, List<IterableT> expected)
-          throws Exception {
-    assertThat("List of base64 encodings has different size than List of values",
-        base64Encodings.size(), equalTo(expected.size()));
-
-    for (int i = 0; i < base64Encodings.size(); i++) {
-      coderDecodesBase64ContentsEqual(coder, base64Encodings.get(i), expected.get(i));
-    }
-  }
-
-  //////////////////////////////////////////////////////////////////////////
-
-  @VisibleForTesting
-  static <T> byte[] encode(
-      Coder<T> coder, Coder.Context context, T value) throws CoderException, IOException {
-    @SuppressWarnings("unchecked")
-    Coder<T> deserializedCoder = Serializer.deserialize(coder.asCloudObject(), Coder.class);
-
-    ByteArrayOutputStream os = new ByteArrayOutputStream();
-    deserializedCoder.encode(value, new UnownedOutputStream(os), context);
-    return os.toByteArray();
-  }
-
-  @VisibleForTesting
-  static <T> T decode(
-      Coder<T> coder, Coder.Context context, byte[] bytes) throws CoderException, IOException {
-    @SuppressWarnings("unchecked")
-    Coder<T> deserializedCoder = Serializer.deserialize(coder.asCloudObject(), Coder.class);
-
-    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
-    return deserializedCoder.decode(new UnownedInputStream(is), context);
-  }
-
-  private static <T> T decodeEncode(Coder<T> coder, Coder.Context context, T value)
-      throws CoderException, IOException {
-    return decode(coder, context, encode(coder, context, value));
-  }
-}

[52/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

Directory reorganization

Move Java SDK-specific Javadoc information from "javadoc/" into "sdks/java/javadoc".


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/75cfa4ac
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/75cfa4ac
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/75cfa4ac

Branch: refs/heads/master
Commit: 75cfa4ac4114610d4296f0cef57d2148ff62055d
Parents: 7bef2b7
Author: Davor Bonaci <da...@google.com>
Authored: Wed Mar 23 16:55:05 2016 -0700
Committer: Davor Bonaci <da...@google.com>
Committed: Wed Mar 23 18:12:26 2016 -0700

----------------------------------------------------------------------
 javadoc/README.md                               |  4 ---
 javadoc/apiclient-docs/package-list             | 34 --------------------
 javadoc/avro-docs/package-list                  | 30 -----------------
 javadoc/bq-docs/package-list                    |  2 --
 javadoc/dataflow-sdk-docs/package-list          | 11 -------
 javadoc/datastore-docs/package-list             |  2 --
 javadoc/guava-docs/package-list                 | 15 ---------
 javadoc/hamcrest-docs/package-list              | 10 ------
 javadoc/jackson-annotations-docs/package-list   |  1 -
 javadoc/jackson-databind-docs/package-list      | 20 ------------
 javadoc/joda-docs/package-list                  |  7 ----
 javadoc/junit-docs/package-list                 |  7 ----
 javadoc/oauth-docs/package-list                 | 11 -------
 javadoc/overview.html                           | 31 ------------------
 sdks/java/core/pom.xml                          |  3 +-
 sdks/java/javadoc/README.md                     |  4 +++
 sdks/java/javadoc/apiclient-docs/package-list   | 34 ++++++++++++++++++++
 sdks/java/javadoc/avro-docs/package-list        | 30 +++++++++++++++++
 sdks/java/javadoc/bq-docs/package-list          |  2 ++
 .../java/javadoc/dataflow-sdk-docs/package-list | 11 +++++++
 sdks/java/javadoc/datastore-docs/package-list   |  2 ++
 sdks/java/javadoc/guava-docs/package-list       | 15 +++++++++
 sdks/java/javadoc/hamcrest-docs/package-list    | 10 ++++++
 .../jackson-annotations-docs/package-list       |  1 +
 .../javadoc/jackson-databind-docs/package-list  | 20 ++++++++++++
 sdks/java/javadoc/joda-docs/package-list        |  7 ++++
 sdks/java/javadoc/junit-docs/package-list       |  7 ++++
 sdks/java/javadoc/oauth-docs/package-list       | 11 +++++++
 sdks/java/javadoc/overview.html                 | 31 ++++++++++++++++++
 29 files changed, 187 insertions(+), 186 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/README.md
----------------------------------------------------------------------
diff --git a/javadoc/README.md b/javadoc/README.md
deleted file mode 100644
index 8240d3c..0000000
--- a/javadoc/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# SDK Javadoc
-
-This directory contains package-info files for external javadoc we would like
-our javadoc to link to using `-linkoffline`.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/apiclient-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/apiclient-docs/package-list b/javadoc/apiclient-docs/package-list
deleted file mode 100644
index 3ec1471..0000000
--- a/javadoc/apiclient-docs/package-list
+++ /dev/null
@@ -1,34 +0,0 @@
-com.google.api.client.googleapis
-com.google.api.client.googleapis.apache
-com.google.api.client.googleapis.auth.clientlogin
-com.google.api.client.googleapis.auth.oauth2
-com.google.api.client.googleapis.batch
-com.google.api.client.googleapis.batch.json
-com.google.api.client.googleapis.compute
-com.google.api.client.googleapis.extensions.android.accounts
-com.google.api.client.googleapis.extensions.android.gms.auth
-com.google.api.client.googleapis.extensions.appengine.auth.oauth2
-com.google.api.client.googleapis.extensions.appengine.notifications
-com.google.api.client.googleapis.extensions.appengine.testing.auth.oauth2
-com.google.api.client.googleapis.extensions.java6.auth.oauth2
-com.google.api.client.googleapis.extensions.servlet.notifications
-com.google.api.client.googleapis.javanet
-com.google.api.client.googleapis.json
-com.google.api.client.googleapis.media
-com.google.api.client.googleapis.notifications
-com.google.api.client.googleapis.notifications.json
-com.google.api.client.googleapis.notifications.json.gson
-com.google.api.client.googleapis.notifications.json.jackson2
-com.google.api.client.googleapis.services
-com.google.api.client.googleapis.services.json
-com.google.api.client.googleapis.services.protobuf
-com.google.api.client.googleapis.testing
-com.google.api.client.googleapis.testing.auth.oauth2
-com.google.api.client.googleapis.testing.compute
-com.google.api.client.googleapis.testing.json
-com.google.api.client.googleapis.testing.notifications
-com.google.api.client.googleapis.testing.services
-com.google.api.client.googleapis.testing.services.json
-com.google.api.client.googleapis.testing.services.protobuf
-com.google.api.client.googleapis.util
-com.google.api.client.googleapis.xml.atom

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/avro-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/avro-docs/package-list b/javadoc/avro-docs/package-list
deleted file mode 100644
index 319ff01..0000000
--- a/javadoc/avro-docs/package-list
+++ /dev/null
@@ -1,30 +0,0 @@
-org.apache.avro
-org.apache.avro.compiler.idl
-org.apache.avro.compiler.specific
-org.apache.avro.data
-org.apache.avro.file
-org.apache.avro.generic
-org.apache.avro.hadoop.file
-org.apache.avro.hadoop.io
-org.apache.avro.hadoop.util
-org.apache.avro.io
-org.apache.avro.io.parsing
-org.apache.avro.ipc
-org.apache.avro.ipc.generic
-org.apache.avro.ipc.reflect
-org.apache.avro.ipc.specific
-org.apache.avro.ipc.stats
-org.apache.avro.ipc.trace
-org.apache.avro.mapred
-org.apache.avro.mapred.tether
-org.apache.avro.mapreduce
-org.apache.avro.mojo
-org.apache.avro.protobuf
-org.apache.avro.reflect
-org.apache.avro.specific
-org.apache.avro.thrift
-org.apache.avro.tool
-org.apache.avro.util
-org.apache.trevni
-org.apache.trevni.avro
-org.apache.trevni.avro.mapreduce

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/bq-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/bq-docs/package-list b/javadoc/bq-docs/package-list
deleted file mode 100644
index 384b3fc..0000000
--- a/javadoc/bq-docs/package-list
+++ /dev/null
@@ -1,2 +0,0 @@
-com.google.api.services.bigquery
-com.google.api.services.bigquery.model

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/dataflow-sdk-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/dataflow-sdk-docs/package-list b/javadoc/dataflow-sdk-docs/package-list
deleted file mode 100644
index a26f5a3..0000000
--- a/javadoc/dataflow-sdk-docs/package-list
+++ /dev/null
@@ -1,11 +0,0 @@
-com.google.cloud.dataflow.sdk
-com.google.cloud.dataflow.sdk.annotations
-com.google.cloud.dataflow.sdk.coders
-com.google.cloud.dataflow.sdk.io
-com.google.cloud.dataflow.sdk.options
-com.google.cloud.dataflow.sdk.runners
-com.google.cloud.dataflow.sdk.testing
-com.google.cloud.dataflow.sdk.transforms
-com.google.cloud.dataflow.sdk.transforms.join
-com.google.cloud.dataflow.sdk.transforms.windowing
-com.google.cloud.dataflow.sdk.values

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/datastore-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/datastore-docs/package-list b/javadoc/datastore-docs/package-list
deleted file mode 100644
index ebbafd8..0000000
--- a/javadoc/datastore-docs/package-list
+++ /dev/null
@@ -1,2 +0,0 @@
-com.google.api.services.datastore
-com.google.api.services.datastore.client

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/guava-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/guava-docs/package-list b/javadoc/guava-docs/package-list
deleted file mode 100644
index f855178..0000000
--- a/javadoc/guava-docs/package-list
+++ /dev/null
@@ -1,15 +0,0 @@
-com.google.common.annotations
-com.google.common.base
-com.google.common.cache
-com.google.common.collect
-com.google.common.escape
-com.google.common.eventbus
-com.google.common.hash
-com.google.common.html
-com.google.common.io
-com.google.common.math
-com.google.common.net
-com.google.common.primitives
-com.google.common.reflect
-com.google.common.util.concurrent
-com.google.common.xml

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/hamcrest-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/hamcrest-docs/package-list b/javadoc/hamcrest-docs/package-list
deleted file mode 100644
index 3f5e945..0000000
--- a/javadoc/hamcrest-docs/package-list
+++ /dev/null
@@ -1,10 +0,0 @@
-org.hamcrest
-org.hamcrest.beans
-org.hamcrest.collection
-org.hamcrest.core
-org.hamcrest.integration
-org.hamcrest.internal
-org.hamcrest.number
-org.hamcrest.object
-org.hamcrest.text
-org.hamcrest.xml

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/jackson-annotations-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/jackson-annotations-docs/package-list b/javadoc/jackson-annotations-docs/package-list
deleted file mode 100644
index 768b3ba..0000000
--- a/javadoc/jackson-annotations-docs/package-list
+++ /dev/null
@@ -1 +0,0 @@
-com.fasterxml.jackson.annotation

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/jackson-databind-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/jackson-databind-docs/package-list b/javadoc/jackson-databind-docs/package-list
deleted file mode 100644
index 8a2cd8b..0000000
--- a/javadoc/jackson-databind-docs/package-list
+++ /dev/null
@@ -1,20 +0,0 @@
-com.fasterxml.jackson.databind
-com.fasterxml.jackson.databind.annotation
-com.fasterxml.jackson.databind.cfg
-com.fasterxml.jackson.databind.deser
-com.fasterxml.jackson.databind.deser.impl
-com.fasterxml.jackson.databind.deser.std
-com.fasterxml.jackson.databind.exc
-com.fasterxml.jackson.databind.ext
-com.fasterxml.jackson.databind.introspect
-com.fasterxml.jackson.databind.jsonFormatVisitors
-com.fasterxml.jackson.databind.jsonschema
-com.fasterxml.jackson.databind.jsontype
-com.fasterxml.jackson.databind.jsontype.impl
-com.fasterxml.jackson.databind.module
-com.fasterxml.jackson.databind.node
-com.fasterxml.jackson.databind.ser
-com.fasterxml.jackson.databind.ser.impl
-com.fasterxml.jackson.databind.ser.std
-com.fasterxml.jackson.databind.type
-com.fasterxml.jackson.databind.util

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/joda-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/joda-docs/package-list b/javadoc/joda-docs/package-list
deleted file mode 100644
index 2ab05aa..0000000
--- a/javadoc/joda-docs/package-list
+++ /dev/null
@@ -1,7 +0,0 @@
-org.joda.time
-org.joda.time.base
-org.joda.time.chrono
-org.joda.time.convert
-org.joda.time.field
-org.joda.time.format
-org.joda.time.tz

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/junit-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/junit-docs/package-list b/javadoc/junit-docs/package-list
deleted file mode 100644
index 0735177..0000000
--- a/javadoc/junit-docs/package-list
+++ /dev/null
@@ -1,7 +0,0 @@
-org.hamcrest.core
-org.junit
-org.junit.matchers
-org.junit.runner
-org.junit.runner.manipulation
-org.junit.runner.notification
-org.junit.runners

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/oauth-docs/package-list
----------------------------------------------------------------------
diff --git a/javadoc/oauth-docs/package-list b/javadoc/oauth-docs/package-list
deleted file mode 100644
index 38fc046..0000000
--- a/javadoc/oauth-docs/package-list
+++ /dev/null
@@ -1,11 +0,0 @@
-com.google.api.client.auth.oauth
-com.google.api.client.auth.oauth2
-com.google.api.client.auth.openidconnect
-com.google.api.client.extensions.appengine.auth
-com.google.api.client.extensions.appengine.auth.oauth2
-com.google.api.client.extensions.auth.helpers
-com.google.api.client.extensions.auth.helpers.oauth
-com.google.api.client.extensions.java6.auth.oauth2
-com.google.api.client.extensions.jetty.auth.oauth2
-com.google.api.client.extensions.servlet.auth
-com.google.api.client.extensions.servlet.auth.oauth2

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/javadoc/overview.html
----------------------------------------------------------------------
diff --git a/javadoc/overview.html b/javadoc/overview.html
deleted file mode 100644
index 4ffd33f..0000000
--- a/javadoc/overview.html
+++ /dev/null
@@ -1,31 +0,0 @@
-<!DOCTYPE html>
-<html>
-  <head>
-    <title>Google Cloud Dataflow Java SDK</title>
-  </head>
-  <body>
-    <p>The Google Cloud Dataflow SDK for Java provides a simple and elegant
-       programming model to express your data processing pipelines;
-       see <a href="https://cloud.google.com/dataflow/">our product page</a>
-       for more information and getting started instructions.</p>
-
-    <p>The easiest way to use the Google Cloud Dataflow SDK for Java is via
-       one of the released artifacts from the
-       <a href="http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.google.cloud.dataflow%22">
-       Maven Central Repository</a>.
-       See our <a href="https://cloud.google.com/dataflow/release-notes/java">
-       release notes</a> for more information about each released version.<p>
-
-    <p>Version numbers use the form <i>major</i>.<i>minor</i>.<i>incremental</i>
-       and are incremented as follows:<p>
-    <ul>
-      <li>major version for incompatible API changes</li>
-      <li>minor version for new functionality added in a backward-compatible manner</li>
-      <li>incremental version for forward-compatible bug fixes</li>
-    </ul>
-
-    <p>Please note that APIs marked
-    {@link com.google.cloud.dataflow.sdk.annotations.Experimental @Experimental}
-    may change at any point and are not guaranteed to remain compatible across versions.</p>
-  </body>
-</html>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/core/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/core/pom.xml b/sdks/java/core/pom.xml
index e90446c..2b9e4a9 100644
--- a/sdks/java/core/pom.xml
+++ b/sdks/java/core/pom.xml
@@ -157,7 +157,8 @@
       </plugin>
 
       <plugin>
-        <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-javadoc-plugin</artifactId>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
         <configuration>
           <windowtitle>Google Cloud Dataflow SDK ${project.version} API</windowtitle>
           <doctitle>Google Cloud Dataflow SDK for Java, version ${project.version}</doctitle>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/README.md
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/README.md b/sdks/java/javadoc/README.md
new file mode 100644
index 0000000..8240d3c
--- /dev/null
+++ b/sdks/java/javadoc/README.md
@@ -0,0 +1,4 @@
+# SDK Javadoc
+
+This directory contains package-info files for external javadoc we would like
+our javadoc to link to using `-linkoffline`.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/apiclient-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/apiclient-docs/package-list b/sdks/java/javadoc/apiclient-docs/package-list
new file mode 100644
index 0000000..3ec1471
--- /dev/null
+++ b/sdks/java/javadoc/apiclient-docs/package-list
@@ -0,0 +1,34 @@
+com.google.api.client.googleapis
+com.google.api.client.googleapis.apache
+com.google.api.client.googleapis.auth.clientlogin
+com.google.api.client.googleapis.auth.oauth2
+com.google.api.client.googleapis.batch
+com.google.api.client.googleapis.batch.json
+com.google.api.client.googleapis.compute
+com.google.api.client.googleapis.extensions.android.accounts
+com.google.api.client.googleapis.extensions.android.gms.auth
+com.google.api.client.googleapis.extensions.appengine.auth.oauth2
+com.google.api.client.googleapis.extensions.appengine.notifications
+com.google.api.client.googleapis.extensions.appengine.testing.auth.oauth2
+com.google.api.client.googleapis.extensions.java6.auth.oauth2
+com.google.api.client.googleapis.extensions.servlet.notifications
+com.google.api.client.googleapis.javanet
+com.google.api.client.googleapis.json
+com.google.api.client.googleapis.media
+com.google.api.client.googleapis.notifications
+com.google.api.client.googleapis.notifications.json
+com.google.api.client.googleapis.notifications.json.gson
+com.google.api.client.googleapis.notifications.json.jackson2
+com.google.api.client.googleapis.services
+com.google.api.client.googleapis.services.json
+com.google.api.client.googleapis.services.protobuf
+com.google.api.client.googleapis.testing
+com.google.api.client.googleapis.testing.auth.oauth2
+com.google.api.client.googleapis.testing.compute
+com.google.api.client.googleapis.testing.json
+com.google.api.client.googleapis.testing.notifications
+com.google.api.client.googleapis.testing.services
+com.google.api.client.googleapis.testing.services.json
+com.google.api.client.googleapis.testing.services.protobuf
+com.google.api.client.googleapis.util
+com.google.api.client.googleapis.xml.atom

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/avro-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/avro-docs/package-list b/sdks/java/javadoc/avro-docs/package-list
new file mode 100644
index 0000000..319ff01
--- /dev/null
+++ b/sdks/java/javadoc/avro-docs/package-list
@@ -0,0 +1,30 @@
+org.apache.avro
+org.apache.avro.compiler.idl
+org.apache.avro.compiler.specific
+org.apache.avro.data
+org.apache.avro.file
+org.apache.avro.generic
+org.apache.avro.hadoop.file
+org.apache.avro.hadoop.io
+org.apache.avro.hadoop.util
+org.apache.avro.io
+org.apache.avro.io.parsing
+org.apache.avro.ipc
+org.apache.avro.ipc.generic
+org.apache.avro.ipc.reflect
+org.apache.avro.ipc.specific
+org.apache.avro.ipc.stats
+org.apache.avro.ipc.trace
+org.apache.avro.mapred
+org.apache.avro.mapred.tether
+org.apache.avro.mapreduce
+org.apache.avro.mojo
+org.apache.avro.protobuf
+org.apache.avro.reflect
+org.apache.avro.specific
+org.apache.avro.thrift
+org.apache.avro.tool
+org.apache.avro.util
+org.apache.trevni
+org.apache.trevni.avro
+org.apache.trevni.avro.mapreduce

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/bq-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/bq-docs/package-list b/sdks/java/javadoc/bq-docs/package-list
new file mode 100644
index 0000000..384b3fc
--- /dev/null
+++ b/sdks/java/javadoc/bq-docs/package-list
@@ -0,0 +1,2 @@
+com.google.api.services.bigquery
+com.google.api.services.bigquery.model

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/dataflow-sdk-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/dataflow-sdk-docs/package-list b/sdks/java/javadoc/dataflow-sdk-docs/package-list
new file mode 100644
index 0000000..a26f5a3
--- /dev/null
+++ b/sdks/java/javadoc/dataflow-sdk-docs/package-list
@@ -0,0 +1,11 @@
+com.google.cloud.dataflow.sdk
+com.google.cloud.dataflow.sdk.annotations
+com.google.cloud.dataflow.sdk.coders
+com.google.cloud.dataflow.sdk.io
+com.google.cloud.dataflow.sdk.options
+com.google.cloud.dataflow.sdk.runners
+com.google.cloud.dataflow.sdk.testing
+com.google.cloud.dataflow.sdk.transforms
+com.google.cloud.dataflow.sdk.transforms.join
+com.google.cloud.dataflow.sdk.transforms.windowing
+com.google.cloud.dataflow.sdk.values

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/datastore-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/datastore-docs/package-list b/sdks/java/javadoc/datastore-docs/package-list
new file mode 100644
index 0000000..ebbafd8
--- /dev/null
+++ b/sdks/java/javadoc/datastore-docs/package-list
@@ -0,0 +1,2 @@
+com.google.api.services.datastore
+com.google.api.services.datastore.client

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/guava-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/guava-docs/package-list b/sdks/java/javadoc/guava-docs/package-list
new file mode 100644
index 0000000..f855178
--- /dev/null
+++ b/sdks/java/javadoc/guava-docs/package-list
@@ -0,0 +1,15 @@
+com.google.common.annotations
+com.google.common.base
+com.google.common.cache
+com.google.common.collect
+com.google.common.escape
+com.google.common.eventbus
+com.google.common.hash
+com.google.common.html
+com.google.common.io
+com.google.common.math
+com.google.common.net
+com.google.common.primitives
+com.google.common.reflect
+com.google.common.util.concurrent
+com.google.common.xml

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/hamcrest-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/hamcrest-docs/package-list b/sdks/java/javadoc/hamcrest-docs/package-list
new file mode 100644
index 0000000..3f5e945
--- /dev/null
+++ b/sdks/java/javadoc/hamcrest-docs/package-list
@@ -0,0 +1,10 @@
+org.hamcrest
+org.hamcrest.beans
+org.hamcrest.collection
+org.hamcrest.core
+org.hamcrest.integration
+org.hamcrest.internal
+org.hamcrest.number
+org.hamcrest.object
+org.hamcrest.text
+org.hamcrest.xml

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/jackson-annotations-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/jackson-annotations-docs/package-list b/sdks/java/javadoc/jackson-annotations-docs/package-list
new file mode 100644
index 0000000..768b3ba
--- /dev/null
+++ b/sdks/java/javadoc/jackson-annotations-docs/package-list
@@ -0,0 +1 @@
+com.fasterxml.jackson.annotation

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/jackson-databind-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/jackson-databind-docs/package-list b/sdks/java/javadoc/jackson-databind-docs/package-list
new file mode 100644
index 0000000..8a2cd8b
--- /dev/null
+++ b/sdks/java/javadoc/jackson-databind-docs/package-list
@@ -0,0 +1,20 @@
+com.fasterxml.jackson.databind
+com.fasterxml.jackson.databind.annotation
+com.fasterxml.jackson.databind.cfg
+com.fasterxml.jackson.databind.deser
+com.fasterxml.jackson.databind.deser.impl
+com.fasterxml.jackson.databind.deser.std
+com.fasterxml.jackson.databind.exc
+com.fasterxml.jackson.databind.ext
+com.fasterxml.jackson.databind.introspect
+com.fasterxml.jackson.databind.jsonFormatVisitors
+com.fasterxml.jackson.databind.jsonschema
+com.fasterxml.jackson.databind.jsontype
+com.fasterxml.jackson.databind.jsontype.impl
+com.fasterxml.jackson.databind.module
+com.fasterxml.jackson.databind.node
+com.fasterxml.jackson.databind.ser
+com.fasterxml.jackson.databind.ser.impl
+com.fasterxml.jackson.databind.ser.std
+com.fasterxml.jackson.databind.type
+com.fasterxml.jackson.databind.util

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/joda-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/joda-docs/package-list b/sdks/java/javadoc/joda-docs/package-list
new file mode 100644
index 0000000..2ab05aa
--- /dev/null
+++ b/sdks/java/javadoc/joda-docs/package-list
@@ -0,0 +1,7 @@
+org.joda.time
+org.joda.time.base
+org.joda.time.chrono
+org.joda.time.convert
+org.joda.time.field
+org.joda.time.format
+org.joda.time.tz

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/junit-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/junit-docs/package-list b/sdks/java/javadoc/junit-docs/package-list
new file mode 100644
index 0000000..0735177
--- /dev/null
+++ b/sdks/java/javadoc/junit-docs/package-list
@@ -0,0 +1,7 @@
+org.hamcrest.core
+org.junit
+org.junit.matchers
+org.junit.runner
+org.junit.runner.manipulation
+org.junit.runner.notification
+org.junit.runners

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/oauth-docs/package-list
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/oauth-docs/package-list b/sdks/java/javadoc/oauth-docs/package-list
new file mode 100644
index 0000000..38fc046
--- /dev/null
+++ b/sdks/java/javadoc/oauth-docs/package-list
@@ -0,0 +1,11 @@
+com.google.api.client.auth.oauth
+com.google.api.client.auth.oauth2
+com.google.api.client.auth.openidconnect
+com.google.api.client.extensions.appengine.auth
+com.google.api.client.extensions.appengine.auth.oauth2
+com.google.api.client.extensions.auth.helpers
+com.google.api.client.extensions.auth.helpers.oauth
+com.google.api.client.extensions.java6.auth.oauth2
+com.google.api.client.extensions.jetty.auth.oauth2
+com.google.api.client.extensions.servlet.auth
+com.google.api.client.extensions.servlet.auth.oauth2

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/75cfa4ac/sdks/java/javadoc/overview.html
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/overview.html b/sdks/java/javadoc/overview.html
new file mode 100644
index 0000000..4ffd33f
--- /dev/null
+++ b/sdks/java/javadoc/overview.html
@@ -0,0 +1,31 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <title>Google Cloud Dataflow Java SDK</title>
+  </head>
+  <body>
+    <p>The Google Cloud Dataflow SDK for Java provides a simple and elegant
+       programming model to express your data processing pipelines;
+       see <a href="https://cloud.google.com/dataflow/">our product page</a>
+       for more information and getting started instructions.</p>
+
+    <p>The easiest way to use the Google Cloud Dataflow SDK for Java is via
+       one of the released artifacts from the
+       <a href="http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22com.google.cloud.dataflow%22">
+       Maven Central Repository</a>.
+       See our <a href="https://cloud.google.com/dataflow/release-notes/java">
+       release notes</a> for more information about each released version.<p>
+
+    <p>Version numbers use the form <i>major</i>.<i>minor</i>.<i>incremental</i>
+       and are incremented as follows:<p>
+    <ul>
+      <li>major version for incompatible API changes</li>
+      <li>minor version for new functionality added in a backward-compatible manner</li>
+      <li>incremental version for forward-compatible bug fixes</li>
+    </ul>
+
+    <p>Please note that APIs marked
+    {@link com.google.cloud.dataflow.sdk.annotations.Experimental @Experimental}
+    may change at any point and are not guaranteed to remain compatible across versions.</p>
+  </body>
+</html>

[39/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableServiceImpl.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableServiceImpl.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableServiceImpl.java
deleted file mode 100644
index 5ab8582..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableServiceImpl.java
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.io.bigtable;
-
-import com.google.bigtable.admin.table.v1.GetTableRequest;
-import com.google.bigtable.v1.MutateRowRequest;
-import com.google.bigtable.v1.Mutation;
-import com.google.bigtable.v1.ReadRowsRequest;
-import com.google.bigtable.v1.Row;
-import com.google.bigtable.v1.RowRange;
-import com.google.bigtable.v1.SampleRowKeysRequest;
-import com.google.bigtable.v1.SampleRowKeysResponse;
-import com.google.cloud.bigtable.config.BigtableOptions;
-import com.google.cloud.bigtable.grpc.BigtableSession;
-import com.google.cloud.bigtable.grpc.async.AsyncExecutor;
-import com.google.cloud.bigtable.grpc.async.HeapSizeManager;
-import com.google.cloud.bigtable.grpc.scanner.ResultScanner;
-import com.google.cloud.dataflow.sdk.io.bigtable.BigtableIO.BigtableSource;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.common.base.MoreObjects;
-import com.google.common.io.Closer;
-import com.google.common.util.concurrent.ListenableFuture;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.Empty;
-
-import io.grpc.Status.Code;
-import io.grpc.StatusRuntimeException;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-/**
- * An implementation of {@link BigtableService} that actually communicates with the Cloud Bigtable
- * service.
- */
-class BigtableServiceImpl implements BigtableService {
-  private static final Logger logger = LoggerFactory.getLogger(BigtableService.class);
-
-  public BigtableServiceImpl(BigtableOptions options) {
-    this.options = options;
-  }
-
-  private final BigtableOptions options;
-
-  @Override
-  public BigtableWriterImpl openForWriting(String tableId) throws IOException {
-    BigtableSession session = new BigtableSession(options);
-    String tableName = options.getClusterName().toTableNameStr(tableId);
-    return new BigtableWriterImpl(session, tableName);
-  }
-
-  @Override
-  public boolean tableExists(String tableId) throws IOException {
-    if (!BigtableSession.isAlpnProviderEnabled()) {
-      logger.info(
-          "Skipping existence check for table {} (BigtableOptions {}) because ALPN is not"
-              + " configured.",
-          tableId,
-          options);
-      return true;
-    }
-
-    try (BigtableSession session = new BigtableSession(options)) {
-      GetTableRequest getTable =
-          GetTableRequest.newBuilder()
-              .setName(options.getClusterName().toTableNameStr(tableId))
-              .build();
-      session.getTableAdminClient().getTable(getTable);
-      return true;
-    } catch (StatusRuntimeException e) {
-      if (e.getStatus().getCode() == Code.NOT_FOUND) {
-        return false;
-      }
-      String message =
-          String.format(
-              "Error checking whether table %s (BigtableOptions %s) exists", tableId, options);
-      logger.error(message, e);
-      throw new IOException(message, e);
-    }
-  }
-
-  private class BigtableReaderImpl implements Reader {
-    private BigtableSession session;
-    private final BigtableSource source;
-    private ResultScanner<Row> results;
-    private Row currentRow;
-
-    public BigtableReaderImpl(BigtableSession session, BigtableSource source) {
-      this.session = session;
-      this.source = source;
-    }
-
-    @Override
-    public boolean start() throws IOException {
-      RowRange range =
-          RowRange.newBuilder()
-              .setStartKey(source.getRange().getStartKey().getValue())
-              .setEndKey(source.getRange().getEndKey().getValue())
-              .build();
-      ReadRowsRequest.Builder requestB =
-          ReadRowsRequest.newBuilder()
-              .setRowRange(range)
-              .setTableName(options.getClusterName().toTableNameStr(source.getTableId()));
-      if (source.getRowFilter() != null) {
-        requestB.setFilter(source.getRowFilter());
-      }
-      results = session.getDataClient().readRows(requestB.build());
-      return advance();
-    }
-
-    @Override
-    public boolean advance() throws IOException {
-      currentRow = results.next();
-      return (currentRow != null);
-    }
-
-    @Override
-    public void close() throws IOException {
-      // Goal: by the end of this function, both results and session are null and closed,
-      // independent of what errors they throw or prior state.
-
-      if (session == null) {
-        // Only possible when previously closed, so we know that results is also null.
-        return;
-      }
-
-      // Session does not implement Closeable -- it's AutoCloseable. So we can't register it with
-      // the Closer, but we can use the Closer to simplify the error handling.
-      try (Closer closer = Closer.create()) {
-        if (results != null) {
-          closer.register(results);
-          results = null;
-        }
-
-        session.close();
-      } finally {
-        session = null;
-      }
-    }
-
-    @Override
-    public Row getCurrentRow() throws NoSuchElementException {
-      if (currentRow == null) {
-        throw new NoSuchElementException();
-      }
-      return currentRow;
-    }
-  }
-
-  private static class BigtableWriterImpl implements Writer {
-    private BigtableSession session;
-    private AsyncExecutor executor;
-    private final MutateRowRequest.Builder partialBuilder;
-
-    public BigtableWriterImpl(BigtableSession session, String tableName) {
-      this.session = session;
-      this.executor =
-          new AsyncExecutor(
-              session.getDataClient(),
-              new HeapSizeManager(
-                  AsyncExecutor.ASYNC_MUTATOR_MAX_MEMORY_DEFAULT,
-                  AsyncExecutor.MAX_INFLIGHT_RPCS_DEFAULT));
-
-      partialBuilder = MutateRowRequest.newBuilder().setTableName(tableName);
-    }
-
-    @Override
-    public void close() throws IOException {
-      try {
-        if (executor != null) {
-          executor.flush();
-          executor = null;
-        }
-      } finally {
-        if (session != null) {
-          session.close();
-          session = null;
-        }
-      }
-    }
-
-    @Override
-    public ListenableFuture<Empty> writeRecord(KV<ByteString, Iterable<Mutation>> record)
-        throws IOException {
-      MutateRowRequest r =
-          partialBuilder
-              .clone()
-              .setRowKey(record.getKey())
-              .addAllMutations(record.getValue())
-              .build();
-      try {
-        return executor.mutateRowAsync(r);
-      } catch (InterruptedException e) {
-        Thread.currentThread().interrupt();
-        throw new IOException("Write interrupted", e);
-      }
-    }
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects
-        .toStringHelper(BigtableServiceImpl.class)
-        .add("options", options)
-        .toString();
-  }
-
-  @Override
-  public Reader createReader(BigtableSource source) throws IOException {
-    BigtableSession session = new BigtableSession(options);
-    return new BigtableReaderImpl(session, source);
-  }
-
-  @Override
-  public List<SampleRowKeysResponse> getSampleRowKeys(BigtableSource source) throws IOException {
-    try (BigtableSession session = new BigtableSession(options)) {
-      SampleRowKeysRequest request =
-          SampleRowKeysRequest.newBuilder()
-              .setTableName(options.getClusterName().toTableNameStr(source.getTableId()))
-              .build();
-      return session.getDataClient().sampleRowKeys(request);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/package-info.java
deleted file mode 100644
index 112a954..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines transforms for reading and writing from Google Cloud Bigtable.
- *
- * @see com.google.cloud.dataflow.sdk.io.bigtable.BigtableIO
- */
-package com.google.cloud.dataflow.sdk.io.bigtable;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java
deleted file mode 100644
index de0bd86..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/package-info.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines transforms for reading and writing common storage formats, including
- * {@link com.google.cloud.dataflow.sdk.io.AvroIO},
- * {@link com.google.cloud.dataflow.sdk.io.BigQueryIO}, and
- * {@link com.google.cloud.dataflow.sdk.io.TextIO}.
- *
- * <p>The classes in this package provide {@code Read} transforms that create PCollections
- * from existing storage:
- * <pre>{@code
- * PCollection<TableRow> inputData = pipeline.apply(
- *     BigQueryIO.Read.named("Read")
- *                    .from("clouddataflow-readonly:samples.weather_stations"));
- * }</pre>
- * and {@code Write} transforms that persist PCollections to external storage:
- * <pre> {@code
- * PCollection<Integer> numbers = ...;
- * numbers.apply(TextIO.Write.named("WriteNumbers")
- *                           .to("gs://my_bucket/path/to/numbers"));
- * } </pre>
- */
-package com.google.cloud.dataflow.sdk.io;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKey.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKey.java
deleted file mode 100644
index 30772da..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKey.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io.range;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.protobuf.ByteString;
-import com.google.protobuf.ByteString.ByteIterator;
-
-import java.io.Serializable;
-
-/**
- * A class representing a key consisting of an array of bytes. Arbitrary-length
- * {@code byte[]} keys are typical in key-value stores such as Google Cloud Bigtable.
- *
- * <p>Instances of {@link ByteKey} are immutable.
- *
- * <p>{@link ByteKey} implements {@link Comparable Comparable&lt;ByteKey&gt;} by comparing the
- * arrays in lexicographic order. The smallest {@link ByteKey} is a zero-length array; the successor
- * to a key is the same key with an additional 0 byte appended; and keys have unbounded size.
- *
- * <p>Note that the empty {@link ByteKey} compares smaller than all other keys, but some systems
- * have the semantic that when an empty {@link ByteKey} is used as an upper bound, it represents
- * the largest possible key. In these cases, implementors should use {@link #isEmpty} to test
- * whether an upper bound key is empty.
- */
-public final class ByteKey implements Comparable<ByteKey>, Serializable {
-  /** An empty key. */
-  public static final ByteKey EMPTY = ByteKey.of();
-
-  /**
-   * Creates a new {@link ByteKey} backed by the specified {@link ByteString}.
-   */
-  public static ByteKey of(ByteString value) {
-    return new ByteKey(value);
-  }
-
-  /**
-   * Creates a new {@link ByteKey} backed by a copy of the specified {@code byte[]}.
-   *
-   * <p>Makes a copy of the underlying array.
-   */
-  public static ByteKey copyFrom(byte[] bytes) {
-    return of(ByteString.copyFrom(bytes));
-  }
-
-  /**
-   * Creates a new {@link ByteKey} backed by a copy of the specified {@code int[]}. This method is
-   * primarily used as a convenience to create a {@link ByteKey} in code without casting down to
-   * signed Java {@link Byte bytes}:
-   *
-   * <pre>{@code
-   * ByteKey key = ByteKey.of(0xde, 0xad, 0xbe, 0xef);
-   * }</pre>
-   *
-   * <p>Makes a copy of the input.
-   */
-  public static ByteKey of(int... bytes) {
-    byte[] ret = new byte[bytes.length];
-    for (int i = 0; i < bytes.length; ++i) {
-      ret[i] = (byte) (bytes[i] & 0xff);
-    }
-    return ByteKey.copyFrom(ret);
-  }
-
-  /**
-   * Returns an immutable {@link ByteString} representing this {@link ByteKey}.
-   *
-   * <p>Does not copy.
-   */
-  public ByteString getValue() {
-    return value;
-  }
-
-  /**
-   * Returns a newly-allocated {@code byte[]} representing this {@link ByteKey}.
-   *
-   * <p>Copies the underlying {@code byte[]}.
-   */
-  public byte[] getBytes() {
-    return value.toByteArray();
-  }
-
-  /**
-   * Returns {@code true} if the {@code byte[]} backing this {@link ByteKey} is of length 0.
-   */
-  public boolean isEmpty() {
-    return value.isEmpty();
-  }
-
-  /**
-   * {@link ByteKey} implements {@link Comparable Comparable&lt;ByteKey&gt;} by comparing the
-   * arrays in lexicographic order. The smallest {@link ByteKey} is a zero-length array; the
-   * successor to a key is the same key with an additional 0 byte appended; and keys have unbounded
-   * size.
-   */
-  @Override
-  public int compareTo(ByteKey other) {
-    checkNotNull(other, "other");
-    ByteIterator thisIt = value.iterator();
-    ByteIterator otherIt = other.value.iterator();
-    while (thisIt.hasNext() && otherIt.hasNext()) {
-      // (byte & 0xff) converts [-128,127] bytes to [0,255] ints.
-      int cmp = (thisIt.nextByte() & 0xff) - (otherIt.nextByte() & 0xff);
-      if (cmp != 0) {
-        return cmp;
-      }
-    }
-    // If we get here, the prefix of both arrays is equal up to the shorter array. The array with
-    // more bytes is larger.
-    return value.size() - other.value.size();
-  }
-
-  ////////////////////////////////////////////////////////////////////////////////////
-  private final ByteString value;
-
-  private ByteKey(ByteString value) {
-    this.value = value;
-  }
-
-  /** Array used as a helper in {@link #toString}. */
-  private static final char[] HEX =
-      new char[] {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
-
-  // Prints the key as a string "[deadbeef]".
-  @Override
-  public String toString() {
-    char[] encoded = new char[2 * value.size() + 2];
-    encoded[0] = '[';
-    int cnt = 1;
-    ByteIterator iterator = value.iterator();
-    while (iterator.hasNext()) {
-      byte b = iterator.nextByte();
-      encoded[cnt] = HEX[(b & 0xF0) >>> 4];
-      ++cnt;
-      encoded[cnt] = HEX[b & 0xF];
-      ++cnt;
-    }
-    encoded[cnt] = ']';
-    return new String(encoded);
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (o == this) {
-      return true;
-    }
-    if (!(o instanceof ByteKey)) {
-      return false;
-    }
-    ByteKey other = (ByteKey) o;
-    return (other.value.size() == value.size()) && this.compareTo(other) == 0;
-  }
-
-  @Override
-  public int hashCode() {
-    return value.hashCode();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKeyRange.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKeyRange.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKeyRange.java
deleted file mode 100644
index 6f58d39..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKeyRange.java
+++ /dev/null
@@ -1,376 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io.range;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkState;
-import static com.google.common.base.Verify.verify;
-
-import com.google.common.base.MoreObjects;
-import com.google.common.collect.ImmutableList;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.Serializable;
-import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Objects;
-
-/**
- * A class representing a range of {@link ByteKey ByteKeys}.
- *
- * <p>Instances of {@link ByteKeyRange} are immutable.
- *
- * <p>A {@link ByteKeyRange} enforces the restriction that its start and end keys must form a valid,
- * non-empty range {@code [startKey, endKey)} that is inclusive of the start key and exclusive of
- * the end key.
- *
- * <p>When the end key is empty, it is treated as the largest possible key.
- *
- * <h3>Interpreting {@link ByteKey} in a {@link ByteKeyRange}</h3>
- *
- * <p>The primary role of {@link ByteKeyRange} is to provide functionality for
- * {@link #estimateFractionForKey(ByteKey)}, {@link #interpolateKey(double)}, and
- * {@link #split(int)}, which are used for Google Cloud Dataflow's
- * <a href="https://cloud.google.com/dataflow/service/dataflow-service-desc#AutoScaling">Autoscaling
- * and Dynamic Work Rebalancing</a> features.
- *
- * <p>{@link ByteKeyRange} implements these features by treating a {@link ByteKey}'s underlying
- * {@code byte[]} as the binary expansion of floating point numbers in the range {@code [0.0, 1.0]}.
- * For example, the keys {@code ByteKey.of(0x80)}, {@code ByteKey.of(0xc0)}, and
- * {@code ByteKey.of(0xe0)} are interpreted as {@code 0.5}, {@code 0.75}, and {@code 0.875}
- * respectively. The empty {@code ByteKey.EMPTY} is interpreted as {@code 0.0} when used as the
- * start of a range and {@code 1.0} when used as the end key.
- *
- * <p>Key interpolation, fraction estimation, and range splitting are all interpreted in these
- * floating-point semantics. See the respective implementations for further details. <b>Note:</b>
- * the underlying implementations of these functions use {@link BigInteger} and {@link BigDecimal},
- * so they can be slow and should not be called in hot loops. Dataflow's dynamic work
- * rebalancing will only invoke these functions during periodic control operations, so they are not
- * called on the critical path.
- *
- * @see ByteKey
- */
-public final class ByteKeyRange implements Serializable {
-  private static final Logger logger = LoggerFactory.getLogger(ByteKeyRange.class);
-
-  /** The range of all keys, with empty start and end keys. */
-  public static final ByteKeyRange ALL_KEYS = ByteKeyRange.of(ByteKey.EMPTY, ByteKey.EMPTY);
-
-  /**
-   * Creates a new {@link ByteKeyRange} with the given start and end keys.
-   *
-   * <p>Note that if {@code endKey} is empty, it is treated as the largest possible key.
-   *
-   * @see ByteKeyRange
-   *
-   * @throws IllegalArgumentException if {@code endKey} is less than or equal to {@code startKey},
-   *     unless {@code endKey} is empty indicating the maximum possible {@link ByteKey}.
-   */
-  public static ByteKeyRange of(ByteKey startKey, ByteKey endKey) {
-    return new ByteKeyRange(startKey, endKey);
-  }
-
-  /**
-   * Returns the {@link ByteKey} representing the lower bound of this {@link ByteKeyRange}.
-   */
-  public ByteKey getStartKey() {
-    return startKey;
-  }
-
-  /**
-   * Returns the {@link ByteKey} representing the upper bound of this {@link ByteKeyRange}.
-   *
-   * <p>Note that if {@code endKey} is empty, it is treated as the largest possible key.
-   */
-  public ByteKey getEndKey() {
-    return endKey;
-  }
-
-  /**
-   * Returns {@code true} if the specified {@link ByteKey} is contained within this range.
-   */
-  public Boolean containsKey(ByteKey key) {
-    return key.compareTo(startKey) >= 0 && endsAfterKey(key);
-  }
-
-  /**
-   * Returns {@code true} if the specified {@link ByteKeyRange} overlaps this range.
-   */
-  public Boolean overlaps(ByteKeyRange other) {
-    // If each range starts before the other range ends, then they must overlap.
-    //     { [] } -- one range inside the other   OR   { [ } ] -- partial overlap.
-    return endsAfterKey(other.startKey) && other.endsAfterKey(startKey);
-  }
-
-  /**
-   * Returns a list of up to {@code numSplits + 1} {@link ByteKey ByteKeys} in ascending order,
-   * where the keys have been interpolated to form roughly equal sub-ranges of this
-   * {@link ByteKeyRange}, assuming a uniform distribution of keys within this range.
-   *
-   * <p>The first {@link ByteKey} in the result is guaranteed to be equal to {@link #getStartKey},
-   * and the last {@link ByteKey} in the result is guaranteed to be equal to {@link #getEndKey}.
-   * Thus the resulting list exactly spans the same key range as this {@link ByteKeyRange}.
-   *
-   * <p>Note that the number of keys returned is not always equal to {@code numSplits + 1}.
-   * Specifically, if this range is unsplittable (e.g., because the start and end keys are equal
-   * up to padding by zero bytes), the list returned will only contain the start and end key.
-   *
-   * @throws IllegalArgumentException if the specified number of splits is less than 1
-   * @see ByteKeyRange the ByteKeyRange class Javadoc for more information about split semantics.
-   */
-  public List<ByteKey> split(int numSplits) {
-    checkArgument(numSplits > 0, "numSplits %s must be a positive integer", numSplits);
-
-    try {
-      ImmutableList.Builder<ByteKey> ret = ImmutableList.builder();
-      ret.add(startKey);
-      for (int i = 1; i < numSplits; ++i) {
-        ret.add(interpolateKey(i / (double) numSplits));
-      }
-      ret.add(endKey);
-      return ret.build();
-    } catch (IllegalStateException e) {
-      // interpolateKey signals an uninterpolatable range this way; fall back to the two endpoints.
-      return ImmutableList.of(startKey, endKey);
-    }
-  }
-
-  /**
-   * Returns the fraction of this range {@code [startKey, endKey)} that is in the interval
-   * {@code [startKey, key)}.
-   *
-   * @throws IllegalArgumentException if {@code key} does not fall within this range
-   * @see ByteKeyRange the ByteKeyRange class Javadoc for more information about fraction semantics.
-   */
-  public double estimateFractionForKey(ByteKey key) {
-    checkNotNull(key, "key");
-    checkArgument(!key.isEmpty(), "Cannot compute fraction for an empty key");
-    checkArgument(
-        key.compareTo(startKey) >= 0, "Expected key %s >= range start key %s", key, startKey);
-
-    if (key.equals(endKey)) {
-      return 1.0;
-    }
-    checkArgument(containsKey(key), "Cannot compute fraction for %s outside this %s", key, this);
-
-    byte[] startBytes = startKey.getBytes();
-    byte[] endBytes = endKey.getBytes();
-    byte[] keyBytes = key.getBytes();
-    // If the endKey is unspecified, add a leading 1 byte to it and a leading 0 byte to all other
-    // keys, to get a concrete least upper bound for the desired range.
-    if (endKey.isEmpty()) {
-      startBytes = addHeadByte(startBytes, (byte) 0);
-      endBytes = addHeadByte(endBytes, (byte) 1);
-      keyBytes = addHeadByte(keyBytes, (byte) 0);
-    }
-
-    // Pad to the longest of all 3 keys.
-    int paddedKeyLength = Math.max(Math.max(startBytes.length, endBytes.length), keyBytes.length);
-    BigInteger rangeStartInt = paddedPositiveInt(startBytes, paddedKeyLength);
-    BigInteger rangeEndInt = paddedPositiveInt(endBytes, paddedKeyLength);
-    BigInteger keyInt = paddedPositiveInt(keyBytes, paddedKeyLength);
-
-    // Keys are equal subject to padding by 0.
-    BigInteger range = rangeEndInt.subtract(rangeStartInt);
-    if (range.equals(BigInteger.ZERO)) {
-      logger.warn(
-          "Using 0.0 as the default fraction for this near-empty range {} where start and end keys"
-              + " differ only by trailing zeros.",
-          this);
-      return 0.0;
-    }
-
-    // Compute the progress (key-start)/(end-start) scaling by 2^64, dividing (which rounds),
-    // and then scaling down after the division. This gives ample precision when converted to
-    // double.
-    BigInteger progressScaled = keyInt.subtract(rangeStartInt).shiftLeft(64);
-    return progressScaled.divide(range).doubleValue() / Math.pow(2, 64);
-  }
-
-  /**
-   * Returns a {@link ByteKey} {@code key} such that {@code [startKey, key)} represents
-   * approximately the specified fraction of the range {@code [startKey, endKey)}. The interpolation
-   * is computed assuming a uniform distribution of keys.
-   *
-   * <p>For example, given the largest possible range (defined by empty start and end keys), the
-   * fraction {@code 0.5} will return the {@code ByteKey.of(0x80)}, which will also be returned for
-   * ranges {@code [0x40, 0xc0)} and {@code [0x6f, 0x91)}.
-   *
-   * <p>The key returned will never be empty.
-   *
-   * @throws IllegalArgumentException if {@code fraction} is outside the range [0, 1)
-   * @throws IllegalStateException if this range cannot be interpolated
-   * @see ByteKeyRange the ByteKeyRange class Javadoc for more information about fraction semantics.
-   */
-  public ByteKey interpolateKey(double fraction) {
-    checkArgument(
-        fraction >= 0.0 && fraction < 1.0, "Fraction %s must be in the range [0, 1)", fraction);
-    byte[] startBytes = startKey.getBytes();
-    byte[] endBytes = endKey.getBytes();
-    // If the endKey is unspecified, add a leading 1 byte to it and a leading 0 byte to all other
-    // keys, to get a concrete least upper bound for the desired range.
-    if (endKey.isEmpty()) {
-      startBytes = addHeadByte(startBytes, (byte) 0);
-      endBytes = addHeadByte(endBytes, (byte) 1);
-    }
-
-    // Pad to the longest key.
-    int paddedKeyLength = Math.max(startBytes.length, endBytes.length);
-    BigInteger rangeStartInt = paddedPositiveInt(startBytes, paddedKeyLength);
-    BigInteger rangeEndInt = paddedPositiveInt(endBytes, paddedKeyLength);
-
-    // If the keys are equal subject to padding by 0, we can't interpolate.
-    BigInteger range = rangeEndInt.subtract(rangeStartInt);
-    checkState(
-        !range.equals(BigInteger.ZERO),
-        "Refusing to interpolate for near-empty %s where start and end keys differ only by trailing"
-            + " zero bytes.",
-        this);
-
-    // Add precision so that range is at least 53 (double mantissa length) bits long. This way, we
-    // can interpolate small ranges finely, e.g., split the range key 3 to key 4 into 1024 parts.
-    // We add precision to range by adding zero bytes to the end of the keys, aka shifting the
-    // underlying BigInteger left by a multiple of 8 bits.
-    int bytesNeeded = ((53 - range.bitLength()) + 7) / 8;
-    if (bytesNeeded > 0) {
-      range = range.shiftLeft(bytesNeeded * 8);
-      rangeStartInt = rangeStartInt.shiftLeft(bytesNeeded * 8);
-      paddedKeyLength += bytesNeeded;
-    }
-
-    BigInteger interpolatedOffset =
-        new BigDecimal(range).multiply(BigDecimal.valueOf(fraction)).toBigInteger();
-
-    int outputKeyLength = endKey.isEmpty() ? (paddedKeyLength - 1) : paddedKeyLength;
-    return ByteKey.copyFrom(
-        fixupHeadZeros(rangeStartInt.add(interpolatedOffset).toByteArray(), outputKeyLength));
-  }
-
-  /**
-   * Returns new {@link ByteKeyRange} like this one, but with the specified start key.
-   */
-  public ByteKeyRange withStartKey(ByteKey startKey) {
-    return new ByteKeyRange(startKey, endKey);
-  }
-
-  /**
-   * Returns new {@link ByteKeyRange} like this one, but with the specified end key.
-   */
-  public ByteKeyRange withEndKey(ByteKey endKey) {
-    return new ByteKeyRange(startKey, endKey);
-  }
-
-  ////////////////////////////////////////////////////////////////////////////////////
-  private final ByteKey startKey;
-  private final ByteKey endKey;
-
-  private ByteKeyRange(ByteKey startKey, ByteKey endKey) {
-    this.startKey = checkNotNull(startKey, "startKey");
-    this.endKey = checkNotNull(endKey, "endKey");
-    checkArgument(endsAfterKey(startKey), "Start %s must be less than end %s", startKey, endKey);
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(ByteKeyRange.class)
-        .add("startKey", startKey)
-        .add("endKey", endKey)
-        .toString();
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (o == this) {
-      return true;
-    }
-    if (!(o instanceof ByteKeyRange)) {
-      return false;
-    }
-    ByteKeyRange other = (ByteKeyRange) o;
-    return Objects.equals(startKey, other.startKey) && Objects.equals(endKey, other.endKey);
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(startKey, endKey);
-  }
-
-  /**
-   * Returns a copy of the specified array with the specified byte added at the front.
-   */
-  private static byte[] addHeadByte(byte[] array, byte b) {
-    byte[] ret = new byte[array.length + 1];
-    ret[0] = b;
-    System.arraycopy(array, 0, ret, 1, array.length);
-    return ret;
-  }
-
-  /**
-   * Ensures the array is exactly {@code size} bytes long. Returns the input array if the condition
-   * is met, otherwise either adds or removes zero bytes from the beginning of {@code array}.
-   */
-  private static byte[] fixupHeadZeros(byte[] array, int size) {
-    int padding = size - array.length;
-    if (padding == 0) {
-      return array;
-    }
-
-    if (padding < 0) {
-      // There is one zero byte at the beginning, added by BigInteger to make there be a sign
-      // bit when converting to bytes. Guava message templates only substitute %s placeholders.
-      verify(
-          padding == -1,
-          "key %s: expected length %s with exactly one byte of padding, found %s",
-          ByteKey.copyFrom(array),
-          size,
-          -padding);
-      verify(
-          (array[0] == 0) && ((array[1] & 0x80) == 0x80),
-          "key %s: is 1 byte longer than expected, indicating BigInteger padding. Expect first byte"
-              + " to be zero with set MSB in second byte.",
-          ByteKey.copyFrom(array));
-      return Arrays.copyOfRange(array, 1, array.length);
-    }
-
-    byte[] ret = new byte[size];
-    System.arraycopy(array, 0, ret, padding, array.length);
-    return ret;
-  }
-
-  /**
-   * Returns {@code true} when the specified {@code key} is smaller than this range's end key. The
-   * only semantic change from {@code (key.compareTo(getEndKey()) < 0)} is that the empty end key
-   * is treated as larger than all possible {@link ByteKey keys}.
-   */
-  boolean endsAfterKey(ByteKey key) {
-    return endKey.isEmpty() || key.compareTo(endKey) < 0;
-  }
-
-  /** Builds a non-negative BigInteger from the array, zero-padded on the right to {@code length}. */
-  private static BigInteger paddedPositiveInt(byte[] bytes, int length) {
-    int bytePaddingNeeded = length - bytes.length;
-    checkArgument(
-        bytePaddingNeeded >= 0, "Required bytes.length %s <= length %s", bytes.length, length);
-    BigInteger ret = new BigInteger(1, bytes);
-    return (bytePaddingNeeded == 0) ? ret : ret.shiftLeft(8 * bytePaddingNeeded);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKeyRangeTracker.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKeyRangeTracker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKeyRangeTracker.java
deleted file mode 100644
index f6796cc..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/ByteKeyRangeTracker.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io.range;
-
-import static com.google.common.base.MoreObjects.toStringHelper;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link RangeTracker} for {@link ByteKey ByteKeys} in {@link ByteKeyRange ByteKeyRanges}.
- *
- * @see ByteKey
- * @see ByteKeyRange
- */
-public final class ByteKeyRangeTracker implements RangeTracker<ByteKey> {
-  private static final Logger logger = LoggerFactory.getLogger(ByteKeyRangeTracker.class);
-
-  /** Instantiates a new {@link ByteKeyRangeTracker} with the specified range. */
-  public static ByteKeyRangeTracker of(ByteKeyRange range) {
-    return new ByteKeyRangeTracker(range);
-  }
-
-  @Override
-  public synchronized ByteKey getStartPosition() {
-    return range.getStartKey();
-  }
-
-  @Override
-  public synchronized ByteKey getStopPosition() {
-    return range.getEndKey();
-  }
-
-  @Override
-  public synchronized boolean tryReturnRecordAt(boolean isAtSplitPoint, ByteKey recordStart) {
-    if (isAtSplitPoint && !range.containsKey(recordStart)) {
-      return false;
-    }
-    position = recordStart;
-    return true;
-  }
-
-  @Override
-  public synchronized boolean trySplitAtPosition(ByteKey splitPosition) {
-    // Unstarted.
-    if (position == null) {
-      logger.warn(
-          "{}: Rejecting split request at {} because no records have been returned.",
-          this,
-          splitPosition);
-      return false;
-    }
-
-    // Started, but not after current position.
-    if (splitPosition.compareTo(position) <= 0) {
-      logger.warn(
-          "{}: Rejecting split request at {} because it is not after current position {}.",
-          this,
-          splitPosition,
-          position);
-      return false;
-    }
-
-    // Sanity check.
-    if (!range.containsKey(splitPosition)) {
-      logger.warn(
-          "{}: Rejecting split request at {} because it is not within the range.",
-          this,
-          splitPosition);
-      return false;
-    }
-
-    range = range.withEndKey(splitPosition);
-    return true;
-  }
-
-  @Override
-  public synchronized double getFractionConsumed() {
-    if (position == null) {
-      return 0;
-    }
-    return range.estimateFractionForKey(position);
-  }
-
-  ///////////////////////////////////////////////////////////////////////////////
-  private ByteKeyRange range;
-  @Nullable private ByteKey position;
-
-  private ByteKeyRangeTracker(ByteKeyRange range) {
-    this.range = range;
-    this.position = null;
-  }
-
-  @Override
-  public String toString() {
-    return toStringHelper(ByteKeyRangeTracker.class)
-        .add("range", range)
-        .add("position", position)
-        .toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/OffsetRangeTracker.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/OffsetRangeTracker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/OffsetRangeTracker.java
deleted file mode 100644
index b237217..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/OffsetRangeTracker.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.io.range;
-
-import com.google.common.annotations.VisibleForTesting;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * A {@link RangeTracker} for non-negative positions of type {@code long}.
- */
-public class OffsetRangeTracker implements RangeTracker<Long> {
-  private static final Logger LOG = LoggerFactory.getLogger(OffsetRangeTracker.class);
-
-  private final long startOffset;
-  private long stopOffset;
-  private long lastRecordStart = -1L;
-  private long offsetOfLastSplitPoint = -1L;
-
-  /**
-   * Offset corresponding to infinity. This can only be used as the upper-bound of a range, and
-   * indicates reading all of the records until the end without specifying exactly what the end is.
-   *
-   * <p>Infinite ranges cannot be split because it is impossible to estimate progress within them.
-   */
-  public static final long OFFSET_INFINITY = Long.MAX_VALUE;
-
-  /**
-   * Creates an {@code OffsetRangeTracker} for the specified range.
-   */
-  public OffsetRangeTracker(long startOffset, long stopOffset) {
-    this.startOffset = startOffset;
-    this.stopOffset = stopOffset;
-  }
-
-  @Override
-  public synchronized Long getStartPosition() {
-    return startOffset;
-  }
-
-  @Override
-  public synchronized Long getStopPosition() {
-    return stopOffset;
-  }
-
-  @Override
-  public boolean tryReturnRecordAt(boolean isAtSplitPoint, Long recordStart) {
-    return tryReturnRecordAt(isAtSplitPoint, recordStart.longValue());
-  }
-
-  public synchronized boolean tryReturnRecordAt(boolean isAtSplitPoint, long recordStart) {
-    if (lastRecordStart == -1 && !isAtSplitPoint) {
-      throw new IllegalStateException(
-          String.format("The first record [starting at %d] must be at a split point", recordStart));
-    }
-    if (recordStart < lastRecordStart) {
-      throw new IllegalStateException(
-          String.format(
-              "Trying to return record [starting at %d] "
-                  + "which is before the last-returned record [starting at %d]",
-              recordStart,
-              lastRecordStart));
-    }
-    if (isAtSplitPoint) {
-      if (offsetOfLastSplitPoint != -1L && recordStart == offsetOfLastSplitPoint) {
-        throw new IllegalStateException(
-            String.format(
-                "Record at a split point has same offset as the previous split point: "
-                    + "previous split point at %d, current record starts at %d",
-                offsetOfLastSplitPoint, recordStart));
-      }
-      if (recordStart >= stopOffset) {
-        return false;
-      }
-      offsetOfLastSplitPoint = recordStart;
-    }
-
-    lastRecordStart = recordStart;
-    return true;
-  }
-
-  @Override
-  public boolean trySplitAtPosition(Long splitOffset) {
-    return trySplitAtPosition(splitOffset.longValue());
-  }
-
-  public synchronized boolean trySplitAtPosition(long splitOffset) {
-    if (stopOffset == OFFSET_INFINITY) {
-      LOG.debug("Refusing to split {} at {}: stop position unspecified", this, splitOffset);
-      return false;
-    }
-    if (lastRecordStart == -1) {
-      LOG.debug("Refusing to split {} at {}: unstarted", this, splitOffset);
-      return false;
-    }
-
-    // Note: technically it is correct to split at any position after the last returned
-    // split point, not just the last returned record.
-    // TODO: Investigate whether in practice this is useful or, rather, confusing.
-    if (splitOffset <= lastRecordStart) {
-      LOG.debug(
-          "Refusing to split {} at {}: already past proposed split position", this, splitOffset);
-      return false;
-    }
-    if (splitOffset < startOffset || splitOffset >= stopOffset) {
-      LOG.debug(
-          "Refusing to split {} at {}: proposed split position out of range", this, splitOffset);
-      return false;
-    }
-    LOG.debug("Agreeing to split {} at {}", this, splitOffset);
-    this.stopOffset = splitOffset;
-    return true;
-  }
-
-  /**
-   * Returns a position {@code P} such that the range {@code [start, P)} represents approximately
-   * the given fraction of the range {@code [start, end)}. Assumes that the density of records
-   * in the range is approximately uniform.
-   */
-  public synchronized long getPositionForFractionConsumed(double fraction) {
-    if (stopOffset == OFFSET_INFINITY) {
-      throw new IllegalArgumentException(
-          "getPositionForFractionConsumed is not applicable to an unbounded range: " + this);
-    }
-    return (long) Math.ceil(startOffset + fraction * (stopOffset - startOffset));
-  }
-
-  /** Returns the fraction of the range consumed so far, clamped to at most 1.0. */
-  @Override
-  public synchronized double getFractionConsumed() {
-    // No estimate is possible for an unbounded range or before any record has been returned.
-    if (stopOffset == OFFSET_INFINITY || lastRecordStart == -1) {
-      return 0.0;
-    }
-    // Offsets from startOffset through lastRecordStart count as consumed: reading [3, 6) with
-    // lastRecordStart 4 has consumed 3,4 of 3,4,5, i.e. (4 - 3 + 1) / (6 - 3) = 67%.
-    double consumed = lastRecordStart - startOffset + 1;
-    double total = stopOffset - startOffset;
-    // The last consumed position can extend past the stop position, so clamp to at most 1.0.
-    return Math.min(1.0, consumed / total);
-  }
-
-  /**
-   * Renders the offset range (stop shown as "infinity" when unbounded), prefixed with the
-   * last returned record start once lastRecordStart is non-negative.
-   */
-  @Override
-  public synchronized String toString() {
-    String stopText = (stopOffset == OFFSET_INFINITY) ? "infinity" : Long.toString(stopOffset);
-    if (lastRecordStart < 0) {
-      return String.format("<unstarted in offset range [%d, %s)>", startOffset, stopText);
-    }
-    return String.format(
-        "<at [starting at %d] of offset range [%d, %s)>", lastRecordStart, startOffset, stopText);
-  }
-
-  /** Returns a copy of this tracker, including its read progress, for testing purposes. */
-  @VisibleForTesting
-  synchronized OffsetRangeTracker copy() {
-    // Synchronized like the mutators, so the copied fields form a consistent snapshot.
-    OffsetRangeTracker res = new OffsetRangeTracker(startOffset, stopOffset);
-    res.lastRecordStart = this.lastRecordStart;
-    // Without this, the copy would accept a second split point at the same offset.
-    res.offsetOfLastSplitPoint = this.offsetOfLastSplitPoint;
-    return res;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/RangeTracker.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/RangeTracker.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/RangeTracker.java
deleted file mode 100644
index 84359f1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/RangeTracker.java
+++ /dev/null
@@ -1,220 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.io.range;
-
-/**
- * A {@code RangeTracker} is a thread-safe helper object for implementing dynamic work rebalancing
- * in position-based {@link com.google.cloud.dataflow.sdk.io.BoundedSource.BoundedReader}
- * subclasses.
- *
- * <h3>Usage of the RangeTracker class hierarchy</h3>
- * The abstract {@code RangeTracker} interface should not be used per se - all users should use its
- * subclasses directly. We declare it here because all subclasses have roughly the same interface
- * and the same properties, to centralize the documentation. Currently we provide one
- * implementation - {@link OffsetRangeTracker}.
- *
- * <h3>Position-based sources</h3>
- * A position-based source is one where the source can be described by a range of positions of
- * an ordered type and the records returned by the reader can be described by positions of the
- * same type.
- *
- * <p>In case a record occupies a range of positions in the source, the most important thing about
- * the record is the position where it starts.
- *
- * <p>Defining the semantics of positions for a source is entirely up to the source class, however
- * the chosen definitions have to obey certain properties in order to make it possible to correctly
- * split the source into parts, including dynamic splitting. Two main aspects need to be defined:
- * <ul>
- *   <li>How to assign starting positions to records.
- *   <li>Which records should be read by a source with a range {@code [A, B)}.
- * </ul>
- * Moreover, reading a range must be <i>efficient</i>, i.e., the performance of reading a range
- * should not significantly depend on the location of the range. For example, reading the range
- * {@code [A, B)} should not require reading all data before {@code A}.
- *
- * <p>The sections below explain exactly what properties these definitions must satisfy, and
- * how to use a {@code RangeTracker} with a properly defined source.
- *
- * <h3>Properties of position-based sources</h3>
- * The main requirement for position-based sources is <i>associativity</i>: reading records from
- * {@code [A, B)} and records from {@code [B, C)} should give the same records as reading from
- * {@code [A, C)}, where {@code A <= B <= C}. This property ensures that no matter how a range
- * of positions is split into arbitrarily many sub-ranges, the total set of records described by
- * them stays the same.
- *
- * <p>The other important property is how the source's range relates to positions of records in
- * the source. In many sources each record can be identified by a unique starting position.
- * In this case:
- * <ul>
- *   <li>All records returned by a source {@code [A, B)} must have starting positions
- *   in this range.
- *   <li>All but the last record should end within this range. The last record may or may not
- *   extend past the end of the range.
- *   <li>Records should not overlap.
- * </ul>
- * Such sources should define "read {@code [A, B)}" as "read from the first record starting at or
- * after A, up to but not including the first record starting at or after B".
- *
- * <p>Some examples of such sources include reading lines or CSV from a text file, reading keys and
- * values from a BigTable, etc.
- *
- * <p>The concept of <i>split points</i> extends these definitions to sources where some
- * records cannot be identified by a unique starting position.
- *
- * <p>In all cases, all records returned by a source {@code [A, B)} must <i>start</i> at or after
- * {@code A}.
- *
- * <h3>Split points</h3>
- *
- * <p>Some sources may have records that are not directly addressable. For example, imagine a file
- * format consisting of a sequence of compressed blocks. Each block can be assigned an offset, but
- * records within the block cannot be directly addressed without decompressing the block. Let us
- * refer to this hypothetical format as <i>CBF (Compressed Blocks Format)</i>.
- *
- * <p>Many such formats can still satisfy the associativity property. For example, in CBF, reading
- * {@code [A, B)} can mean "read all the records in all blocks whose starting offset is in
- * {@code [A, B)}".
- *
- * <p>To support such complex formats, we introduce the notion of <i>split points</i>. We say that
- * a record is a split point if there exists a position {@code A} such that the record is the first
- * one to be returned when reading the range  {@code [A, infinity)}. In CBF, the only split points
- * would be the first records in each block.
- *
- * <p>Split points allow us to define the meaning of a record's position and a source's range
- * in all cases:
- * <ul>
- *   <li>For a record that is at a split point, its position is defined to be the largest
- *   {@code A} such that reading a source with the range {@code [A, infinity)} returns this record;
- *   <li>Positions of other records are only required to be non-decreasing;
- *   <li>Reading the source {@code [A, B)} must return records starting from the first split point
- *   at or after {@code A}, up to but not including the first split point at or after {@code B}.
- *   In particular, this means that the first record returned by a source MUST always be
- *   a split point.
- *   <li>Positions of split points must be unique.
- * </ul>
- * As a result, for any decomposition of the full range of the source into position ranges, the
- * total set of records will be the full set of records in the source, and each record
- * will be read exactly once.
- *
- * <h3>Consumed positions</h3>
- * As the source is being read, and records read from it are being passed to the downstream
- * transforms in the pipeline, we say that positions in the source are being <i>consumed</i>.
- * When a reader has read a record (or promised to a caller that a record will be returned),
- * positions up to and including the record's start position are considered <i>consumed</i>.
- *
- * <p>Dynamic splitting can happen only at <i>unconsumed</i> positions. If the reader just
- * returned a record at offset 42 in a file, dynamic splitting can happen only at offset 43 or
- * beyond, as otherwise that record could be read twice (by the current reader and by a reader
- * of the task starting at 43).
- *
- * <h3>Example</h3>
- * The following example uses an {@link OffsetRangeTracker} to support dynamically splitting
- * a source with integer positions (offsets).
- * <pre> {@code
- *   class MyReader implements BoundedReader<Foo> {
- *     private MySource currentSource;
- *     private final OffsetRangeTracker tracker;
- *     ...
- *     MyReader(MySource source) {
- *       this.currentSource = source;
- *       this.tracker = new OffsetRangeTracker(source.getStartOffset(), source.getEndOffset());
- *     }
- *     ...
- *     boolean start() {
- *       ... (general logic for locating the first record) ...
- *       if (!tracker.tryReturnRecordAt(true, recordStartOffset)) return false;
- *       ... (any logic that depends on the record being returned, e.g. counting returned records)
- *       return true;
- *     }
- *     boolean advance() {
- *       ... (general logic for locating the next record) ...
- *       if (!tracker.tryReturnRecordAt(isAtSplitPoint, recordStartOffset)) return false;
- *       ... (any logic that depends on the record being returned, e.g. counting returned records)
- *       return true;
- *     }
- *
- *     double getFractionConsumed() {
- *       return tracker.getFractionConsumed();
- *     }
- *   }
- * } </pre>
- *
- * <h3>Usage with different models of iteration</h3>
- * When using this class to protect a
- * {@link com.google.cloud.dataflow.sdk.io.BoundedSource.BoundedReader}, follow the pattern
- * described above.
- *
- * <p>When using this class to protect iteration in the {@code hasNext()/next()}
- * model, consider the record consumed when {@code hasNext()} is about to return true, rather than
- * when {@code next()} is called, because {@code hasNext()} returning true is promising the caller
- * that {@code next()} will have an element to return - so {@link #trySplitAtPosition} must not
- * split the range in a way that would make the record promised by {@code hasNext()} belong to
- * a different range.
- *
- * <p>Also note that implementations of {@code hasNext()} need to ensure
- * that they call {@link #tryReturnRecordAt} only once even if {@code hasNext()} is called
- * repeatedly, due to the requirement on uniqueness of split point positions.
- *
- * @param <PositionT> Type of positions used by the source to define ranges and identify records.
- */
-public interface RangeTracker<PositionT> {
-  /**
-   * Returns the starting position of the current range, inclusive.
-   */
-  PositionT getStartPosition();
-
-  /**
-   * Returns the ending position of the current range, exclusive.
-   */
-  PositionT getStopPosition();
-
-  /**
-   * Atomically determines whether a record at the given position can be returned and updates
-   * internal state. In particular:
-   * <ul>
-   *   <li>If {@code isAtSplitPoint} is {@code true}, and {@code recordStart} is outside the current
-   *   range, returns {@code false};
-   *   <li>Otherwise, updates the last-consumed position to {@code recordStart} and returns
-   *   {@code true}.
-   * </ul>
-   * <p>This method MUST be called on all split point records. It may be called on every record.
-   */
-  boolean tryReturnRecordAt(boolean isAtSplitPoint, PositionT recordStart);
-
-  /**
-   * Atomically splits the current range [{@link #getStartPosition}, {@link #getStopPosition})
-   * into a "primary" part [{@link #getStartPosition}, {@code splitPosition})
-   * and a "residual" part [{@code splitPosition}, {@link #getStopPosition}), assuming the current
-   * last-consumed position is within [{@link #getStartPosition}, splitPosition)
-   * (i.e., {@code splitPosition} has not been consumed yet).
-   *
-   * <p>Updates the current range to be the primary and returns {@code true}. This means that
-   * all further calls on the current object will interpret their arguments relative to the
-   * primary range.
-   *
-   * <p>If the split position has already been consumed, or if no {@link #tryReturnRecordAt} call
-   * was made yet, returns {@code false}. The second condition is to prevent dynamic splitting
-   * during reader start-up.
-   */
-  boolean trySplitAtPosition(PositionT splitPosition);
-
-  /**
-   * Returns the approximate fraction of positions in the source that have been consumed by
-   * successful {@link #tryReturnRecordAt} calls, or 0.0 if no such calls have happened.
-   */
-  double getFractionConsumed();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/package-info.java
deleted file mode 100644
index beb77bf..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/range/package-info.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Provides thread-safe helpers for implementing dynamic work rebalancing in position-based
- * bounded sources.
- *
- * <p>See {@link com.google.cloud.dataflow.sdk.io.range.RangeTracker} to get started.
- */
-package com.google.cloud.dataflow.sdk.io.range;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java
deleted file mode 100644
index 60d62d3..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/ApplicationNameOptions.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-/**
- * Options that allow setting the application name.
- */
-public interface ApplicationNameOptions extends PipelineOptions {
-  /**
-   * Name of application, for display purposes.
-   *
-   * <p>Defaults to the name of the class that constructs the {@link PipelineOptions}
-   * via the {@link PipelineOptionsFactory}.
-   */
-  @Description("Name of application for display purposes. Defaults to the name of the class that "
-      + "constructs the PipelineOptions via the PipelineOptionsFactory.")
-  String getAppName();
-  void setAppName(String value);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java
deleted file mode 100644
index ed4eb24..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BigQueryOptions.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-/**
- * Properties needed when using BigQuery with the Dataflow SDK.
- */
-@Description("Options that are used to configure BigQuery. See "
-    + "https://cloud.google.com/bigquery/what-is-bigquery for details on BigQuery.")
-public interface BigQueryOptions extends ApplicationNameOptions, GcpOptions,
-    PipelineOptions, StreamingOptions {
-  @Description("Temporary dataset for BigQuery table operations. "
-      + "Supported values are \"bigquery.googleapis.com/{dataset}\"")
-  @Default.String("bigquery.googleapis.com/cloud_dataflow")
-  String getTempDatasetId();
-  void setTempDatasetId(String value);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java
deleted file mode 100644
index 43a46b0..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/BlockingDataflowPipelineOptions.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.cloud.dataflow.sdk.runners.BlockingDataflowPipelineRunner;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-
-import java.io.PrintStream;
-
-/**
- * Options that are used to configure the {@link BlockingDataflowPipelineRunner}.
- */
-@Description("Configure options on the BlockingDataflowPipelineRunner.")
-public interface BlockingDataflowPipelineOptions extends DataflowPipelineOptions {
-  /**
-   * Output stream for job status messages.
-   */
-  @Description("Where messages generated during execution of the Dataflow job will be output.")
-  @JsonIgnore
-  @Hidden
-  @Default.InstanceFactory(StandardOutputFactory.class)
-  PrintStream getJobMessageOutput();
-  void setJobMessageOutput(PrintStream value);
-
-  /**
-   * Returns a default of {@link System#out}.
-   */
-  public static class StandardOutputFactory implements DefaultValueFactory<PrintStream> {
-    @Override
-    public PrintStream create(PipelineOptions options) {
-      return System.out;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java
deleted file mode 100644
index 2e1ad94..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/CloudDebuggerOptions.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.api.services.clouddebugger.v2.model.Debuggee;
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-
-import javax.annotation.Nullable;
-
-/**
- * Options for controlling Cloud Debugger.
- */
-@Description("[Experimental] Used to configure the Cloud Debugger")
-@Experimental
-@Hidden
-public interface CloudDebuggerOptions {
-  /** Whether to enable the Cloud Debugger snapshot agent for the current job. */
-  @Description("Whether to enable the Cloud Debugger snapshot agent for the current job.")
-  boolean getEnableCloudDebugger();
-  void setEnableCloudDebugger(boolean enabled);
-
-  /**
-   * The Cloud Debugger debuggee to associate with, if any; this should not be set directly.
-   */
-  @Description("The Cloud Debugger debuggee to associate with. This should not be set directly.")
-  @Hidden
-  @Nullable Debuggee getDebuggee();
-  void setDebuggee(Debuggee debuggee);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java
deleted file mode 100644
index cadc011..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineDebugOptions.java
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.api.services.dataflow.Dataflow;
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.util.DataflowPathValidator;
-import com.google.cloud.dataflow.sdk.util.GcsStager;
-import com.google.cloud.dataflow.sdk.util.InstanceBuilder;
-import com.google.cloud.dataflow.sdk.util.PathValidator;
-import com.google.cloud.dataflow.sdk.util.Stager;
-import com.google.cloud.dataflow.sdk.util.Transport;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * Internal. Options used to control execution of the Dataflow SDK for
- * debugging and testing purposes.
- */
-@Description("[Internal] Options used to control execution of the Dataflow SDK for "
-    + "debugging and testing purposes.")
-@Hidden
-public interface DataflowPipelineDebugOptions extends PipelineOptions {
-
-  /**
-   * The list of backend experiments to enable.
-   *
-   * <p>Dataflow provides a number of experimental features that can be enabled
-   * with this flag.
-   *
-   * <p>Please sync with the Dataflow team before enabling any experiments.
-   */
-  @Description("[Experimental] Dataflow provides a number of experimental features that can "
-      + "be enabled with this flag. Please sync with the Dataflow team before enabling any "
-      + "experiments.")
-  @Experimental
-  List<String> getExperiments();
-  void setExperiments(List<String> value);
-
-  /**
-   * The root URL for the Dataflow API. {@code dataflowEndpoint} can override this value
-   * if it contains an absolute URL, otherwise {@code apiRootUrl} will be combined with
-   * {@code dataflowEndpoint} to generate the full URL to communicate with the Dataflow API.
-   */
-  @Description("The root URL for the Dataflow API. dataflowEndpoint can override this "
-      + "value if it contains an absolute URL, otherwise apiRootUrl will be combined with "
-      + "dataflowEndpoint to generate the full URL to communicate with the Dataflow API.")
-  @Default.String(Dataflow.DEFAULT_ROOT_URL)
-  String getApiRootUrl();
-  void setApiRootUrl(String value);
-
-  /**
-   * Dataflow endpoint to use.
-   *
-   * <p>Defaults to the current version of the Google Cloud Dataflow
-   * API, at the time the current SDK version was released.
-   *
-   * <p>If the string contains "://", then this is treated as a URL,
-   * otherwise {@link #getApiRootUrl()} is used as the root
-   * URL.
-   */
-  @Description("The URL for the Dataflow API. If the string contains \"://\", this"
-      + " will be treated as the entire URL, otherwise will be treated relative to apiRootUrl.")
-  @Default.String(Dataflow.DEFAULT_SERVICE_PATH)
-  String getDataflowEndpoint();
-  void setDataflowEndpoint(String value);
-
-  /**
-   * The path to write the translated Dataflow job specification out to
-   * at job submission time. The Dataflow job specification will be represented in JSON
-   * format.
-   */
-  @Description("The path to write the translated Dataflow job specification out to "
-      + "at job submission time. The Dataflow job specification will be represented in JSON "
-      + "format.")
-  String getDataflowJobFile();
-  void setDataflowJobFile(String value);
-
-  /**
-   * The class of the validator that should be created and used to validate paths.
-   * If pathValidator has not been set explicitly, an instance of this class will be
-   * constructed and used as the path validator.
-   */
-  @Description("The class of the validator that should be created and used to validate paths. "
-      + "If pathValidator has not been set explicitly, an instance of this class will be "
-      + "constructed and used as the path validator.")
-  @Default.Class(DataflowPathValidator.class)
-  Class<? extends PathValidator> getPathValidatorClass();
-  void setPathValidatorClass(Class<? extends PathValidator> validatorClass);
-
-  /**
-   * The path validator instance that should be used to validate paths.
-   * If no path validator has been set explicitly, the default is to use the instance factory that
-   * constructs a path validator based upon the currently set pathValidatorClass.
-   */
-  @JsonIgnore
-  @Description("The path validator instance that should be used to validate paths. "
-      + "If no path validator has been set explicitly, the default is to use the instance factory "
-      + "that constructs a path validator based upon the currently set pathValidatorClass.")
-  @Default.InstanceFactory(PathValidatorFactory.class)
-  PathValidator getPathValidator();
-  void setPathValidator(PathValidator validator);
-
-  /**
-   * The class responsible for staging resources to be accessible by workers
-   * during job execution. If stager has not been set explicitly, an instance of this class
-   * will be created and used as the resource stager.
-   */
-  @Description("The class of the stager that should be created and used to stage resources. "
-      + "If stager has not been set explicitly, an instance of this class will be created "
-      + "and used as the resource stager.")
-  @Default.Class(GcsStager.class)
-  Class<? extends Stager> getStagerClass();
-  void setStagerClass(Class<? extends Stager> stagerClass);
-
-  /**
-   * The resource stager instance that should be used to stage resources.
-   * If no stager has been set explicitly, the default is to use the instance factory
-   * that constructs a resource stager based upon the currently set stagerClass.
-   */
-  @JsonIgnore
-  @Description("The resource stager instance that should be used to stage resources. "
-      + "If no stager has been set explicitly, the default is to use the instance factory "
-      + "that constructs a resource stager based upon the currently set stagerClass.")
-  @Default.InstanceFactory(StagerFactory.class)
-  Stager getStager();
-  void setStager(Stager stager);
-
-  /**
-   * An instance of the Dataflow client. Defaults to creating a Dataflow client
-   * using the current set of options.
-   */
-  @JsonIgnore
-  @Description("An instance of the Dataflow client. Defaults to creating a Dataflow client "
-      + "using the current set of options.")
-  @Default.InstanceFactory(DataflowClientFactory.class)
-  Dataflow getDataflowClient();
-  void setDataflowClient(Dataflow value);
-
-  /** Returns the default Dataflow client built from the passed in PipelineOptions. */
-  public static class DataflowClientFactory implements DefaultValueFactory<Dataflow> {
-    @Override
-    public Dataflow create(PipelineOptions options) {
-      return Transport.newDataflowClient(options.as(DataflowPipelineOptions.class)).build();
-    }
-  }
-
-  /**
-   * Root URL for use with the Pubsub API.
-   */
-  @Description("Root URL for use with the Pubsub API")
-  @Default.String("https://pubsub.googleapis.com")
-  String getPubsubRootUrl();
-  void setPubsubRootUrl(String value);
-
-  /**
-   * Whether to update the currently running pipeline with the same name as this one.
-   *
-   * @deprecated This property is replaced by {@link DataflowPipelineOptions#getUpdate()}
-   */
-  @Deprecated
-  @Description("If set, replace the existing pipeline with the name specified by --jobName with "
-      + "this pipeline, preserving state.")
-  boolean getUpdate();
-  @Deprecated
-  void setUpdate(boolean value);
-
-  /**
-   * Mapping of old PTransform names to new ones, specified as JSON
-   * <code>{"oldName":"newName",...}</code>. To mark a transform as deleted, make newName the
-   * empty string.
-   */
-  @JsonIgnore
-  @Description(
-      "Mapping of old PTransform names to new ones, specified as JSON "
-      + "{\"oldName\":\"newName\",...}. To mark a transform as deleted, make newName the empty "
-      + "string.")
-  Map<String, String> getTransformNameMapping();
-  void setTransformNameMapping(Map<String, String> value);
-
-  /**
-   * Custom windmill_main binary to use with the streaming runner.
-   */
-  @Description("Custom windmill_main binary to use with the streaming runner")
-  String getOverrideWindmillBinary();
-  void setOverrideWindmillBinary(String value);
-
-  /**
-   * Number of threads to use on the Dataflow worker harness. If left unspecified,
-   * the Dataflow service will compute an appropriate number of threads to use.
-   */
-  @Description("Number of threads to use on the Dataflow worker harness. If left unspecified, "
-      + "the Dataflow service will compute an appropriate number of threads to use.")
-  int getNumberOfWorkerHarnessThreads();
-  void setNumberOfWorkerHarnessThreads(int value);
-
-  /**
-   * If {@literal true}, save a heap dump before killing a thread or process which is GC
-   * thrashing or out of memory. The location of the heap file will either be echoed back
-   * to the user, or the user will be given the opportunity to download the heap file.
-   *
-   * <p>
-   * CAUTION: Heap dumps can be of comparable size to the default boot disk. Consider increasing
-   * the boot disk size before setting this flag to true.
-   */
-  // The description is a plain runtime string, so Javadoc inline tags such as {@literal ...}
-  // must not be used in it; they would be shown verbatim.
-  @Description("If true, save a heap dump before killing a thread or process "
-      + "which is GC thrashing or out of memory.")
-  boolean getDumpHeapOnOOM();
-  void setDumpHeapOnOOM(boolean dumpHeapBeforeExit);
-
-  /**
-   * Creates a {@link PathValidator} object using the class specified in
-   * {@link #getPathValidatorClass()}.
-   *
-   * <p>The instance is obtained through the class's {@code fromOptions} factory method, which is
-   * passed the {@link PipelineOptions}.
-   */
-  public static class PathValidatorFactory implements DefaultValueFactory<PathValidator> {
-    @Override
-    public PathValidator create(PipelineOptions options) {
-      DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
-      return InstanceBuilder.ofType(PathValidator.class)
-          .fromClass(debugOptions.getPathValidatorClass())
-          .fromFactoryMethod("fromOptions")
-          .withArg(PipelineOptions.class, options)
-          .build();
-    }
-  }
-
-  /**
-   * Creates a {@link Stager} object using the class specified in
-   * {@link #getStagerClass()}.
-   *
-   * <p>The instance is obtained through the class's {@code fromOptions} factory method, which is
-   * passed the {@link PipelineOptions}.
-   */
-  public static class StagerFactory implements DefaultValueFactory<Stager> {
-    @Override
-    public Stager create(PipelineOptions options) {
-      DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class);
-      return InstanceBuilder.ofType(Stager.class)
-          .fromClass(debugOptions.getStagerClass())
-          .fromFactoryMethod("fromOptions")
-          .withArg(PipelineOptions.class, options)
-          .build();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java
deleted file mode 100644
index 1aa4342..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.cloud.dataflow.sdk.runners.DataflowPipeline;
-import com.google.common.base.MoreObjects;
-
-import org.joda.time.DateTimeUtils;
-import org.joda.time.DateTimeZone;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-
-/**
- * Options that can be used to configure the {@link DataflowPipeline}.
- */
-@Description("Options that configure the Dataflow pipeline.")
-public interface DataflowPipelineOptions extends
-    PipelineOptions, GcpOptions, ApplicationNameOptions, DataflowPipelineDebugOptions,
-    DataflowPipelineWorkerPoolOptions, BigQueryOptions,
-    GcsOptions, StreamingOptions, CloudDebuggerOptions, DataflowWorkerLoggingOptions,
-    DataflowProfilingOptions {
-
-  /** Validation group shared by the temp and staging location options. */
-  static final String DATAFLOW_STORAGE_LOCATION = "Dataflow Storage Location";
-
-  /**
-   * Project id. Required when running a Dataflow in the cloud.
-   */
-  @Description("Project id. Required when running a Dataflow in the cloud. "
-      + "See https://cloud.google.com/storage/docs/projects for further details.")
-  @Override
-  @Validation.Required
-  @Default.InstanceFactory(DefaultProjectFactory.class)
-  String getProject();
-  @Override
-  void setProject(String value);
-
-  /**
-   * GCS path for temporary files, e.g. gs://bucket/object
-   *
-   * <p>Must be a valid Cloud Storage URL, beginning with the prefix "gs://"
-   *
-   * <p>At least one of {@link #getTempLocation()} or {@link #getStagingLocation()} must be set. If
-   * {@link #getTempLocation()} is not set, then the Dataflow pipeline defaults to using
-   * {@link #getStagingLocation()}.
-   */
-  @Description("GCS path for temporary files, eg \"gs://bucket/object\". "
-      + "Must be a valid Cloud Storage URL, beginning with the prefix \"gs://\". "
-      + "At least one of tempLocation or stagingLocation must be set. If tempLocation is unset, "
-      + "defaults to using stagingLocation.")
-  @Validation.Required(groups = {DATAFLOW_STORAGE_LOCATION})
-  String getTempLocation();
-  void setTempLocation(String value);
-
-  /**
-   * GCS path for staging local files, e.g. gs://bucket/object
-   *
-   * <p>Must be a valid Cloud Storage URL, beginning with the prefix "gs://"
-   *
-   * <p>At least one of {@link #getTempLocation()} or {@link #getStagingLocation()} must be set. If
-   * {@link #getStagingLocation()} is not set, then the Dataflow pipeline defaults to a location
-   * derived from {@link #getTempLocation()}.
-   */
-  @Description("GCS path for staging local files, e.g. \"gs://bucket/object\". "
-      + "Must be a valid Cloud Storage URL, beginning with the prefix \"gs://\". "
-      + "At least one of stagingLocation or tempLocation must be set. If stagingLocation is unset, "
-      + "defaults to using tempLocation.")
-  @Validation.Required(groups = {DATAFLOW_STORAGE_LOCATION})
-  String getStagingLocation();
-  void setStagingLocation(String value);
-
-  /**
-   * The Dataflow job name is used as an idempotence key within the Dataflow service.
-   * If there is an existing job that is currently active, another active job with the same
-   * name will not be able to be created. Defaults to using the ApplicationName-UserName-Date.
-   */
-  @Description("The Dataflow job name is used as an idempotence key within the Dataflow service. "
-      + "If there is an existing job that is currently active, another active job with the same "
-      + "name will not be able to be created. Defaults to using the ApplicationName-UserName-Date.")
-  @Default.InstanceFactory(JobNameFactory.class)
-  String getJobName();
-  void setJobName(String value);
-
-  /**
-   * Whether to update the currently running pipeline with the same name as this one.
-   */
-  @Override
-  @SuppressWarnings("deprecation") // base class member deprecated in favor of this one.
-  @Description(
-      "If set, replace the existing pipeline with the name specified by --jobName with "
-          + "this pipeline, preserving state.")
-  boolean getUpdate();
-  @Override
-  @SuppressWarnings("deprecation") // base class member deprecated in favor of this one.
-  void setUpdate(boolean value);
-
-  /**
-   * Returns a normalized job name constructed from {@link ApplicationNameOptions#getAppName()}, the
-   * local system user name (if available), and the current time. The normalization lower-cases
-   * the application and user names and replaces characters outside {@code [a-z0-9]} so that the
-   * job name matches the required pattern of [a-z]([-a-z0-9]*[a-z0-9])?.
-   *
-   * <p>Note that this factory does not truncate the result, so it does not by itself enforce any
-   * length limit on the job name.
-   *
-   * <p>This job name factory is only able to generate one unique name per second per application
-   * and user combination.
-   */
-  public static class JobNameFactory implements DefaultValueFactory<String> {
-    /** Formats the timestamp suffix in UTC with one-second resolution. */
-    private static final DateTimeFormatter FORMATTER =
-        DateTimeFormat.forPattern("MMddHHmmss").withZone(DateTimeZone.UTC);
-
-    /**
-     * Builds the default job name as {@code <appName>-<userName>-<MMddHHmmss>}.
-     *
-     * @param options the options from which the application name is read
-     * @return the generated job name; {@code "dataflow"} is used as the application part when the
-     *     application name is null or empty
-     */
-    @Override
-    public String create(PipelineOptions options) {
-      String appName = options.as(ApplicationNameOptions.class).getAppName();
-      // Lower-case with Locale.ROOT so the result does not depend on the JVM default locale
-      // (e.g. under a Turkish locale 'I' would otherwise become a dotless i and then '0').
-      String normalizedAppName = appName == null || appName.length() == 0 ? "dataflow"
-          : appName.toLowerCase(java.util.Locale.ROOT)
-                   .replaceAll("[^a-z0-9]", "0")
-                   .replaceAll("^[^a-z]", "a");
-      String userName = MoreObjects.firstNonNull(System.getProperty("user.name"), "");
-      String normalizedUserName = userName.toLowerCase(java.util.Locale.ROOT)
-                                          .replaceAll("[^a-z0-9]", "0");
-      String datePart = FORMATTER.print(DateTimeUtils.currentTimeMillis());
-      return normalizedAppName + "-" + normalizedUserName + "-" + datePart;
-    }
-  }
-}

[35/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
deleted file mode 100644
index cd0ebc6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
+++ /dev/null
@@ -1,3003 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.runners;
-
-import static com.google.cloud.dataflow.sdk.util.StringUtils.approximatePTransformName;
-import static com.google.cloud.dataflow.sdk.util.StringUtils.approximateSimpleName;
-import static com.google.cloud.dataflow.sdk.util.WindowedValue.valueInEmptyWindows;
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.api.client.googleapis.json.GoogleJsonResponseException;
-import com.google.api.services.clouddebugger.v2.Clouddebugger;
-import com.google.api.services.clouddebugger.v2.model.Debuggee;
-import com.google.api.services.clouddebugger.v2.model.RegisterDebuggeeRequest;
-import com.google.api.services.clouddebugger.v2.model.RegisterDebuggeeResponse;
-import com.google.api.services.dataflow.Dataflow;
-import com.google.api.services.dataflow.model.DataflowPackage;
-import com.google.api.services.dataflow.model.Job;
-import com.google.api.services.dataflow.model.ListJobsResponse;
-import com.google.api.services.dataflow.model.WorkerPool;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
-import com.google.cloud.dataflow.sdk.PipelineResult.State;
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.AvroCoder;
-import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.coders.ListCoder;
-import com.google.cloud.dataflow.sdk.coders.MapCoder;
-import com.google.cloud.dataflow.sdk.coders.SerializableCoder;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.coders.VarIntCoder;
-import com.google.cloud.dataflow.sdk.coders.VarLongCoder;
-import com.google.cloud.dataflow.sdk.io.AvroIO;
-import com.google.cloud.dataflow.sdk.io.BigQueryIO;
-import com.google.cloud.dataflow.sdk.io.FileBasedSink;
-import com.google.cloud.dataflow.sdk.io.PubsubIO;
-import com.google.cloud.dataflow.sdk.io.Read;
-import com.google.cloud.dataflow.sdk.io.ShardNameTemplate;
-import com.google.cloud.dataflow.sdk.io.TextIO;
-import com.google.cloud.dataflow.sdk.io.UnboundedSource;
-import com.google.cloud.dataflow.sdk.io.Write;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineDebugOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator;
-import com.google.cloud.dataflow.sdk.options.StreamingOptions;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.JobSpecification;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext;
-import com.google.cloud.dataflow.sdk.runners.dataflow.AssignWindows;
-import com.google.cloud.dataflow.sdk.runners.dataflow.DataflowAggregatorTransforms;
-import com.google.cloud.dataflow.sdk.runners.dataflow.PubsubIOTranslator;
-import com.google.cloud.dataflow.sdk.runners.dataflow.ReadTranslator;
-import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat;
-import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.IsmRecord;
-import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.IsmRecordCoder;
-import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.MetadataKeyCoder;
-import com.google.cloud.dataflow.sdk.transforms.Aggregator;
-import com.google.cloud.dataflow.sdk.transforms.Combine;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.Flatten;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.transforms.View;
-import com.google.cloud.dataflow.sdk.transforms.View.CreatePCollectionView;
-import com.google.cloud.dataflow.sdk.transforms.WithKeys;
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterPane;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.DefaultTrigger;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.util.DataflowReleaseInfo;
-import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
-import com.google.cloud.dataflow.sdk.util.InstanceBuilder;
-import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
-import com.google.cloud.dataflow.sdk.util.PCollectionViews;
-import com.google.cloud.dataflow.sdk.util.PathValidator;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.Reshuffle;
-import com.google.cloud.dataflow.sdk.util.SystemDoFnInternal;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.cloud.dataflow.sdk.util.ValueWithRecordId;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TupleTagList;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Function;
-import com.google.common.base.Joiner;
-import com.google.common.base.Optional;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Strings;
-import com.google.common.base.Utf8;
-import com.google.common.collect.ForwardingMap;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Multimap;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import org.joda.time.DateTimeUtils;
-import org.joda.time.DateTimeZone;
-import org.joda.time.Duration;
-import org.joda.time.format.DateTimeFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.PrintWriter;
-import java.io.Serializable;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.net.URLClassLoader;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TreeSet;
-
-/**
- * A {@link PipelineRunner} that executes the operations in the
- * pipeline by first translating them to the Dataflow representation
- * using the {@link DataflowPipelineTranslator} and then submitting
- * them to a Dataflow service for execution.
- *
- * <p><h3>Permissions</h3>
- * When reading from a Dataflow source or writing to a Dataflow sink using
- * {@code DataflowPipelineRunner}, the Google cloudservices account and the Google compute engine
- * service account of the GCP project running the Dataflow Job will need access to the corresponding
- * source/sink.
- *
- * <p>Please see <a href="https://cloud.google.com/dataflow/security-and-permissions">Google Cloud
- * Dataflow Security and Permissions</a> for more details.
- */
-public class DataflowPipelineRunner extends PipelineRunner<DataflowPipelineJob> {
-  private static final Logger LOG = LoggerFactory.getLogger(DataflowPipelineRunner.class);
-
-  /** Provided configuration options. */
-  private final DataflowPipelineOptions options;
-
-  /** Client for the Dataflow service. This is used to actually submit jobs. */
-  private final Dataflow dataflowClient;
-
-  /** Translator for this DataflowPipelineRunner, based on options. */
-  private final DataflowPipelineTranslator translator;
-
-  /** Custom transforms implementations. */
-  private final Map<Class<?>, Class<?>> overrides;
-
-  /** A set of user defined functions to invoke at different points in execution. */
-  private DataflowPipelineRunnerHooks hooks;
-
-  // Environment version information.
-  private static final String ENVIRONMENT_MAJOR_VERSION = "4";
-
-  // Default Docker container images that execute Dataflow worker harness, residing in Google
-  // Container Registry, separately for Batch and Streaming.
-  public static final String BATCH_WORKER_HARNESS_CONTAINER_IMAGE
-      = "dataflow.gcr.io/v1beta3/java-batch:1.5.0";
-  public static final String STREAMING_WORKER_HARNESS_CONTAINER_IMAGE
-      = "dataflow.gcr.io/v1beta3/java-streaming:1.5.0";
-
-  // The limit of CreateJob request size (10 MiB).
-  private static final int CREATE_JOB_REQUEST_LIMIT_BYTES = 10 * 1024 * 1024;
-
-  // NOTE(review): presumably the PCollections that must be written in an indexed format;
-  // confirm against the code that populates this set.
-  private final Set<PCollection<?>> pcollectionsRequiringIndexedFormat;
-
-  /**
-   * Loose validation pattern for project IDs.
-   *
-   * <p>Project IDs must contain lowercase letters, digits, or dashes.
-   * IDs must start with a letter and may not end with a dash.
-   * This regex isn't exact - this allows for patterns that would be rejected by
-   * the service (for example, it also accepts {@code ':'} and {@code '.'} in the middle of the
-   * ID), but this is sufficient for basic validation of project IDs. The pattern requires at
-   * least three characters.
-   */
-  public static final String PROJECT_ID_REGEXP = "[a-z][-a-z0-9:.]+[a-z0-9]";
-
-  /**
-   * Construct a runner from the provided options.
-   *
-   * <p>Besides validating the options, this method fills in defaults on them: a missing temp
-   * location falls back to the staging location, a missing staging location is resolved as
-   * {@code "staging"} under the temp location, and missing files-to-stage are detected from the
-   * classpath of this class's class loader.
-   *
-   * @param options Properties that configure the runner.
-   * @return The newly created runner.
-   * @throws IllegalArgumentException if a required value is missing, the staging location cannot
-   *     be derived from the temp location, or the job name, project ID or number of worker
-   *     harness threads is invalid.
-   */
-  public static DataflowPipelineRunner fromOptions(PipelineOptions options) {
-    // (Re-)register standard IO factories. Clobbers any prior credentials.
-    IOChannelUtils.registerStandardIOFactories(options);
-
-    DataflowPipelineOptions dataflowOptions =
-        PipelineOptionsValidator.validate(DataflowPipelineOptions.class, options);
-    List<String> missing = new ArrayList<>();
-
-    if (dataflowOptions.getAppName() == null) {
-      missing.add("appName");
-    }
-    if (missing.size() > 0) {
-      throw new IllegalArgumentException(
-          "Missing required values: " + Joiner.on(',').join(missing));
-    }
-
-    // Validate whichever locations were supplied before deriving the missing one from the other.
-    PathValidator validator = dataflowOptions.getPathValidator();
-    if (dataflowOptions.getStagingLocation() != null) {
-      validator.validateOutputFilePrefixSupported(dataflowOptions.getStagingLocation());
-    }
-    if (dataflowOptions.getTempLocation() != null) {
-      validator.validateOutputFilePrefixSupported(dataflowOptions.getTempLocation());
-    }
-    if (Strings.isNullOrEmpty(dataflowOptions.getTempLocation())) {
-      dataflowOptions.setTempLocation(dataflowOptions.getStagingLocation());
-    } else if (Strings.isNullOrEmpty(dataflowOptions.getStagingLocation())) {
-      try {
-        dataflowOptions.setStagingLocation(
-            IOChannelUtils.resolve(dataflowOptions.getTempLocation(), "staging"));
-      } catch (IOException e) {
-        throw new IllegalArgumentException("Unable to resolve PipelineOptions.stagingLocation "
-            + "from PipelineOptions.tempLocation. Please set the staging location explicitly.", e);
-      }
-    }
-
-    if (dataflowOptions.getFilesToStage() == null) {
-      dataflowOptions.setFilesToStage(detectClassPathResourcesToStage(
-          DataflowPipelineRunner.class.getClassLoader()));
-      LOG.info("PipelineOptions.filesToStage was not specified. "
-          + "Defaulting to files from the classpath: will stage {} files. "
-          + "Enable logging at DEBUG level to see which files will be staged.",
-          dataflowOptions.getFilesToStage().size());
-      LOG.debug("Classpath elements: {}", dataflowOptions.getFilesToStage());
-    }
-
-    // Verify jobName according to service requirements. The name is lower-cased with Locale.ROOT
-    // so that the check does not depend on the JVM default locale (under a Turkish locale an
-    // upper-case 'I' would otherwise map to a dotless i and fail the pattern).
-    String jobName = dataflowOptions.getJobName().toLowerCase(java.util.Locale.ROOT);
-    Preconditions.checkArgument(
-        jobName.matches("[a-z]([-a-z0-9]*[a-z0-9])?"),
-        "JobName invalid; the name must consist of only the characters "
-            + "[-a-z0-9], starting with a letter and ending with a letter "
-            + "or number");
-
-    // Verify project: an all-digit value is a project number, anything else that fails the
-    // pattern is treated as a project description.
-    String project = dataflowOptions.getProject();
-    if (project.matches("[0-9]*")) {
-      throw new IllegalArgumentException("Project ID '" + project
-          + "' invalid. Please make sure you specified the Project ID, not project number.");
-    } else if (!project.matches(PROJECT_ID_REGEXP)) {
-      throw new IllegalArgumentException("Project ID '" + project
-          + "' invalid. Please make sure you specified the Project ID, not project description.");
-    }
-
-    DataflowPipelineDebugOptions debugOptions =
-        dataflowOptions.as(DataflowPipelineDebugOptions.class);
-    // Verify the number of worker threads is a valid value
-    int workerHarnessThreads = debugOptions.getNumberOfWorkerHarnessThreads();
-    if (workerHarnessThreads < 0) {
-      throw new IllegalArgumentException("Number of worker harness threads '"
-          + workerHarnessThreads
-          + "' invalid. Please make sure the value is non-negative.");
-    }
-
-    return new DataflowPipelineRunner(dataflowOptions);
-  }
-
-  /**
-   * Creates a runner for the given options and builds the table of transform overrides, i.e. the
-   * mapping from a transform class to the runner-specific class that {@code apply} substitutes
-   * for it. The table differs between streaming and batch execution.
-   */
-  @VisibleForTesting protected DataflowPipelineRunner(DataflowPipelineOptions options) {
-    this.options = options;
-    this.dataflowClient = options.getDataflowClient();
-    this.translator = DataflowPipelineTranslator.fromOptions(options);
-    this.pcollectionsRequiringIndexedFormat = new HashSet<>();
-    this.ptransformViewsWithNonDeterministicKeyCoders = new HashSet<>();
-
-    ImmutableMap.Builder<Class<?>, Class<?>> table = ImmutableMap.<Class<?>, Class<?>>builder();
-    if (options.isStreaming()) {
-      table.put(Combine.GloballyAsSingletonView.class, StreamingCombineGloballyAsSingletonView.class);
-      table.put(Create.Values.class, StreamingCreate.class);
-      table.put(View.AsMap.class, StreamingViewAsMap.class);
-      table.put(View.AsMultimap.class, StreamingViewAsMultimap.class);
-      table.put(View.AsSingleton.class, StreamingViewAsSingleton.class);
-      table.put(View.AsList.class, StreamingViewAsList.class);
-      table.put(View.AsIterable.class, StreamingViewAsIterable.class);
-      table.put(Write.Bound.class, StreamingWrite.class);
-      table.put(PubsubIO.Write.Bound.class, StreamingPubsubIOWrite.class);
-      table.put(Read.Unbounded.class, StreamingUnboundedRead.class);
-      table.put(Read.Bounded.class, UnsupportedIO.class);
-      table.put(AvroIO.Read.Bound.class, UnsupportedIO.class);
-      table.put(AvroIO.Write.Bound.class, UnsupportedIO.class);
-      table.put(BigQueryIO.Read.Bound.class, UnsupportedIO.class);
-      table.put(TextIO.Read.Bound.class, UnsupportedIO.class);
-      table.put(TextIO.Write.Bound.class, UnsupportedIO.class);
-      table.put(Window.Bound.class, AssignWindows.class);
-    } else {
-      table.put(Read.Unbounded.class, UnsupportedIO.class);
-      table.put(Window.Bound.class, AssignWindows.class);
-      table.put(Write.Bound.class, BatchWrite.class);
-      table.put(AvroIO.Write.Bound.class, BatchAvroIOWrite.class);
-      table.put(TextIO.Write.Bound.class, BatchTextIOWrite.class);
-
-      // The batch view overrides are skipped only when the experiment is explicitly listed.
-      boolean ismSideInputDisabled = options.getExperiments() != null
-          && options.getExperiments().contains("disable_ism_side_input");
-      if (!ismSideInputDisabled) {
-        table.put(View.AsMap.class, BatchViewAsMap.class);
-        table.put(View.AsMultimap.class, BatchViewAsMultimap.class);
-        table.put(View.AsSingleton.class, BatchViewAsSingleton.class);
-        table.put(View.AsList.class, BatchViewAsList.class);
-        table.put(View.AsIterable.class, BatchViewAsIterable.class);
-      }
-    }
-    overrides = table.build();
-  }
-
-  /**
-   * Applies the given transform to the input, intercepting those transforms for which the
-   * Dataflow pipeline runner has a customized definition.
-   *
-   * <p>The cases are tried in order: {@code GroupByKey} and {@code Combine.GroupedValues} yield
-   * a primitive output, {@code Window.Bound} is routed through {@code applyWindow}, a flatten of
-   * an empty {@link PCollectionList} becomes an empty {@code Create}, a transform whose class is
-   * a key of {@code overrides} is replaced by a reflectively built instance of the mapped class,
-   * and anything else is delegated to the superclass.
-   */
-  @Override
-  public <OutputT extends POutput, InputT extends PInput> OutputT apply(
-      PTransform<InputT, OutputT> transform, InputT input) {
-    Class<?> transformType = transform.getClass();
-
-    if (Combine.GroupedValues.class.equals(transformType)
-        || GroupByKey.class.equals(transformType)) {
-      // For both Dataflow runners (streaming and batch), GroupByKey and GroupedValues are
-      // primitives. Returning a primitive output instead of the expanded definition
-      // signals to the translator that translation is necessary.
-      @SuppressWarnings("unchecked")
-      PCollection<?> pc = (PCollection<?>) input;
-      WindowingStrategy<?, ?> outputStrategy = transform instanceof GroupByKey
-          ? ((GroupByKey<?, ?>) transform).updateWindowingStrategy(pc.getWindowingStrategy())
-          : pc.getWindowingStrategy();
-      @SuppressWarnings("unchecked")
-      OutputT primitiveOutput = (OutputT) PCollection.createPrimitiveOutputInternal(
-          pc.getPipeline(), outputStrategy, pc.isBounded());
-      return primitiveOutput;
-    }
-
-    if (Window.Bound.class.equals(transformType)) {
-      /*
-       * TODO: make this the generic way overrides are applied (using super.apply() rather than
-       * Pipeline.applyTransform(); this allows the apply method to be replaced without inserting
-       * additional nodes into the graph.
-       */
-      // casting to wildcard
-      @SuppressWarnings("unchecked")
-      OutputT windowed = (OutputT) applyWindow((Window.Bound<?>) transform, (PCollection<?>) input);
-      return windowed;
-    }
-
-    if (Flatten.FlattenPCollectionList.class.equals(transformType)
-        && ((PCollectionList<?>) input).size() == 0) {
-      // Flattening nothing is expressed as creating an empty collection.
-      return (OutputT) Pipeline.applyTransform(input, Create.of());
-    }
-
-    if (!overrides.containsKey(transformType)) {
-      return super.apply(transform, input);
-    }
-
-    // It is the responsibility of whoever constructs overrides to ensure this is type safe.
-    @SuppressWarnings("unchecked")
-    Class<PTransform<InputT, OutputT>> originalClass =
-        (Class<PTransform<InputT, OutputT>>) transform.getClass();
-
-    @SuppressWarnings("unchecked")
-    Class<PTransform<InputT, OutputT>> replacementClass =
-        (Class<PTransform<InputT, OutputT>>) overrides.get(transformType);
-
-    // The replacement is built from this runner and the transform being replaced.
-    PTransform<InputT, OutputT> replacement =
-        InstanceBuilder.ofType(replacementClass)
-            .withArg(DataflowPipelineRunner.class, this)
-            .withArg(originalClass, transform)
-            .build();
-
-    return Pipeline.applyTransform(input, replacement);
-  }
-
-  /**
-   * Applies {@code AssignWindows} built from the given window transform to the given input via
-   * the superclass {@code apply}, after narrowing both wildcard arguments to a common {@code T}.
-   */
-  private <T> PCollection<T> applyWindow(
-      Window.Bound<?> initialTransform, PCollection<?> initialInput) {
-    // types are matched at compile time
-    @SuppressWarnings("unchecked")
-    PCollection<T> typedInput = (PCollection<T>) initialInput;
-    @SuppressWarnings("unchecked")
-    Window.Bound<T> typedTransform = (Window.Bound<T>) initialTransform;
-    return super.apply(new AssignWindows<>(typedTransform), typedInput);
-  }
-
-  /**
-   * Formats the user-facing hint that points at the Cloud Debugger console page for the given
-   * project and debuggee uniquifier.
-   */
-  private String debuggerMessage(String projectId, String uniquifier) {
-    String template = "To debug your job, visit Google Cloud Debugger at: "
-        + "https://console.developers.google.com/debug?project=%s&dbgee=%s";
-    return String.format(template, projectId, uniquifier);
-  }
-
-  /**
-   * Registers a debuggee with the Cloud Debugger when {@code enableCloudDebugger} is set, stores
-   * it on the options and prints the debugger link to standard output. Does nothing when the
-   * debugger is not enabled.
-   *
-   * @throws RuntimeException if the options already carry a debuggee.
-   */
-  private void maybeRegisterDebuggee(DataflowPipelineOptions options, String uniquifier) {
-    if (!options.getEnableCloudDebugger()) {
-      return;
-    }
-    if (options.getDebuggee() != null) {
-      throw new RuntimeException("Should not specify the debuggee");
-    }
-
-    Debuggee registered =
-        registerDebuggee(Transport.newClouddebuggerClient(options).build(), uniquifier);
-    options.setDebuggee(registered);
-
-    String hint = debuggerMessage(options.getProject(), registered.getUniquifier());
-    System.out.println(hint);
-  }
-
-  /**
-   * Sends a register-debuggee request for this runner's project, using {@code uniquifier} as
-   * both the uniquifier and the description, and returns the debuggee from the response.
-   *
-   * @throws RuntimeException if the call fails with an {@link IOException} or the returned
-   *     debuggee carries an error status.
-   */
-  private Debuggee registerDebuggee(Clouddebugger debuggerClient, String uniquifier) {
-    Debuggee requested = new Debuggee()
-        .setProject(options.getProject())
-        .setUniquifier(uniquifier)
-        .setDescription(uniquifier)
-        .setAgentVersion("google.com/cloud-dataflow-java/v1");
-    RegisterDebuggeeRequest request = new RegisterDebuggeeRequest();
-    request.setDebuggee(requested);
-
-    RegisterDebuggeeResponse response;
-    try {
-      response = debuggerClient.controller().debuggees().register(request).execute();
-    } catch (IOException e) {
-      throw new RuntimeException("Unable to register with the debugger: ", e);
-    }
-
-    Debuggee registered = response.getDebuggee();
-    boolean failed = registered.getStatus() != null && registered.getStatus().getIsError();
-    if (failed) {
-      throw new RuntimeException("Unable to register with the debugger: " +
-          registered.getStatus().getDescription().getFormat());
-    }
-    return registered;
-  }
-
-  @Override
-  public DataflowPipelineJob run(Pipeline pipeline) {
-    logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline);
-
-    LOG.info("Executing pipeline on the Dataflow Service, which will have billing implications "
-        + "related to Google Compute Engine usage and other Google Cloud Services.");
-
-    List<DataflowPackage> packages = options.getStager().stageFiles();
-
-
-    // Set a unique client_request_id in the CreateJob request.
-    // This is used to ensure idempotence of job creation across retried
-    // attempts to create a job. Specifically, if the service returns a job with
-    // a different client_request_id, it means the returned one is a different
-    // job previously created with the same job name, and that the job creation
-    // has been effectively rejected. The SDK should return
-    // Error::Already_Exists to user in that case.
-    int randomNum = new Random().nextInt(9000) + 1000;
-    String requestId = DateTimeFormat.forPattern("YYYYMMddHHmmssmmm").withZone(DateTimeZone.UTC)
-        .print(DateTimeUtils.currentTimeMillis()) + "_" + randomNum;
-
-    // Try to create a debuggee ID. This must happen before the job is translated since it may
-    // update the options.
-    DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
-    maybeRegisterDebuggee(dataflowOptions, requestId);
-
-    JobSpecification jobSpecification =
-        translator.translate(pipeline, this, packages);
-    Job newJob = jobSpecification.getJob();
-    newJob.setClientRequestId(requestId);
-
-    String version = DataflowReleaseInfo.getReleaseInfo().getVersion();
-    System.out.println("Dataflow SDK version: " + version);
-
-    newJob.getEnvironment().setUserAgent(DataflowReleaseInfo.getReleaseInfo());
-    // The Dataflow Service may write to the temporary directory directly, so
-    // must be verified.
-    if (!Strings.isNullOrEmpty(options.getTempLocation())) {
-      newJob.getEnvironment().setTempStoragePrefix(
-          dataflowOptions.getPathValidator().verifyPath(options.getTempLocation()));
-    }
-    newJob.getEnvironment().setDataset(options.getTempDatasetId());
-    newJob.getEnvironment().setExperiments(options.getExperiments());
-
-    // Set the Docker container image that executes Dataflow worker harness, residing in Google
-    // Container Registry. Translator is guaranteed to create a worker pool prior to this point.
-    String workerHarnessContainerImage =
-        options.as(DataflowPipelineWorkerPoolOptions.class)
-        .getWorkerHarnessContainerImage();
-    for (WorkerPool workerPool : newJob.getEnvironment().getWorkerPools()) {
-      workerPool.setWorkerHarnessContainerImage(workerHarnessContainerImage);
-    }
-
-    // Requirements about the service.
-    Map<String, Object> environmentVersion = new HashMap<>();
-    environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, ENVIRONMENT_MAJOR_VERSION);
-    newJob.getEnvironment().setVersion(environmentVersion);
-    // Default jobType is JAVA_BATCH_AUTOSCALING: A Java job with workers that the job can
-    // autoscale if specified.
-    String jobType = "JAVA_BATCH_AUTOSCALING";
-
-    if (options.isStreaming()) {
-      jobType = "STREAMING";
-    }
-    environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_JOB_TYPE_KEY, jobType);
-
-    if (hooks != null) {
-      hooks.modifyEnvironmentBeforeSubmission(newJob.getEnvironment());
-    }
-
-    if (!Strings.isNullOrEmpty(options.getDataflowJobFile())) {
-      try (PrintWriter printWriter = new PrintWriter(
-          new File(options.getDataflowJobFile()))) {
-        String workSpecJson = DataflowPipelineTranslator.jobToString(newJob);
-        printWriter.print(workSpecJson);
-        LOG.info("Printed workflow specification to {}", options.getDataflowJobFile());
-      } catch (IllegalStateException ex) {
-        LOG.warn("Cannot translate workflow spec to json for debug.");
-      } catch (FileNotFoundException ex) {
-        LOG.warn("Cannot create workflow spec output file.");
-      }
-    }
-
-    String jobIdToUpdate = null;
-    if (options.getUpdate()) {
-      jobIdToUpdate = getJobIdFromName(options.getJobName());
-      newJob.setTransformNameMapping(options.getTransformNameMapping());
-      newJob.setReplaceJobId(jobIdToUpdate);
-    }
-    Job jobResult;
-    try {
-      jobResult = dataflowClient
-              .projects()
-              .jobs()
-              .create(options.getProject(), newJob)
-              .execute();
-    } catch (GoogleJsonResponseException e) {
-      String errorMessages = "Unexpected errors";
-      if (e.getDetails() != null) {
-        if (Utf8.encodedLength(newJob.toString()) >= CREATE_JOB_REQUEST_LIMIT_BYTES) {
-          errorMessages = "The size of the serialized JSON representation of the pipeline "
-              + "exceeds the allowable limit. "
-              + "For more information, please check the FAQ link below:\n"
-              + "https://cloud.google.com/dataflow/faq";
-        } else {
-          errorMessages = e.getDetails().getMessage();
-        }
-      }
-      throw new RuntimeException("Failed to create a workflow job: " + errorMessages, e);
-    } catch (IOException e) {
-      throw new RuntimeException("Failed to create a workflow job", e);
-    }
-
-    // Obtain all of the extractors from the PTransforms used in the pipeline so the
-    // DataflowPipelineJob has access to them.
-    AggregatorPipelineExtractor aggregatorExtractor = new AggregatorPipelineExtractor(pipeline);
-    Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorSteps =
-        aggregatorExtractor.getAggregatorSteps();
-
-    DataflowAggregatorTransforms aggregatorTransforms =
-        new DataflowAggregatorTransforms(aggregatorSteps, jobSpecification.getStepNames());
-
-    // Use a raw client for post-launch monitoring, as status calls may fail
-    // regularly and need not be retried automatically.
-    DataflowPipelineJob dataflowPipelineJob =
-        new DataflowPipelineJob(options.getProject(), jobResult.getId(),
-            Transport.newRawDataflowClient(options).build(), aggregatorTransforms);
-
-    // If the service returned client request id, the SDK needs to compare it
-    // with the original id generated in the request, if they are not the same
-    // (i.e., the returned job is not created by this request), throw
-    // DataflowJobAlreadyExistsException or DataflowJobAlreadyUpdatedExcetpion
-    // depending on whether this is a reload or not.
-    if (jobResult.getClientRequestId() != null && !jobResult.getClientRequestId().isEmpty()
-        && !jobResult.getClientRequestId().equals(requestId)) {
-      // If updating a job.
-      if (options.getUpdate()) {
-        throw new DataflowJobAlreadyUpdatedException(dataflowPipelineJob,
-            String.format("The job named %s with id: %s has already been updated into job id: %s "
-                + "and cannot be updated again.",
-                newJob.getName(), jobIdToUpdate, jobResult.getId()));
-      } else {
-        throw new DataflowJobAlreadyExistsException(dataflowPipelineJob,
-            String.format("There is already an active job named %s with id: %s. If you want "
-                + "to submit a second job, try again by setting a different name using --jobName.",
-                newJob.getName(), jobResult.getId()));
-      }
-    }
-
-    LOG.info("To access the Dataflow monitoring console, please navigate to {}",
-        MonitoringUtil.getJobMonitoringPageURL(options.getProject(), jobResult.getId()));
-    System.out.println("Submitted job: " + jobResult.getId());
-
-    LOG.info("To cancel the job using the 'gcloud' tool, run:\n> {}",
-        MonitoringUtil.getGcloudCancelCommand(options, jobResult.getId()));
-
-    return dataflowPipelineJob;
-  }
-
-  /**
-   * Returns the {@link DataflowPipelineTranslator} that this runner created from its options.
-   */
-  public DataflowPipelineTranslator getTranslator() {
-    return this.translator;
-  }
-
-  /**
-   * Sets callbacks to invoke during execution; see {@code DataflowPipelineRunnerHooks}.
-   *
-   * @param hooks the hooks to store on this runner, replacing any previously stored value.
-   */
-  @Experimental
-  public void setHooks(DataflowPipelineRunnerHooks hooks) {
-    this.hooks = hooks;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Logs one warning naming every recorded view transform whose key coder is not deterministic.
-   * Nothing is logged when no such transform was recorded.
-   */
-  private void logWarningIfPCollectionViewHasNonDeterministicKeyCoder(Pipeline pipeline) {
-    if (ptransformViewsWithNonDeterministicKeyCoders.isEmpty()) {
-      return;
-    }
-
-    // The full names can only be collected now: the transform hierarchy is not known any
-    // earlier, otherwise the names could simply have been recorded at apply time.
-    final SortedSet<String> affectedViewNames = new TreeSet<>();
-    pipeline.traverseTopologically(new PipelineVisitor() {
-      @Override
-      public void enterCompositeTransform(TransformTreeNode node) {
-        collectIfRecorded(node);
-      }
-
-      @Override
-      public void leaveCompositeTransform(TransformTreeNode node) {
-      }
-
-      @Override
-      public void visitTransform(TransformTreeNode node) {
-        collectIfRecorded(node);
-      }
-
-      @Override
-      public void visitValue(PValue value, TransformTreeNode producer) {
-      }
-
-      /** Adds the node's full name when its transform is one of the recorded views. */
-      private void collectIfRecorded(TransformTreeNode node) {
-        if (ptransformViewsWithNonDeterministicKeyCoders.contains(node.getTransform())) {
-          affectedViewNames.add(node.getFullName());
-        }
-      }
-    });
-
-    LOG.warn("Unable to use indexed implementation for View.AsMap and View.AsMultimap for {} "
-        + "because the key coder is not deterministic. Falling back to singleton implementation "
-        + "which may cause memory and/or performance problems. Future major versions of "
-        + "Dataflow will require deterministic key coders.",
-        affectedViewNames);
-  }
-
-  /**
-   * Returns true if the given {@link PCollection} was marked as needing to be materialized
-   * using an indexed format.
-   */
-  boolean doesPCollectionRequireIndexedFormat(PCollection<?> pcol) {
-    return this.pcollectionsRequiringIndexedFormat.contains(pcol);
-  }
-
-  /**
-   * Marks the given {@link PCollection} as one that has to be materialized using an indexed
-   * format.
-   */
-  private void addPCollectionRequiringIndexedFormat(PCollection<?> pcol) {
-    this.pcollectionsRequiringIndexedFormat.add(pcol);
-  }
-
-  /**
-   * A set of {@link View}s with non-deterministic key coders. Initialized to an empty set by the
-   * constructor, filled by {@code recordViewUsesNonDeterministicKeyCoder} and consulted when the
-   * pipeline is run in order to warn about the affected transforms.
-   */
-  Set<PTransform<?, ?>> ptransformViewsWithNonDeterministicKeyCoders;
-
-  /**
-   * Remembers the given {@link PTransform} as a view whose key coder is not deterministic.
-   */
-  private void recordViewUsesNonDeterministicKeyCoder(PTransform<?, ?> ptransform) {
-    this.ptransformViewsWithNonDeterministicKeyCoders.add(ptransform);
-  }
-
-  /**
-   * A {@link GroupByKey} transform for the {@link DataflowPipelineRunner} which sorts
-   * values using the secondary key {@code K2}.
-   *
-   * <p>The output {@link PCollection} is created as a primitive output with the global default
-   * windowing strategy and is marked as bounded, independently of the input. Care must be taken
-   * *afterwards* to either re-window (using {@link Window#into}) or only use
-   * {@link PTransform}s that do not depend on the values being within a window.
-   */
-  static class GroupByKeyAndSortValuesOnly<K1, K2, V>
-      extends PTransform<PCollection<KV<K1, KV<K2, V>>>, PCollection<KV<K1, Iterable<KV<K2, V>>>>> {
-    private GroupByKeyAndSortValuesOnly() {
-    }
-
-    /**
-     * Declares the grouped output and derives its coder from the input's {@link KvCoder}: the
-     * key coder is reused and the value coder is wrapped in an {@link IterableCoder}.
-     */
-    @Override
-    public PCollection<KV<K1, Iterable<KV<K2, V>>>> apply(PCollection<KV<K1, KV<K2, V>>> input) {
-      PCollection<KV<K1, Iterable<KV<K2, V>>>> grouped =
-          PCollection.<KV<K1, Iterable<KV<K2, V>>>>createPrimitiveOutputInternal(
-              input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.BOUNDED);
-
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      KvCoder<K1, KV<K2, V>> kvCoder = (KvCoder) input.getCoder();
-      Coder<K1> keyCoder = kvCoder.getKeyCoder();
-      Coder<Iterable<KV<K2, V>>> groupedValuesCoder = IterableCoder.of(kvCoder.getValueCoder());
-      grouped.setCoder(KvCoder.of(keyCoder, groupedValuesCoder));
-      return grouped;
-    }
-  }
-
-  /**
-   * A {@link PTransform} that groups the values by a hash of the window's byte representation
-   * and sorts the values using the windows byte representation.
-   */
-  private static class GroupByWindowHashAsKeyAndWindowAsSortKey<T, W extends BoundedWindow> extends
-      PTransform<PCollection<T>, PCollection<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>>> {
-
-    /**
-     * A {@link DoFn} that for each element outputs a {@code KV} structure suitable for
-     * grouping by the hash of the window's byte representation and sorting the grouped values
-     * using the window's byte representation.
-     */
-    @SystemDoFnInternal
-    private static class UseWindowHashAsKeyAndWindowAsSortKeyDoFn<T, W extends BoundedWindow>
-        extends DoFn<T, KV<Integer, KV<W, WindowedValue<T>>>> implements DoFn.RequiresWindowAccess {
-
-      private final IsmRecordCoder<?> ismCoderForHash;
-      private UseWindowHashAsKeyAndWindowAsSortKeyDoFn(IsmRecordCoder<?> ismCoderForHash) {
-        this.ismCoderForHash = ismCoderForHash;
-      }
-
-      @Override
-      public void processElement(ProcessContext c) throws Exception {
-        @SuppressWarnings("unchecked")
-        W window = (W) c.window();
-        c.output(
-            KV.of(ismCoderForHash.hash(ImmutableList.of(window)),
-                KV.of(window,
-                    WindowedValue.of(
-                        c.element(),
-                        c.timestamp(),
-                        c.window(),
-                        c.pane()))));
-      }
-    }
-
-    private final IsmRecordCoder<?> ismCoderForHash;
-    private GroupByWindowHashAsKeyAndWindowAsSortKey(IsmRecordCoder<?> ismCoderForHash) {
-      this.ismCoderForHash = ismCoderForHash;
-    }
-
-    @Override
-    public PCollection<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>> apply(PCollection<T> input) {
-      @SuppressWarnings("unchecked")
-      Coder<W> windowCoder = (Coder<W>)
-          input.getWindowingStrategy().getWindowFn().windowCoder();
-      PCollection<KV<Integer, KV<W, WindowedValue<T>>>> rval =
-          input.apply(ParDo.of(
-              new UseWindowHashAsKeyAndWindowAsSortKeyDoFn<T, W>(ismCoderForHash)));
-      rval.setCoder(
-          KvCoder.of(
-              VarIntCoder.of(),
-              KvCoder.of(windowCoder,
-                  FullWindowedValueCoder.of(input.getCoder(), windowCoder))));
-      return rval.apply(new GroupByKeyAndSortValuesOnly<Integer, W, WindowedValue<T>>());
-    }
-  }
-
-  /**
-   * Specialized implementation for
-   * {@link com.google.cloud.dataflow.sdk.transforms.View.AsSingleton View.AsSingleton} for the
-   * Dataflow runner in batch mode.
-   *
-   * <p>Creates a set of files in the {@link IsmFormat} sharded by the hash of the window's
-   * byte representation and with records having:
-   * <ul>
-   *   <li>Key 1: Window</li>
-   *   <li>Value: Windowed value</li>
-   * </ul>
-   */
-  static class BatchViewAsSingleton<T>
-      extends PTransform<PCollection<T>, PCollectionView<T>> {
-
-    /**
-     * A {@link DoFn} that outputs one {@link IsmRecord} per grouped entry. These records are
-     * structured as follows:
-     * <ul>
-     *   <li>Key 1: Window</li>
-     *   <li>Value: Windowed value</li>
-     * </ul>
-     */
-    static class IsmRecordForSingularValuePerWindowDoFn<T, W extends BoundedWindow>
-        extends DoFn<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>,
-                     IsmRecord<WindowedValue<T>>> {
-
-      @Override
-      public void processElement(ProcessContext c) throws Exception {
-        for (KV<W, WindowedValue<T>> windowAndValue : c.element().getValue()) {
-          c.output(
-              IsmRecord.of(
-                  ImmutableList.of(windowAndValue.getKey()), windowAndValue.getValue()));
-        }
-      }
-    }
-
-    private final DataflowPipelineRunner runner;
-    private final View.AsSingleton<T> transform;
-
-    /**
-     * Builds an instance of this class from the overridden transform.
-     */
-    @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply()
-    public BatchViewAsSingleton(DataflowPipelineRunner runner, View.AsSingleton<T> transform) {
-      this.runner = runner;
-      this.transform = transform;
-    }
-
-    @Override
-    public PCollectionView<T> apply(PCollection<T> input) {
-      return BatchViewAsSingleton.<T, T, T, BoundedWindow>applyForSingleton(
-          runner,
-          input,
-          new IsmRecordForSingularValuePerWindowDoFn<T, BoundedWindow>(),
-          transform.hasDefaultValue(),
-          transform.defaultValue(),
-          input.getCoder());
-    }
-
-    /**
-     * Shared expansion for singleton-style views: groups and sorts the input per window, turns
-     * the groups into {@link IsmRecord}s with {@code doFn}, registers the resulting collection
-     * with the runner as requiring the indexed format, and creates the view from it.
-     */
-    static <T, FinalT, ViewT, W extends BoundedWindow> PCollectionView<ViewT>
-        applyForSingleton(
-            DataflowPipelineRunner runner,
-            PCollection<T> input,
-            DoFn<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>,
-                 IsmRecord<WindowedValue<FinalT>>> doFn,
-            boolean hasDefault,
-            FinalT defaultValue,
-            Coder<FinalT> defaultValueCoder) {
-
-      @SuppressWarnings("unchecked")
-      Coder<W> windowCoder = (Coder<W>)
-          input.getWindowingStrategy().getWindowFn().windowCoder();
-
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      PCollectionView<ViewT> singletonView =
-          (PCollectionView<ViewT>) PCollectionViews.<FinalT, W>singletonView(
-              input.getPipeline(),
-              (WindowingStrategy) input.getWindowingStrategy(),
-              hasDefault,
-              defaultValue,
-              defaultValueCoder);
-
-      IsmRecordCoder<WindowedValue<FinalT>> recordCoder =
-          coderForSingleton(windowCoder, defaultValueCoder);
-
-      PCollection<IsmRecord<WindowedValue<FinalT>>> ismRecords = input
-          .apply(new GroupByWindowHashAsKeyAndWindowAsSortKey<T, W>(recordCoder))
-          .apply(ParDo.of(doFn));
-      ismRecords.setCoder(recordCoder);
-
-      runner.addPCollectionRequiringIndexedFormat(ismRecords);
-      return ismRecords.apply(
-          CreatePCollectionView.<IsmRecord<WindowedValue<FinalT>>, ViewT>of(singletonView));
-    }
-
-    @Override
-    protected String getKindString() {
-      return "BatchViewAsSingleton";
-    }
-
-    /**
-     * Builds the {@link IsmRecordCoder} for singleton views: the window is the only key
-     * component and the value is encoded with a {@link FullWindowedValueCoder}.
-     */
-    static <T> IsmRecordCoder<WindowedValue<T>> coderForSingleton(
-        Coder<? extends BoundedWindow> windowCoder, Coder<T> valueCoder) {
-      int shardKeyComponents = 1; // We hash using only the window
-      int metadataShardKeyComponents = 0; // There are no metadata records
-      return IsmRecordCoder.of(
-          shardKeyComponents,
-          metadataShardKeyComponents,
-          ImmutableList.<Coder<?>>of(windowCoder),
-          FullWindowedValueCoder.of(valueCoder, windowCoder));
-    }
-  }
-
-  /**
-   * Specialized implementation for
-   * {@link com.google.cloud.dataflow.sdk.transforms.View.AsIterable View.AsIterable} for the
-   * Dataflow runner in batch mode.
-   *
-   * <p>Creates a set of {@code Ism} files sharded by the hash of the window's byte
-   * representation and with records having:
-   * <ul>
-   *   <li>Key 1: Window</li>
-   *   <li>Key 2: Index offset within window</li>
-   *   <li>Value: Windowed value</li>
-   * </ul>
-   *
-   * <p>The expansion itself is shared with {@code BatchViewAsList}.
-   */
-  static class BatchViewAsIterable<T>
-      extends PTransform<PCollection<T>, PCollectionView<Iterable<T>>> {
-
-    private final DataflowPipelineRunner runner;
-
-    /**
-     * Builds an instance of this class from the overridden transform. Only the runner is kept;
-     * the transform argument exists for the reflective constructor lookup.
-     */
-    @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply()
-    public BatchViewAsIterable(DataflowPipelineRunner runner, View.AsIterable<T> transform) {
-      this.runner = runner;
-    }
-
-    @Override
-    public PCollectionView<Iterable<T>> apply(PCollection<T> input) {
-      PCollectionView<Iterable<T>> iterableView =
-          PCollectionViews.iterableView(
-              input.getPipeline(),
-              input.getWindowingStrategy(),
-              input.getCoder());
-      return BatchViewAsList.applyForIterableLike(runner, input, iterableView);
-    }
-  }
-
-  /**
-   * Batch-mode replacement used by the Dataflow runner for
-   * {@link com.google.cloud.dataflow.sdk.transforms.View.AsList View.AsList}.
-   *
-   * <p>Creates a set of {@code Ism} files sharded by the hash of the window's byte representation
-   * and with records having:
-   * <ul>
-   *   <li>Key 1: Window</li>
-   *   <li>Key 2: Index offset within window</li>
-   *   <li>Value: Windowed value</li>
-   * </ul>
-   */
-  static class BatchViewAsList<T>
-      extends PTransform<PCollection<T>, PCollectionView<List<T>>> {
-    /**
-     * A {@link DoFn} for inputs that live in the global window. Every element is reified
-     * into an {@link IsmRecord} with
-     * <ul>
-     *   <li>Key 1: Global window</li>
-     *   <li>Key 2: Index offset within window</li>
-     *   <li>Value: Windowed value</li>
-     * </ul>
-     *
-     * <p>The index restarts at zero whenever a new bundle starts.
-     */
-    @SystemDoFnInternal
-    static class ToIsmRecordForGlobalWindowDoFn<T>
-        extends DoFn<T, IsmRecord<WindowedValue<T>>> {
-
-      long indexInBundle;
-
-      @Override
-      public void startBundle(Context c) throws Exception {
-        indexInBundle = 0;
-      }
-
-      @Override
-      public void processElement(ProcessContext c) throws Exception {
-        WindowedValue<T> reified =
-            WindowedValue.of(c.element(), c.timestamp(), GlobalWindow.INSTANCE, c.pane());
-        c.output(IsmRecord.of(ImmutableList.of(GlobalWindow.INSTANCE, indexInBundle), reified));
-        indexInBundle++;
-      }
-    }
-
-    /**
-     * A {@link DoFn} which turns a grouped sequence of (window, windowed value) pairs into
-     * {@link IsmRecord}s, detecting window boundaries by comparing the structural value of
-     * each element's window with that of its predecessor. The {@link IsmRecord} has:
-     * <ul>
-     *   <li>Key 1: Window</li>
-     *   <li>Key 2: Index offset within window</li>
-     *   <li>Value: Windowed value</li>
-     * </ul>
-     */
-    @SystemDoFnInternal
-    static class ToIsmRecordForNonGlobalWindowDoFn<T, W extends BoundedWindow>
-        extends DoFn<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>,
-                     IsmRecord<WindowedValue<T>>> {
-
-      private final Coder<W> windowCoder;
-
-      ToIsmRecordForNonGlobalWindowDoFn(Coder<W> windowCoder) {
-        this.windowCoder = windowCoder;
-      }
-
-      @Override
-      public void processElement(ProcessContext c) throws Exception {
-        Optional<Object> lastWindowStructuralValue = Optional.absent();
-        long indexInWindow = 0;
-        for (KV<W, WindowedValue<T>> windowAndValue : c.element().getValue()) {
-          W window = windowAndValue.getKey();
-          Object windowStructuralValue = windowCoder.structuralValue(window);
-          boolean sameWindowAsLast = !lastWindowStructuralValue.isPresent()
-              || lastWindowStructuralValue.get().equals(windowStructuralValue);
-          if (!sameWindowAsLast) {
-            // A different window starts here, so the per-window index starts over.
-            indexInWindow = 0;
-          }
-          c.output(IsmRecord.of(
-              ImmutableList.of(window, indexInWindow), windowAndValue.getValue()));
-          lastWindowStructuralValue = Optional.of(windowStructuralValue);
-          indexInWindow++;
-        }
-      }
-    }
-
-    private final DataflowPipelineRunner runner;
-
-    /**
-     * Creates the override for the given transform. Only the runner is retained; the
-     * overridden transform itself is not consulted.
-     */
-    @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply()
-    public BatchViewAsList(DataflowPipelineRunner runner, View.AsList<T> transform) {
-      this.runner = runner;
-    }
-
-    @Override
-    public PCollectionView<List<T>> apply(PCollection<T> input) {
-      PCollectionView<List<T>> listView = PCollectionViews.listView(
-          input.getPipeline(), input.getWindowingStrategy(), input.getCoder());
-      return applyForIterableLike(runner, input, listView);
-    }
-
-    /**
-     * Converts {@code input} into {@link IsmRecord}s encoded with {@link #coderForListLike},
-     * registers the resulting collection with the runner as requiring the indexed format and
-     * materializes it as {@code view}.
-     */
-    static <T, W extends BoundedWindow, ViewT> PCollectionView<ViewT> applyForIterableLike(
-        DataflowPipelineRunner runner,
-        PCollection<T> input,
-        PCollectionView<ViewT> view) {
-
-      @SuppressWarnings("unchecked")
-      Coder<W> windowCoder = (Coder<W>)
-          input.getWindowingStrategy().getWindowFn().windowCoder();
-
-      IsmRecordCoder<WindowedValue<T>> ismCoder = coderForListLike(windowCoder, input.getCoder());
-
-      final PCollection<IsmRecord<WindowedValue<T>>> ismRecords;
-      if (input.getWindowingStrategy().getWindowFn() instanceof GlobalWindows) {
-        // In the global window there is no need for a GBK keyed on the window since every
-        // element already shares it. The windowed value is simply reified while converting
-        // to IsmRecords, with an index based upon the position within the bundle. Each
-        // bundle maps to one file exactly.
-        ismRecords = input.apply(ParDo.of(new ToIsmRecordForGlobalWindowDoFn<T>()));
-      } else {
-        ismRecords = input
-            .apply(new GroupByWindowHashAsKeyAndWindowAsSortKey<T, W>(ismCoder))
-            .apply(ParDo.of(new ToIsmRecordForNonGlobalWindowDoFn<T, W>(windowCoder)));
-      }
-      ismRecords.setCoder(ismCoder);
-
-      runner.addPCollectionRequiringIndexedFormat(ismRecords);
-      return ismRecords.apply(
-          CreatePCollectionView.<IsmRecord<WindowedValue<T>>, ViewT>of(view));
-    }
-
-    @Override
-    protected String getKindString() {
-      return "BatchViewAsList";
-    }
-
-    /**
-     * Returns the {@link IsmRecordCoder} used for list-like views: the key components are
-     * the window followed by a big endian long index, and the value is the full windowed value.
-     */
-    static <T> IsmRecordCoder<WindowedValue<T>> coderForListLike(
-        Coder<? extends BoundedWindow> windowCoder, Coder<T> valueCoder) {
-      // TODO: swap to use a variable length long coder which has values which compare
-      // the same as their byte representation compare lexicographically within the key coder
-      return IsmRecordCoder.of(
-          1, // Only the window takes part in hashing
-          0, // No metadata records are written
-          ImmutableList.of(windowCoder, BigEndianLongCoder.of()),
-          FullWindowedValueCoder.of(valueCoder, windowCoder));
-    }
-  }
-
-  /**
-   * Specialized implementation for
-   * {@link com.google.cloud.dataflow.sdk.transforms.View.AsMap View.AsMap} for the
-   * Dataflow runner in batch mode.
-   *
-   * <p>Creates a set of {@code Ism} files sharded by the hash of the key's byte
-   * representation. Each record is structured as follows:
-   * <ul>
-   *   <li>Key 1: User key K</li>
-   *   <li>Key 2: Window</li>
-   *   <li>Key 3: 0L (constant)</li>
-   *   <li>Value: Windowed value</li>
-   * </ul>
-   *
-   * <p>Alongside the data records, there are the following metadata records:
-   * <ul>
-   *   <li>Key 1: Metadata Key</li>
-   *   <li>Key 2: Window</li>
-   *   <li>Key 3: Index [0, size of map]</li>
-   *   <li>Value: variable length long byte representation of size of map if index is 0,
-   *              otherwise the byte representation of a key</li>
-   * </ul>
-   * The {@code [META, Window, 0]} record stores the number of unique keys per window, while
-   * {@code [META, Window, i]} for {@code i} in {@code [1, size of map]} stores the user's key.
-   * This allows for one to access the size of the map by looking at {@code [META, Window, 0]}
-   * and iterate over all the keys by accessing {@code [META, Window, i]} for {@code i} in
-   * {@code [1, size of map]}.
-   *
-   * <p>Note that in the case of a non-deterministic key coder, we fall back to using
-   * {@link com.google.cloud.dataflow.sdk.transforms.View.AsSingleton View.AsSingleton} printing
-   * a warning to users to specify a deterministic key coder.
-   */
-  static class BatchViewAsMap<K, V>
-      extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, V>>> {
-
-    /**
-     * A {@link DoFn} which groups elements by window boundaries. For each group,
-     * the group of elements is transformed into a {@link TransformedMap}.
-     * The transformed {@code Map<K, V>} is backed by a {@code Map<K, WindowedValue<V>>}
-     * and contains a function {@code WindowedValue<V> -> V}.
-     *
-     * <p>Outputs {@link IsmRecord}s having:
-     * <ul>
-     *   <li>Key 1: Window</li>
-     *   <li>Value: Transformed map containing a transform that removes the encapsulation
-     *              of the window around each value,
-     *              {@code Map<K, WindowedValue<V>> -> Map<K, V>}.</li>
-     * </ul>
-     *
-     * <p>Window boundaries are found by comparing the structural value of each element's
-     * window with that of the element before it.
-     */
-    static class ToMapDoFn<K, V, W extends BoundedWindow>
-        extends DoFn<KV<Integer, Iterable<KV<W, WindowedValue<KV<K, V>>>>>,
-                     IsmRecord<WindowedValue<TransformedMap<K,
-                                             WindowedValue<V>,
-                                             V>>>> {
-
-      private final Coder<W> windowCoder;
-      ToMapDoFn(Coder<W> windowCoder) {
-        this.windowCoder = windowCoder;
-      }
-
-      /**
-       * Emits one map record per run of elements sharing a window.
-       *
-       * @throws IllegalStateException if a key occurs more than once within the map that is
-       *     currently being accumulated
-       */
-      @Override
-      public void processElement(ProcessContext c)
-          throws Exception {
-        Optional<Object> previousWindowStructuralValue = Optional.absent();
-        Optional<W> previousWindow = Optional.absent();
-        Map<K, WindowedValue<V>> map = new HashMap<>();
-        for (KV<W, WindowedValue<KV<K, V>>> kv : c.element().getValue()) {
-          Object currentWindowStructuralValue = windowCoder.structuralValue(kv.getKey());
-          if (previousWindowStructuralValue.isPresent()
-              && !previousWindowStructuralValue.get().equals(currentWindowStructuralValue)) {
-            // Construct the transformed map containing all the elements since we
-            // are at a window boundary.
-            c.output(IsmRecord.of(
-                ImmutableList.of(previousWindow.get()),
-                valueInEmptyWindows(new TransformedMap<>(WindowedValueToValue.<V>of(), map))));
-            map = new HashMap<>();
-          }
-
-          // Verify that the user isn't trying to insert the same key multiple times.
-          // The template has four placeholders: existing value, new value, key, window.
-          checkState(!map.containsKey(kv.getValue().getValue().getKey()),
-              "Multiple values [%s, %s] found for single key [%s] within window [%s].",
-              map.get(kv.getValue().getValue().getKey()),
-              kv.getValue().getValue().getValue(),
-              kv.getValue().getValue().getKey(),
-              kv.getKey());
-          map.put(kv.getValue().getValue().getKey(),
-                  kv.getValue().withValue(kv.getValue().getValue().getValue()));
-          previousWindowStructuralValue = Optional.of(currentWindowStructuralValue);
-          previousWindow = Optional.of(kv.getKey());
-        }
-
-        // The last value for this hash is guaranteed to be at a window boundary
-        // so we output a transformed map containing all the elements since the last
-        // window boundary.
-        c.output(IsmRecord.of(
-            ImmutableList.of(previousWindow.get()),
-            valueInEmptyWindows(new TransformedMap<>(WindowedValueToValue.<V>of(), map))));
-      }
-    }
-
-    private final DataflowPipelineRunner runner;
-    /**
-     * Builds an instance of this class from the overridden transform. Only the runner is
-     * stored; the transform argument is not used.
-     */
-    @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply()
-    public BatchViewAsMap(DataflowPipelineRunner runner, View.AsMap<K, V> transform) {
-      this.runner = runner;
-    }
-
-    @Override
-    public PCollectionView<Map<K, V>> apply(PCollection<KV<K, V>> input) {
-      return this.<BoundedWindow>applyInternal(input);
-    }
-
-    /**
-     * Builds the map view through {@code BatchViewAsMultimap.applyForMapLike} with unique keys
-     * expected. If a {@link NonDeterministicException} is raised, the occurrence is recorded
-     * on the runner and the singleton fallback is used instead.
-     */
-    private <W extends BoundedWindow> PCollectionView<Map<K, V>>
-        applyInternal(PCollection<KV<K, V>> input) {
-
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
-      try {
-        PCollectionView<Map<K, V>> view = PCollectionViews.mapView(
-            input.getPipeline(), input.getWindowingStrategy(), inputCoder);
-        return BatchViewAsMultimap.applyForMapLike(runner, input, view, true /* unique keys */);
-      } catch (NonDeterministicException e) {
-        runner.recordViewUsesNonDeterministicKeyCoder(this);
-
-        // Since the key coder is not deterministic, we convert the map into a singleton
-        // and return a singleton view equivalent.
-        return applyForSingletonFallback(input);
-      }
-    }
-
-    @Override
-    protected String getKindString() {
-      return "BatchViewAsMap";
-    }
-
-    /**
-     * Transforms the input {@link PCollection} into a singleton {@link Map} per window.
-     *
-     * <p>The per-window map is produced by {@link ToMapDoFn} and encoded as a
-     * {@link TransformedMap} whose default value is an empty map.
-     */
-    private <W extends BoundedWindow> PCollectionView<Map<K, V>>
-        applyForSingletonFallback(PCollection<KV<K, V>> input) {
-      @SuppressWarnings("unchecked")
-      Coder<W> windowCoder = (Coder<W>)
-          input.getWindowingStrategy().getWindowFn().windowCoder();
-
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
-
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      Coder<Function<WindowedValue<V>, V>> transformCoder =
-          (Coder) SerializableCoder.of(WindowedValueToValue.class);
-
-      Coder<TransformedMap<K, WindowedValue<V>, V>> finalValueCoder =
-          TransformedMapCoder.of(
-          transformCoder,
-          MapCoder.of(
-              inputCoder.getKeyCoder(),
-              FullWindowedValueCoder.of(inputCoder.getValueCoder(), windowCoder)));
-
-      TransformedMap<K, WindowedValue<V>, V> defaultValue = new TransformedMap<>(
-          WindowedValueToValue.<V>of(),
-          ImmutableMap.<K, WindowedValue<V>>of());
-
-      return BatchViewAsSingleton.<KV<K, V>,
-                                   TransformedMap<K, WindowedValue<V>, V>,
-                                   Map<K, V>,
-                                   W> applyForSingleton(
-          runner,
-          input,
-          new ToMapDoFn<K, V, W>(windowCoder),
-          true,
-          defaultValue,
-          finalValueCoder);
-    }
-  }
-
-  /**
-   * Specialized implementation for
-   * {@link com.google.cloud.dataflow.sdk.transforms.View.AsMultimap View.AsMultimap} for the
-   * Dataflow runner in batch mode.
-   *
-   * <p>Creates a set of {@code Ism} files sharded by the hash of the key's byte
-   * representation. Each record is structured as follows:
-   * <ul>
-   *   <li>Key 1: User key K</li>
-   *   <li>Key 2: Window</li>
-   *   <li>Key 3: Index offset for a given key and window.</li>
-   *   <li>Value: Windowed value</li>
-   * </ul>
-   *
-   * <p>Alongside the data records, there are the following metadata records:
-   * <ul>
-   *   <li>Key 1: Metadata Key</li>
-   *   <li>Key 2: Window</li>
-   *   <li>Key 3: Index [0, size of map]</li>
-   *   <li>Value: variable length long byte representation of size of map if index is 0,
-   *              otherwise the byte representation of a key</li>
-   * </ul>
-   * The {@code [META, Window, 0]} record stores the number of unique keys per window, while
-   * {@code [META, Window, i]} for {@code i} in {@code [1, size of map]} stores the user's key.
-   * This allows for one to access the size of the map by looking at {@code [META, Window, 0]}
-   * and iterate over all the keys by accessing {@code [META, Window, i]} for {@code i} in
-   * {@code [1, size of map]}.
-   *
-   * <p>Note that in the case of a non-deterministic key coder, we fall back to using
-   * {@link com.google.cloud.dataflow.sdk.transforms.View.AsSingleton View.AsSingleton} printing
-   * a warning to users to specify a deterministic key coder.
-   */
-  static class BatchViewAsMultimap<K, V>
-      extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, Iterable<V>>>> {
-    /**
-     * A {@link PTransform} that keys every element by the hash which the supplied
-     * {@link IsmRecordCoder} computes for the element's user key, pairs it with a
-     * {@code (user key, window)} sort key and the reified windowed value, and then applies
-     * {@link GroupByKeyAndSortValuesOnly} to the result.
-     */
-    private static class GroupByKeyHashAndSortByKeyAndWindow<K, V, W extends BoundedWindow>
-        extends PTransform<PCollection<KV<K, V>>,
-                           PCollection<KV<Integer, Iterable<KV<KV<K, W>, WindowedValue<V>>>>>> {
-
-      /**
-       * Emits {@code KV<hash(user key), KV<KV<user key, window>, windowed value>>} for each
-       * input element, reading the window from the processing context.
-       */
-      @SystemDoFnInternal
-      private static class GroupByKeyHashAndSortByKeyAndWindowDoFn<K, V, W>
-          extends DoFn<KV<K, V>, KV<Integer, KV<KV<K, W>, WindowedValue<V>>>>
-          implements DoFn.RequiresWindowAccess {
-
-        private final IsmRecordCoder<?> coder;
-
-        private GroupByKeyHashAndSortByKeyAndWindowDoFn(IsmRecordCoder<?> coder) {
-          this.coder = coder;
-        }
-
-        @Override
-        public void processElement(ProcessContext c) throws Exception {
-          @SuppressWarnings("unchecked")
-          W window = (W) c.window();
-          K userKey = c.element().getKey();
-
-          WindowedValue<V> reified = WindowedValue.of(
-              c.element().getValue(), c.timestamp(), (BoundedWindow) window, c.pane());
-          c.output(KV.of(
-              coder.hash(ImmutableList.of(userKey)),
-              KV.of(KV.of(userKey, window), reified)));
-        }
-      }
-
-      private final IsmRecordCoder<?> coder;
-
-      public GroupByKeyHashAndSortByKeyAndWindow(IsmRecordCoder<?> coder) {
-        this.coder = coder;
-      }
-
-      @Override
-      public PCollection<KV<Integer, Iterable<KV<KV<K, W>, WindowedValue<V>>>>>
-          apply(PCollection<KV<K, V>> input) {
-
-        @SuppressWarnings("unchecked")
-        Coder<W> windowCoder = (Coder<W>)
-            input.getWindowingStrategy().getWindowFn().windowCoder();
-        @SuppressWarnings("unchecked")
-        KvCoder<K, V> inputCoder = (KvCoder<K, V>) input.getCoder();
-
-        PCollection<KV<Integer, KV<KV<K, W>, WindowedValue<V>>>> keyedByHash = input.apply(
-            ParDo.of(new GroupByKeyHashAndSortByKeyAndWindowDoFn<K, V, W>(coder)));
-
-        // Coder layout mirrors the element layout: hash -> ((user key, window) -> windowed value).
-        KvCoder<KV<K, W>, WindowedValue<V>> sortKeyAndValueCoder = KvCoder.of(
-            KvCoder.of(inputCoder.getKeyCoder(), windowCoder),
-            FullWindowedValueCoder.of(inputCoder.getValueCoder(), windowCoder));
-        keyedByHash.setCoder(KvCoder.of(VarIntCoder.of(), sortKeyAndValueCoder));
-
-        return keyedByHash.apply(
-            new GroupByKeyAndSortValuesOnly<Integer, KV<K, W>, WindowedValue<V>>());
-      }
-    }
-
-    /**
-     * A {@link DoFn} which creates {@link IsmRecord}s comparing successive elements windows
-     * and keys to locate window and key boundaries. The main output {@link IsmRecord}s have:
-     * <ul>
-     *   <li>Key 1: User key K</li>
-     *   <li>Key 2: Window</li>
-     *   <li>Key 3: Index offset for a given key and window.</li>
-     *   <li>Value: Windowed value</li>
-     * </ul>
-     *
-     * <p>Additionally, each distinct key seen within a window is emitted to
-     * {@code outputForEntrySet}, and the number of distinct keys this invocation saw for a
-     * window is emitted to {@code outputForSize}. Both side outputs are keyed by the
-     * {@code ismCoder} hash of the {@code [metadata key, window]} pair.
-     *
-     * <p>Finally, if this DoFn has been requested to perform unique key checking, it will
-     * throw an {@link IllegalStateException} when two consecutive elements share both the
-     * same key and the same window, i.e. when a key has more than one value in a window.
-     *
-     * <p>NOTE(review): boundaries are detected by comparing neighbouring elements only, so
-     * this presumably relies on the input iterable being sorted by key and window, and on it
-     * being non-empty (the first element is read unconditionally) - confirm against the
-     * upstream grouping transform.
-     */
-    static class ToIsmRecordForMapLikeDoFn<K, V, W extends BoundedWindow>
-        extends DoFn<KV<Integer, Iterable<KV<KV<K, W>, WindowedValue<V>>>>,
-                     IsmRecord<WindowedValue<V>>> {
-
-      private final TupleTag<KV<Integer, KV<W, Long>>> outputForSize;
-      private final TupleTag<KV<Integer, KV<W, K>>> outputForEntrySet;
-      private final Coder<W> windowCoder;
-      private final Coder<K> keyCoder;
-      private final IsmRecordCoder<WindowedValue<V>> ismCoder;
-      private final boolean uniqueKeysExpected;
-      ToIsmRecordForMapLikeDoFn(
-          TupleTag<KV<Integer, KV<W, Long>>> outputForSize,
-          TupleTag<KV<Integer, KV<W, K>>> outputForEntrySet,
-          Coder<W> windowCoder,
-          Coder<K> keyCoder,
-          IsmRecordCoder<WindowedValue<V>> ismCoder,
-          boolean uniqueKeysExpected) {
-        this.outputForSize = outputForSize;
-        this.outputForEntrySet = outputForEntrySet;
-        this.windowCoder = windowCoder;
-        this.keyCoder = keyCoder;
-        this.ismCoder = ismCoder;
-        this.uniqueKeysExpected = uniqueKeysExpected;
-      }
-
-      @Override
-      public void processElement(ProcessContext c) throws Exception {
-        long currentKeyIndex = 0;
-        // We use one based indexing while counting
-        long currentUniqueKeyCounter = 1;
-        Iterator<KV<KV<K, W>, WindowedValue<V>>> iterator = c.element().getValue().iterator();
-
-        KV<KV<K, W>, WindowedValue<V>> currentValue = iterator.next();
-        Object currentKeyStructuralValue =
-            keyCoder.structuralValue(currentValue.getKey().getKey());
-        Object currentWindowStructuralValue =
-            windowCoder.structuralValue(currentValue.getKey().getValue());
-
-        while (iterator.hasNext()) {
-          KV<KV<K, W>, WindowedValue<V>> nextValue = iterator.next();
-          Object nextKeyStructuralValue =
-              keyCoder.structuralValue(nextValue.getKey().getKey());
-          Object nextWindowStructuralValue =
-              windowCoder.structuralValue(nextValue.getKey().getValue());
-
-          outputDataRecord(c, currentValue, currentKeyIndex);
-
-          final long nextKeyIndex;
-          final long nextUniqueKeyCounter;
-
-          // Check to see if it's a new window
-          if (!currentWindowStructuralValue.equals(nextWindowStructuralValue)) {
-            // The next value is in a new window, so we output for size the number of unique keys
-            // seen and output the last key of the window. We also reset the next key index and
-            // the unique key counter.
-            outputMetadataRecordForSize(c, currentValue, currentUniqueKeyCounter);
-            outputMetadataRecordForEntrySet(c, currentValue);
-
-            nextKeyIndex = 0;
-            nextUniqueKeyCounter = 1;
-          } else if (!currentKeyStructuralValue.equals(nextKeyStructuralValue)){
-            // It is a new key within the same window so output the key for the entry set,
-            // reset the key index and increase the count of unique keys seen within this window.
-            outputMetadataRecordForEntrySet(c, currentValue);
-
-            nextKeyIndex = 0;
-            nextUniqueKeyCounter = currentUniqueKeyCounter + 1;
-          } else if (!uniqueKeysExpected) {
-            // Same key and same window, and duplicates are allowed: no metadata is emitted and
-            // the unique key counter is unchanged. All we do is increase the key index.
-
-            nextKeyIndex = currentKeyIndex + 1;
-            nextUniqueKeyCounter = currentUniqueKeyCounter;
-          } else {
-            throw new IllegalStateException(String.format(
-                "Unique keys are expected but found key %s with values %s and %s in window %s.",
-                currentValue.getKey().getKey(),
-                currentValue.getValue().getValue(),
-                nextValue.getValue().getValue(),
-                currentValue.getKey().getValue()));
-          }
-
-          currentValue = nextValue;
-          currentWindowStructuralValue = nextWindowStructuralValue;
-          currentKeyStructuralValue = nextKeyStructuralValue;
-          currentKeyIndex = nextKeyIndex;
-          currentUniqueKeyCounter = nextUniqueKeyCounter;
-        }
-
-        outputDataRecord(c, currentValue, currentKeyIndex);
-        outputMetadataRecordForSize(c, currentValue, currentUniqueKeyCounter);
-        // The last value for this hash is guaranteed to be at a window boundary, so the size
-        // record above and the entry set record below close out the final key and window.
-        outputMetadataRecordForEntrySet(c, currentValue);
-      }
-
-      /**
-       * This outputs the data record to the main output, keyed by
-       * {@code [user key, window, keyIndex]} with the windowed value as its value.
-       */
-      private void outputDataRecord(
-          ProcessContext c, KV<KV<K, W>, WindowedValue<V>> value, long keyIndex) {
-        IsmRecord<WindowedValue<V>> ismRecord = IsmRecord.of(
-            ImmutableList.of(
-                value.getKey().getKey(),
-                value.getKey().getValue(),
-                keyIndex),
-            value.getValue());
-        c.output(ismRecord);
-      }
-
-      /**
-       * This outputs records which will be used to compute the number of keys for a given window.
-       * The record is sent to {@code outputForSize} as
-       * {@code KV<hash([metadata key, window]), KV<window, uniqueKeyCount>>}.
-       */
-      private void outputMetadataRecordForSize(
-          ProcessContext c, KV<KV<K, W>, WindowedValue<V>> value, long uniqueKeyCount) {
-        c.sideOutput(outputForSize,
-            KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(),
-                                                 value.getKey().getValue())),
-                KV.of(value.getKey().getValue(), uniqueKeyCount)));
-      }
-
-      /**
-       * This outputs records which will be used to construct the entry set. The record is sent
-       * to {@code outputForEntrySet} as {@code KV<hash([metadata key, window]), KV<window, key>>}.
-       */
-      private void outputMetadataRecordForEntrySet(
-          ProcessContext c, KV<KV<K, W>, WindowedValue<V>> value) {
-        c.sideOutput(outputForEntrySet,
-            KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(),
-                                                 value.getKey().getValue())),
-                KV.of(value.getKey().getValue(), value.getKey().getKey())));
-      }
-    }
-
-    /**
-     * A {@link DoFn} which sums the {@code Long} values of consecutive elements sharing a
-     * window and outputs a metadata {@link IsmRecord} per window of:
-     * <ul>
-     *   <li>Key 1: META key</li>
-     *   <li>Key 2: window</li>
-     *   <li>Key 3: 0L (constant)</li>
-     *   <li>Value: sum of values for window</li>
-     * </ul>
-     *
-     * <p>This {@link DoFn} is meant to be used to compute the number of unique keys
-     * per window for map and multimap side inputs.
-     */
-    static class ToIsmMetadataRecordForSizeDoFn<K, V, W extends BoundedWindow>
-        extends DoFn<KV<Integer, Iterable<KV<W, Long>>>, IsmRecord<WindowedValue<V>>> {
-      private final Coder<W> windowCoder;
-
-      ToIsmMetadataRecordForSizeDoFn(Coder<W> windowCoder) {
-        this.windowCoder = windowCoder;
-      }
-
-      @Override
-      public void processElement(ProcessContext c) throws Exception {
-        Iterator<KV<W, Long>> remaining = c.element().getValue().iterator();
-        KV<W, Long> current = remaining.next();
-        Object currentWindowStructuralValue = windowCoder.structuralValue(current.getKey());
-        long runningSize = 0;
-        while (remaining.hasNext()) {
-          KV<W, Long> next = remaining.next();
-          Object nextWindowStructuralValue = windowCoder.structuralValue(next.getKey());
-
-          runningSize += current.getValue();
-          if (!currentWindowStructuralValue.equals(nextWindowStructuralValue)) {
-            // The upcoming element opens a different window; flush the finished one.
-            outputSizeRecord(c, current.getKey(), runningSize);
-            runningSize = 0;
-          }
-
-          current = next;
-          currentWindowStructuralValue = nextWindowStructuralValue;
-        }
-
-        // The final element always closes a window, so its total is flushed as well.
-        runningSize += current.getValue();
-        outputSizeRecord(c, current.getKey(), runningSize);
-      }
-
-      /** Emits the {@code [META, window, 0L]} record carrying the var-long encoded size. */
-      private void outputSizeRecord(ProcessContext c, W window, long size) throws Exception {
-        c.output(IsmRecord.<WindowedValue<V>>meta(
-            ImmutableList.of(IsmFormat.getMetadataKey(), window, 0L),
-            CoderUtils.encodeToByteArray(VarLongCoder.of(), size)));
-      }
-    }
-
-    /**
-     * A {@link DoFn} which outputs a metadata {@link IsmRecord} per window and key pair of:
-     * <ul>
-     *   <li>Key 1: META key</li>
-     *   <li>Key 2: window</li>
-     *   <li>Key 3: index offset (1-based index)</li>
-     *   <li>Value: key</li>
-     * </ul>
-     *
-     * <p>This {@link DoFn} is meant to be used to output index to key records
-     * per window for map and multimap side inputs. The index starts again at 1 each time the
-     * window of an element differs from the window of the element before it.
-     */
-    static class ToIsmMetadataRecordForKeyDoFn<K, V, W extends BoundedWindow>
-        extends DoFn<KV<Integer, Iterable<KV<W, K>>>, IsmRecord<WindowedValue<V>>> {
-
-      private final Coder<K> keyCoder;
-      private final Coder<W> windowCoder;
-
-      ToIsmMetadataRecordForKeyDoFn(Coder<K> keyCoder, Coder<W> windowCoder) {
-        this.keyCoder = keyCoder;
-        this.windowCoder = windowCoder;
-      }
-
-      @Override
-      public void processElement(ProcessContext c) throws Exception {
-        Iterator<KV<W, K>> remaining = c.element().getValue().iterator();
-        KV<W, K> current = remaining.next();
-        Object currentWindowStructuralValue = windowCoder.structuralValue(current.getKey());
-        long indexInWindow = 1;
-        while (remaining.hasNext()) {
-          KV<W, K> next = remaining.next();
-          Object nextWindowStructuralValue = windowCoder.structuralValue(next.getKey());
-
-          outputKeyRecord(c, current, indexInWindow);
-          // Keep counting inside the same window, otherwise start over at 1.
-          indexInWindow = currentWindowStructuralValue.equals(nextWindowStructuralValue)
-              ? indexInWindow + 1
-              : 1;
-
-          current = next;
-          currentWindowStructuralValue = nextWindowStructuralValue;
-        }
-
-        // The final element always closes a window, so it is emitted unconditionally.
-        outputKeyRecord(c, current, indexInWindow);
-      }
-
-      /** Emits the {@code [META, window, index]} record carrying the encoded key. */
-      private void outputKeyRecord(ProcessContext c, KV<W, K> windowAndKey, long index)
-          throws Exception {
-        c.output(IsmRecord.<WindowedValue<V>>meta(
-            ImmutableList.of(IsmFormat.getMetadataKey(), windowAndKey.getKey(), index),
-            CoderUtils.encodeToByteArray(keyCoder, windowAndKey.getValue())));
-      }
-    }
-
-    /**
-     * A {@link DoFn} which partitions sets of elements by window boundaries. Within each
-     * partition, the set of elements is transformed into a {@link TransformedMap}.
-     * The transformed {@code Map<K, Iterable<V>>} is backed by a
-     * {@code Map<K, Iterable<WindowedValue<V>>>} and contains a function
-     * {@code Iterable<WindowedValue<V>> -> Iterable<V>}.
-     *
-     * <p>Outputs {@link IsmRecord}s having:
-     * <ul>
-     *   <li>Key 1: Window</li>
-     *   <li>Value: Transformed map containing a transform that removes the encapsulation
-     *              of the window around each value,
-     *              {@code Map<K, Iterable<WindowedValue<V>>> -> Map<K, Iterable<V>>}.</li>
-     * </ul>
-     */
-    static class ToMultimapDoFn<K, V, W extends BoundedWindow>
-        extends DoFn<KV<Integer, Iterable<KV<W, WindowedValue<KV<K, V>>>>>,
-                     IsmRecord<WindowedValue<TransformedMap<K,
-                                                            Iterable<WindowedValue<V>>,
-                                                            Iterable<V>>>>> {
-
-      private final Coder<W> windowCoder;
-      ToMultimapDoFn(Coder<W> windowCoder) {
-        this.windowCoder = windowCoder;
-      }
-
-      @Override
-      public void processElement(ProcessContext c)
-          throws Exception {
-        Optional<Object> previousWindowStructuralValue = Optional.absent();
-        Optional<W> previousWindow = Optional.absent();
-        Multimap<K, WindowedValue<V>> multimap = HashMultimap.create();
-        for (KV<W, WindowedValue<KV<K, V>>> kv : c.element().getValue()) {
-          Object currentWindowStructuralValue = windowCoder.structuralValue(kv.getKey());
-          if (previousWindowStructuralValue.isPresent()
-              && !previousWindowStructuralValue.get().equals(currentWindowStructuralValue)) {
-            // Construct the transformed map containing all the elements since we
-            // are at a window boundary.
-            @SuppressWarnings({"unchecked", "rawtypes"})
-            Map<K, Iterable<WindowedValue<V>>> resultMap = (Map) multimap.asMap();
-            c.output(IsmRecord.<WindowedValue<TransformedMap<K,
-                                                             Iterable<WindowedValue<V>>,
-                                                             Iterable<V>>>>of(
-                ImmutableList.of(previousWindow.get()),
-                valueInEmptyWindows(
-                    new TransformedMap<>(
-                        IterableWithWindowedValuesToIterable.<V>of(), resultMap))));
-            multimap = HashMultimap.create();
-          }
-
-          multimap.put(kv.getValue().getValue().getKey(),
-                       kv.getValue().withValue(kv.getValue().getValue().getValue()));
-          previousWindowStructuralValue = Optional.of(currentWindowStructuralValue);
-          previousWindow = Optional.of(kv.getKey());
-        }
-
-        // The last value for this hash is guaranteed to be at a window boundary
-        // so we output a transformed map containing all the elements since the last
-        // window boundary.
-        @SuppressWarnings({"unchecked", "rawtypes"})
-        Map<K, Iterable<WindowedValue<V>>> resultMap = (Map) multimap.asMap();
-        c.output(IsmRecord.<WindowedValue<TransformedMap<K,
-                                                         Iterable<WindowedValue<V>>,
-                                                         Iterable<V>>>>of(
-            ImmutableList.of(previousWindow.get()),
-            valueInEmptyWindows(
-                new TransformedMap<>(IterableWithWindowedValuesToIterable.<V>of(), resultMap))));
-      }
-    }
-
-    private final DataflowPipelineRunner runner;
-    /**
-     * Builds an instance of this class from the overridden transform.
-     *
-     * @param runner the runner, stored for use when the view is applied
-     * @param transform the overridden transform; not read by this constructor
-     */
-    @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply()
-    public BatchViewAsMultimap(DataflowPipelineRunner runner, View.AsMultimap<K, V> transform) {
-      this.runner = runner;
-    }
-
-    @Override
-    public PCollectionView<Map<K, Iterable<V>>> apply(PCollection<KV<K, V>> input) {
-      return this.<BoundedWindow>applyInternal(input);
-    }
-
-    private <W extends BoundedWindow> PCollectionView<Map<K, Iterable<V>>>
-        applyInternal(PCollection<KV<K, V>> input) {
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
-      try {
-        PCollectionView<Map<K, Iterable<V>>> view = PCollectionViews.multimapView(
-            input.getPipeline(), input.getWindowingStrategy(), inputCoder);
-
-        return applyForMapLike(runner, input, view, false /* unique keys not expected */);
-      } catch (NonDeterministicException e) {
-        runner.recordViewUsesNonDeterministicKeyCoder(this);
-
-        // Since the key coder is not deterministic, we convert the map into a singleton
-        // and return a singleton view equivalent.
-        return applyForSingletonFallback(input);
-      }
-    }
-
-    /** Transforms the input {@link PCollection} into a singleton {@link Map} per window. */
-    private <W extends BoundedWindow> PCollectionView<Map<K, Iterable<V>>>
-        applyForSingletonFallback(PCollection<KV<K, V>> input) {
-      @SuppressWarnings("unchecked")
-      Coder<W> windowCoder = (Coder<W>)
-          input.getWindowingStrategy().getWindowFn().windowCoder();
-
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
-
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      Coder<Function<Iterable<WindowedValue<V>>, Iterable<V>>> transformCoder =
-          (Coder) SerializableCoder.of(IterableWithWindowedValuesToIterable.class);
-
-      Coder<TransformedMap<K, Iterable<WindowedValue<V>>, Iterable<V>>> finalValueCoder =
-          TransformedMapCoder.of(
-          transformCoder,
-          MapCoder.of(
-              inputCoder.getKeyCoder(),
-              IterableCoder.of(
-                  FullWindowedValueCoder.of(inputCoder.getValueCoder(), windowCoder))));
-
-      TransformedMap<K, Iterable<WindowedValue<V>>, Iterable<V>> defaultValue =
-          new TransformedMap<>(
-              IterableWithWindowedValuesToIterable.<V>of(),
-              ImmutableMap.<K, Iterable<WindowedValue<V>>>of());
-
-      return BatchViewAsSingleton.<KV<K, V>,
-                                   TransformedMap<K, Iterable<WindowedValue<V>>, Iterable<V>>,
-                                   Map<K, Iterable<V>>,
-                                   W> applyForSingleton(
-          runner,
-          input,
-          new ToMultimapDoFn<K, V, W>(windowCoder),
-          true,
-          defaultValue,
-          finalValueCoder);
-    }
-
-    private static <K, V, W extends BoundedWindow, ViewT> PCollectionView<ViewT> applyForMapLike(
-        DataflowPipelineRunner runner,
-        PCollection<KV<K, V>> input,
-        PCollectionView<ViewT> view,
-        boolean uniqueKeysExpected) throws NonDeterministicException {
-
-      @SuppressWarnings("unchecked")
-      Coder<W> windowCoder = (Coder<W>)
-          input.getWindowingStrategy().getWindowFn().windowCoder();
-
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
-
-      // If our key coder is deterministic, we can use the key portion of each KV
-      // as part of a composite key containing the window, key and index.
-      inputCoder.getKeyCoder().verifyDeterministic();
-
-      IsmRecordCoder<WindowedValue<V>> ismCoder =
-          coderForMapLike(windowCoder, inputCoder.getKeyCoder(), inputCoder.getValueCoder());
-
-      // Create the various output tags representing the main output containing the data stream
-      // and the side outputs containing the metadata about the size and entry set.
-      TupleTag<IsmRecord<WindowedValue<V>>> mainOutputTag = new TupleTag<>();
-      TupleTag<KV<Integer, KV<W, Long>>> outputForSizeTag = new TupleTag<>();
-      TupleTag<KV<Integer, KV<W, K>>> outputForEntrySetTag = new TupleTag<>();
-
-      // Process all the elements grouped by key hash, and sorted by key and then window
-      // outputting to all the outputs defined above.
-      PCollectionTuple outputTuple = input
-           .apply("GBKaSVForData", new GroupByKeyHashAndSortByKeyAndWindow<K, V, W>(ismCoder))
-           .apply(ParDo.of(new ToIsmRecordForMapLikeDoFn<K, V, W>(
-                   outputForSizeTag, outputForEntrySetTag,
-                   windowCoder, inputCoder.getKeyCoder(), ismCoder, uniqueKeysExpected))
-                       .withOutputTags(mainOutputTag,
-                                       TupleTagList.of(
-                                           ImmutableList.<TupleTag<?>>of(outputForSizeTag,
-                                                                         outputForEntrySetTag))));
-
-      // Set the coder on the main data output.
-      PCollection<IsmRecord<WindowedValue<V>>> perHashWithReifiedWindows =
-          outputTuple.get(mainOutputTag);
-      perHashWithReifiedWindows.setCoder(ismCoder);
-
-      // Set the coder on the metadata output for size and process the entries
-      // producing a [META, Window, 0L] record per window storing the number of unique keys
-      // for each window.
-      PCollection<KV<Integer, KV<W, Long>>> outputForSize = outputTuple.get(outputForSizeTag);
-      outputForSize.setCoder(
-          KvCoder.of(VarIntCoder.of(),
-                     KvCoder.of(windowCoder, VarLongCoder.of())));
-      PCollection<IsmRecord<WindowedValue<V>>> windowMapSizeMetadata = outputForSize
-          .apply("GBKaSVForSize", new GroupByKeyAndSortValuesOnly<Integer, W, Long>())
-          .apply(ParDo.of(new ToIsmMetadataRecordForSizeDoFn<K, V, W>(windowCoder)));
-      windowMapSizeMetadata.setCoder(ismCoder);
-
-      // Set the coder on the metadata output destined to build the entry set and process the
-      // entries producing a [META, Window, Index] record per window key pair storing the key.
-      PCollection<KV<Integer, KV<W, K>>> outputForEntrySet =
-          outputTuple.get(outputForEntrySetTag);
-      outputForEntrySet.setCoder(
-          KvCoder.of(VarIntCoder.of(),
-                     KvCoder.of(windowCoder, inputCoder.getKeyCoder())));
-      PCollection<IsmRecord<WindowedValue<V>>> windowMapKeysMetadata = outputForEntrySet
-          .apply("GBKaSVForKeys", new GroupByKeyAndSortValuesOnly<Integer, W, K>())
-          .apply(ParDo.of(
-              new ToIsmMetadataRecordForKeyDoFn<K, V, W>(inputCoder.getKeyCoder(), windowCoder)));
-      windowMapKeysMetadata.setCoder(ismCoder);
-
-      // Set that all these outputs should be materialized using an indexed format.
-      runner.addPCollectionRequiringIndexedFormat(perHashWithReifiedWindows);
-      runner.addPCollectionRequiringIndexedFormat(windowMapSizeMetadata);
-      runner.addPCollectionRequiringIndexedFormat(windowMapKeysMetadata);
-
-      PCollectionList<IsmRecord<WindowedValue<V>>> outputs =
-          PCollectionList.of(ImmutableList.of(
-              perHashWithReifiedWindows, windowMapSizeMetadata, windowMapKeysMetadata));
-
-      return Pipeline.applyTransform(outputs,
-                                     Flatten.<IsmRecord<WindowedValue<V>>>pCollections())
-          .apply(CreatePCollectionView.<IsmRecord<WindowedValue<V>>,
-                                        ViewT>of(view));
-    }
-
-    @Override
-    protected String getKindString() {
-      return "BatchViewAsMultimap";
-    }
-
-    static <V> IsmRecordCoder<WindowedValue<V>> coderForMapLike(
-        Coder<? extends BoundedWindow> windowCoder, Coder<?> keyCoder, Coder<V> valueCoder) {
-      // TODO: swap to use a variable length long coder which has values which compare
-      // the same as their byte representation compare lexicographically within the key coder
-      return IsmRecordCoder.of(
-          1, // We use only the key for hashing when producing value records
-          2, // Since the key is not present, we add the window to the hash when
-             // producing metadata records
-          ImmutableList.of(
-              MetadataKeyCoder.of(keyCoder),
-              windowCoder,
-              BigEndianLongCoder.of()),
-          FullWindowedValueCoder.of(valueCoder, windowCoder));
-    }
-  }
-
-  /**
-   * A {@code Map<K, V2>} backed by a {@code Map<K, V1>} and a function that transforms
-   * {@code V1 -> V2}.
-   */
-  static class TransformedMap<K, V1, V2>
-      extends ForwardingMap<K, V2> {
-    private final Function<V1, V2> transform;
-    private final Map<K, V1> originalMap;
-    private final Map<K, V2> transformedMap;
-
-    private TransformedMap(Function<V1, V2> transform, Map<K, V1> originalMap) {
-      this.transform = transform;
-      this.originalMap = Collections.unmodifiableMap(originalMap);
-      this.transformedMap = Maps.transformValues(originalMap, transform);
-    }
-
-    @Override
-    protected Map<K, V2> delegate() {
-      return transformedMap;
-    }
-  }
-
-  /**
-   * A {@link Coder} for {@link TransformedMap}s.
-   */
-  static class TransformedMapCoder<K, V1, V2>
-      extends StandardCoder<TransformedMap<K, V1, V2>> {
-    private final Coder<Function<V1, V2>> transformCoder;
-    private final Coder<Map<K, V1>> originalMapCoder;
-
-    private TransformedMapCoder(
-        Coder<Function<V1, V2>> transformCoder, Coder<Map<K, V1>> originalMapCoder) {
-      this.transformCoder = transformCoder;
-      this.originalMapCoder = originalMapCoder;
-    }
-
-    public static <K, V1, V2> TransformedMapCoder<K, V1, V2> of(
-        Coder<Function<V1, V2>> transformCoder, Coder<Map<K, V1>> originalMapCoder) {
-      return new TransformedMapCoder<>(transformCoder, originalMapCoder);
-    }
-
-    @JsonCreator
-    public static <K, V1, V2> TransformedMapCoder<K, V1, V2> of(
-        @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-        List<Coder<?>> components) {
-      checkArgument(components.size() == 2,
-          "Expecting 2 components, got " + components.size());
-      @SuppressWarnings("unchecked")
-      Coder<Function<V1, V2>> transformCoder = (Coder<Function<V1, V2>>) components.get(0);
-      @SuppressWarnings("unchecked")
-      Coder<Map<K, V1>> originalMapCoder = (Coder<Map<K, V1>>) components.get(1);
-      return of(transformCoder, originalMapCoder);
-    }
-
-    @Override
-    public void encode(TransformedMap<K, V1, V2> value, OutputStream outStream,
-        Coder.Context context) throws CoderException, IOException {
-      transformCoder.encode(value.transform, outStream, context.nested());
-      originalMapCoder.encode(value.originalMap, outStream, context.nested());
-    }
-
-    @Override
-    public TransformedMap<K, V1, V2> decode(
-        InputStream inStream, Coder.Context context) throws CoderException, IOException {
-      return new TransformedMap<>(
-          transformCoder.decode(inStream, context.nested()),
-          originalMapCoder.decode(inStream, context.nested()));
-    }
-
-    @Override
-    public List<? extends Co

<TRUNCATED>

[31/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InMemoryWatermarkManager.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InMemoryWatermarkManager.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InMemoryWatermarkManager.java
deleted file mode 100644
index a9a62a6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InMemoryWatermarkManager.java
+++ /dev/null
@@ -1,1310 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.TimeDomain;
-import com.google.cloud.dataflow.sdk.util.TimerInternals;
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.MoreObjects;
-import com.google.common.collect.ComparisonChain;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Ordering;
-import com.google.common.collect.SortedMultiset;
-import com.google.common.collect.TreeMultiset;
-
-import org.joda.time.Instant;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.EnumMap;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.NavigableSet;
-import java.util.Objects;
-import java.util.PriorityQueue;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.concurrent.atomic.AtomicReference;
-
-import javax.annotation.Nullable;
-
-/**
- * Manages watermarks of {@link PCollection PCollections} and input and output watermarks of
- * {@link AppliedPTransform AppliedPTransforms} to provide event-time and completion tracking for
- * in-memory execution. {@link InMemoryWatermarkManager} is designed to update and return a
- * consistent view of watermarks in the presence of concurrent updates.
- *
- * <p>An {@link InMemoryWatermarkManager} is provided with the collection of root
- * {@link AppliedPTransform AppliedPTransforms} and a map of {@link PCollection PCollections} to
- * all the {@link AppliedPTransform AppliedPTransforms} that consume them at construction time.
- *
- * <p>Whenever a root {@link AppliedPTransform transform} produces elements, the
- * {@link InMemoryWatermarkManager} is provided with the produced elements and the output watermark
- * of the producing {@link AppliedPTransform transform}. The
- * {@link InMemoryWatermarkManager watermark manager} is responsible for computing the watermarks
- * of all {@link AppliedPTransform transforms} that consume one or more
- * {@link PCollection PCollections}.
- *
- * <p>Whenever a non-root {@link AppliedPTransform} finishes processing one or more in-flight
- * elements (referred to as the input {@link CommittedBundle bundle}), the following occurs
- * atomically:
- * <ul>
- *  <li>All of the in-flight elements are removed from the collection of pending elements for the
- *      {@link AppliedPTransform}.</li>
- *  <li>All of the elements produced by the {@link AppliedPTransform} are added to the collection
- *      of pending elements for each {@link AppliedPTransform} that consumes them.</li>
- *  <li>The input watermark for the {@link AppliedPTransform} becomes the maximum value of
- *    <ul>
- *      <li>the previous input watermark</li>
- *      <li>the minimum of
- *        <ul>
- *          <li>the timestamps of all currently pending elements</li>
- *          <li>all input {@link PCollection} watermarks</li>
- *        </ul>
- *      </li>
- *    </ul>
- *  </li>
- *  <li>The output watermark for the {@link AppliedPTransform} becomes the maximum of
- *    <ul>
- *      <li>the previous output watermark</li>
- *      <li>the minimum of
- *        <ul>
- *          <li>the current input watermark</li>
- *          <li>the current watermark holds</li>
- *        </ul>
- *      </li>
- *    </ul>
- *  </li>
- *  <li>The watermark of the output {@link PCollection} can be advanced to the output watermark of
- *      the {@link AppliedPTransform}</li>
- *  <li>The watermark of all downstream {@link AppliedPTransform AppliedPTransforms} can be
- *      advanced.</li>
- * </ul>
- *
- * <p>The watermark of a {@link PCollection} is equal to the output watermark of the
- * {@link AppliedPTransform} that produces it.
- *
- * <p>The watermarks for a {@link PTransform} are updated as follows when output is committed:<pre>
- * Watermark_In'  = MAX(Watermark_In, MIN(U(TS_Pending), U(Watermark_InputPCollection)))
- * Watermark_Out' = MAX(Watermark_Out, MIN(Watermark_In', U(StateHold)))
- * Watermark_PCollection = Watermark_Out_ProducingPTransform
- * </pre>
- */
-public class InMemoryWatermarkManager {
-  /**
-   * The watermark of some {@link Pipeline} element, usually a {@link PTransform} or a
-   * {@link PCollection}.
-   *
-   * <p>A watermark is a monotonically increasing value, which represents the point up to which the
-   * system believes it has received all of the data. Data that arrives with a timestamp that is
-   * before the watermark is considered late. {@link BoundedWindow#TIMESTAMP_MAX_VALUE} is a special
-   * timestamp which indicates we have received all of the data and there will be no more on-time or
-   * late data. This value is represented by {@link InMemoryWatermarkManager#THE_END_OF_TIME}.
-   */
-  private static interface Watermark {
-    /**
-     * Returns the current value of this watermark.
-     */
-    Instant get();
-
-    /**
-     * Refreshes the value of this watermark from its input watermarks and watermark holds.
-     *
-     * @return {@link WatermarkUpdate#ADVANCED} if the value of the watermark has advanced (and
-     *         thus dependent watermarks must also be updated)
-     */
-    WatermarkUpdate refresh();
-  }
-
-  /**
-   * The result of computing a {@link Watermark}.
-   */
-  private static enum WatermarkUpdate {
-    /** The watermark is later than the value at the previous time it was computed. */
-    ADVANCED(true),
-    /** The watermark is equal to the value at the previous time it was computed. */
-    NO_CHANGE(false);
-
-    private final boolean advanced;
-
-    private WatermarkUpdate(boolean advanced) {
-      this.advanced = advanced;
-    }
-
-    public boolean isAdvanced() {
-      return advanced;
-    }
-
-    /**
-     * Returns the {@link WatermarkUpdate} that is a result of combining the two watermark updates.
-     *
-     * <p>If either of the input {@link WatermarkUpdate WatermarkUpdates} was advanced, the result
-     * {@link WatermarkUpdate} has been advanced.
-     */
-    public WatermarkUpdate union(WatermarkUpdate that) {
-      if (this.advanced) {
-        return this;
-      }
-      return that;
-    }
-
-    /**
-     * Returns the {@link WatermarkUpdate} based on the former and current
-     * {@link Instant timestamps}.
-     */
-    public static WatermarkUpdate fromTimestamps(Instant oldTime, Instant currentTime) {
-      if (currentTime.isAfter(oldTime)) {
-        return ADVANCED;
-      }
-      return NO_CHANGE;
-    }
-  }
-
-  /**
-   * The input {@link Watermark} of an {@link AppliedPTransform}.
-   *
-   * <p>At any point, the value of an {@link AppliedPTransformInputWatermark} is equal to the
-   * minimum watermark across all of its input {@link Watermark Watermarks}, and the minimum
-   * timestamp of all of the pending elements, restricted to be monotonically increasing.
-   *
-   * <p>See {@link #refresh()} for more information.
-   */
-  private static class AppliedPTransformInputWatermark implements Watermark {
-    private final Collection<? extends Watermark> inputWatermarks;
-    private final SortedMultiset<WindowedValue<?>> pendingElements;
-    private final Map<Object, NavigableSet<TimerData>> objectTimers;
-
-    private AtomicReference<Instant> currentWatermark;
-
-    public AppliedPTransformInputWatermark(Collection<? extends Watermark> inputWatermarks) {
-      this.inputWatermarks = inputWatermarks;
-      this.pendingElements = TreeMultiset.create(PENDING_ELEMENT_COMPARATOR);
-      this.objectTimers = new HashMap<>();
-      currentWatermark = new AtomicReference<>(BoundedWindow.TIMESTAMP_MIN_VALUE);
-    }
-
-    @Override
-    public Instant get() {
-      return currentWatermark.get();
-    }
-
-    /**
-     * {@inheritDoc}.
-     *
-     * <p>When refresh is called, the value of the {@link AppliedPTransformInputWatermark} becomes
-     * equal to the maximum value of
-     * <ul>
-     *   <li>the previous input watermark</li>
-     *   <li>the minimum of
-     *     <ul>
-     *       <li>the timestamps of all currently pending elements</li>
-     *       <li>all input {@link PCollection} watermarks</li>
-     *     </ul>
-     *   </li>
-     * </ul>
-     */
-    @Override
-    public synchronized WatermarkUpdate refresh() {
-      Instant oldWatermark = currentWatermark.get();
-      Instant minInputWatermark = BoundedWindow.TIMESTAMP_MAX_VALUE;
-      for (Watermark inputWatermark : inputWatermarks) {
-        minInputWatermark = INSTANT_ORDERING.min(minInputWatermark, inputWatermark.get());
-      }
-      if (!pendingElements.isEmpty()) {
-        minInputWatermark = INSTANT_ORDERING.min(
-            minInputWatermark, pendingElements.firstEntry().getElement().getTimestamp());
-      }
-      Instant newWatermark = INSTANT_ORDERING.max(oldWatermark, minInputWatermark);
-      currentWatermark.set(newWatermark);
-      return WatermarkUpdate.fromTimestamps(oldWatermark, newWatermark);
-    }
-
-    private synchronized void addPendingElements(Iterable<? extends WindowedValue<?>> newPending) {
-      for (WindowedValue<?> pendingElement : newPending) {
-        pendingElements.add(pendingElement);
-      }
-    }
-
-    private synchronized void removePendingElements(
-        Iterable<? extends WindowedValue<?>> finishedElements) {
-      for (WindowedValue<?> finishedElement : finishedElements) {
-        pendingElements.remove(finishedElement);
-      }
-    }
-
-    private synchronized void updateTimers(TimerUpdate update) {
-      NavigableSet<TimerData> keyTimers = objectTimers.get(update.key);
-      if (keyTimers == null) {
-        keyTimers = new TreeSet<>();
-        objectTimers.put(update.key, keyTimers);
-      }
-      for (TimerData timer : update.setTimers) {
-        if (TimeDomain.EVENT_TIME.equals(timer.getDomain())) {
-          keyTimers.add(timer);
-        }
-      }
-      for (TimerData timer : update.deletedTimers) {
-        if (TimeDomain.EVENT_TIME.equals(timer.getDomain())) {
-          keyTimers.remove(timer);
-        }
-      }
-      // We don't keep references to timers that have been fired and delivered via #getFiredTimers()
-    }
-
-    private synchronized Map<Object, List<TimerData>> extractFiredEventTimeTimers() {
-      return extractFiredTimers(currentWatermark.get(), objectTimers);
-    }
-
-    @Override
-    public synchronized String toString() {
-      return MoreObjects.toStringHelper(AppliedPTransformInputWatermark.class)
-          .add("pendingElements", pendingElements)
-          .add("currentWatermark", currentWatermark)
-          .toString();
-    }
-  }
-
-  /**
-   * The output {@link Watermark} of an {@link AppliedPTransform}.
-   *
-   * <p>The value of an {@link AppliedPTransformOutputWatermark} is equal to the minimum of the
-   * current watermark hold and the {@link AppliedPTransformInputWatermark} for the same
-   * {@link AppliedPTransform}, restricted to be monotonically increasing. See
-   * {@link #refresh()} for more information.
-   */
-  private static class AppliedPTransformOutputWatermark implements Watermark {
-    private final Watermark inputWatermark;
-    private final PerKeyHolds holds;
-    private AtomicReference<Instant> currentWatermark;
-
-    public AppliedPTransformOutputWatermark(AppliedPTransformInputWatermark inputWatermark) {
-      this.inputWatermark = inputWatermark;
-      holds = new PerKeyHolds();
-      currentWatermark = new AtomicReference<>(BoundedWindow.TIMESTAMP_MIN_VALUE);
-    }
-
-    public synchronized void updateHold(Object key, Instant newHold) {
-      if (newHold == null) {
-        holds.removeHold(key);
-      } else {
-        holds.updateHold(key, newHold);
-      }
-    }
-
-    @Override
-    public Instant get() {
-      return currentWatermark.get();
-    }
-
-    /**
-     * {@inheritDoc}.
-     *
-     * <p>When refresh is called, the value of the {@link AppliedPTransformOutputWatermark} becomes
-     * equal to the maximum value of:
-     * <ul>
-     *   <li>the previous output watermark</li>
-     *   <li>the minimum of
-     *     <ul>
-     *       <li>the current input watermark</li>
-     *       <li>the current watermark holds</li>
-     *     </ul>
-     *   </li>
-     * </ul>
-     */
-    @Override
-    public synchronized WatermarkUpdate refresh() {
-      Instant oldWatermark = currentWatermark.get();
-      Instant newWatermark = INSTANT_ORDERING.min(inputWatermark.get(), holds.getMinHold());
-      newWatermark = INSTANT_ORDERING.max(oldWatermark, newWatermark);
-      currentWatermark.set(newWatermark);
-      return WatermarkUpdate.fromTimestamps(oldWatermark, newWatermark);
-    }
-
-    @Override
-    public synchronized String toString() {
-      return MoreObjects.toStringHelper(AppliedPTransformOutputWatermark.class)
-          .add("holds", holds)
-          .add("currentWatermark", currentWatermark)
-          .toString();
-    }
-  }
-
-  /**
-   * The input {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} hold for an
-   * {@link AppliedPTransform}.
-   *
-   * <p>At any point, the hold value of a {@link SynchronizedProcessingTimeInputWatermark} is equal
-   * to the minimum across all pending bundles at the {@link AppliedPTransform} and all upstream
-   * {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} watermarks. The value of the input
-   * synchronized processing time at any step is equal to the maximum of:
-   * <ul>
-   *   <li>The most recently returned synchronized processing input time
-   *   <li>The minimum of
-   *     <ul>
-   *       <li>The current processing time
-   *       <li>The current synchronized processing time input hold
-   *     </ul>
-   * </ul>
-   */
-  private static class SynchronizedProcessingTimeInputWatermark implements Watermark {
-    private final Collection<? extends Watermark> inputWms;
-    private final Collection<CommittedBundle<?>> pendingBundles;
-    private final Map<Object, NavigableSet<TimerData>> processingTimers;
-    private final Map<Object, NavigableSet<TimerData>> synchronizedProcessingTimers;
-
-    private final PriorityQueue<TimerData> pendingTimers;
-
-    private AtomicReference<Instant> earliestHold;
-
-    public SynchronizedProcessingTimeInputWatermark(Collection<? extends Watermark> inputWms) {
-      this.inputWms = inputWms;
-      this.pendingBundles = new HashSet<>();
-      this.processingTimers = new HashMap<>();
-      this.synchronizedProcessingTimers = new HashMap<>();
-      this.pendingTimers = new PriorityQueue<>();
-      Instant initialHold = BoundedWindow.TIMESTAMP_MAX_VALUE;
-      for (Watermark wm : inputWms) {
-        initialHold = INSTANT_ORDERING.min(initialHold, wm.get());
-      }
-      earliestHold = new AtomicReference<>(initialHold);
-    }
-
-    @Override
-    public Instant get() {
-      return earliestHold.get();
-    }
-
-    /**
-     * {@inheritDoc}.
-     *
-     * <p>When refresh is called, the value of the {@link SynchronizedProcessingTimeInputWatermark}
-     * becomes equal to the minimum value of
-     * <ul>
-     *   <li>the timestamps of all currently pending bundles</li>
-     *   <li>all input {@link PCollection} synchronized processing time watermarks</li>
-     * </ul>
-     *
-     * <p>Note that this value is not monotonic, but the returned value for the synchronized
-     * processing time must be.
-     */
-    @Override
-    public synchronized WatermarkUpdate refresh() {
-      Instant oldHold = earliestHold.get();
-      Instant minTime = THE_END_OF_TIME.get();
-      for (Watermark input : inputWms) {
-        minTime = INSTANT_ORDERING.min(minTime, input.get());
-      }
-      for (CommittedBundle<?> bundle : pendingBundles) {
-        // TODO: Track elements in the bundle by the processing time they were output instead of
-        // entire bundles. Required to support arbitrarily splitting and merging bundles between
-        // steps
-        minTime = INSTANT_ORDERING.min(minTime, bundle.getSynchronizedProcessingOutputWatermark());
-      }
-      earliestHold.set(minTime);
-      return WatermarkUpdate.fromTimestamps(oldHold, minTime);
-    }
-
-    public synchronized void addPending(CommittedBundle<?> bundle) {
-      pendingBundles.add(bundle);
-    }
-
-    public synchronized void removePending(CommittedBundle<?> bundle) {
-      pendingBundles.remove(bundle);
-    }
-
-    /**
-     * Returns the timestamp of the earliest timer that has not been completed. This is the
-     * earlier of the earliest timestamp across timers that have not yet been delivered and the
-     * earliest timestamp across timers that have been delivered but have not been completed.
-     */
-    public synchronized Instant getEarliestTimerTimestamp() {
-      Instant earliest = THE_END_OF_TIME.get();
-      for (NavigableSet<TimerData> timers : processingTimers.values()) {
-        if (!timers.isEmpty()) {
-          earliest = INSTANT_ORDERING.min(timers.first().getTimestamp(), earliest);
-        }
-      }
-      for (NavigableSet<TimerData> timers : synchronizedProcessingTimers.values()) {
-        if (!timers.isEmpty()) {
-          earliest = INSTANT_ORDERING.min(timers.first().getTimestamp(), earliest);
-        }
-      }
-      if (!pendingTimers.isEmpty()) {
-        earliest = INSTANT_ORDERING.min(pendingTimers.peek().getTimestamp(), earliest);
-      }
-      return earliest;
-    }
-
-    private synchronized void updateTimers(TimerUpdate update) {
-      for (TimerData completedTimer : update.completedTimers) {
-        pendingTimers.remove(completedTimer);
-      }
-      Map<TimeDomain, NavigableSet<TimerData>> timerMap = timerMap(update.key);
-      for (TimerData addedTimer : update.setTimers) {
-        NavigableSet<TimerData> timerQueue = timerMap.get(addedTimer.getDomain());
-        if (timerQueue != null) {
-          timerQueue.add(addedTimer);
-        }
-      }
-      for (TimerData deletedTimer : update.deletedTimers) {
-        NavigableSet<TimerData> timerQueue = timerMap.get(deletedTimer.getDomain());
-        if (timerQueue != null) {
-          timerQueue.remove(deletedTimer);
-        }
-      }
-    }
-
-    private synchronized Map<Object, List<TimerData>> extractFiredDomainTimers(
-        TimeDomain domain, Instant firingTime) {
-      Map<Object, List<TimerData>> fired;
-      switch (domain) {
-        case PROCESSING_TIME:
-          fired = extractFiredTimers(firingTime, processingTimers);
-          break;
-        case SYNCHRONIZED_PROCESSING_TIME:
-          // Synchronized timers may not fire past the earliest hold, so cap the firing time.
-          Instant cappedFiringTime = INSTANT_ORDERING.min(firingTime, earliestHold.get());
-          fired = extractFiredTimers(cappedFiringTime, synchronizedProcessingTimers);
-          break;
-        default:
-          throw new IllegalArgumentException(
-              "Called getFiredTimers on a Synchronized Processing Time watermark"
-                  + " and gave a non-processing time domain "
-                  + domain);
-      }
-      // Every extracted timer stays pending until a later update reports it as completed.
-      for (List<TimerData> keyFired : fired.values()) {
-        pendingTimers.addAll(keyFired);
-      }
-      return fired;
-    }
-
-    private Map<TimeDomain, NavigableSet<TimerData>> timerMap(Object key) {
-      EnumMap<TimeDomain, NavigableSet<TimerData>> queuesByDomain =
-          new EnumMap<>(TimeDomain.class);
-
-      // Look up the key's processing-time timers, creating the set on first use.
-      NavigableSet<TimerData> processing = processingTimers.get(key);
-      if (processing == null) {
-        processing = new TreeSet<>();
-        processingTimers.put(key, processing);
-      }
-      queuesByDomain.put(TimeDomain.PROCESSING_TIME, processing);
-
-      // Same for the key's synchronized-processing-time timers.
-      NavigableSet<TimerData> synchronizedProcessing = synchronizedProcessingTimers.get(key);
-      if (synchronizedProcessing == null) {
-        synchronizedProcessing = new TreeSet<>();
-        synchronizedProcessingTimers.put(key, synchronizedProcessing);
-      }
-      queuesByDomain.put(TimeDomain.SYNCHRONIZED_PROCESSING_TIME, synchronizedProcessing);
-
-      return queuesByDomain;
-    }
-
-    @Override
-    public synchronized String toString() {
-      return MoreObjects.toStringHelper(SynchronizedProcessingTimeInputWatermark.class)
-          .add("earliestHold", earliestHold)
-          .toString();
-    }
-  }
-
-  /**
-   * The output {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} hold for an
-   * {@link AppliedPTransform}.
-   *
-   * <p>At any point, the hold value of an {@link SynchronizedProcessingTimeOutputWatermark} is
-   * equal to the minimum across all incomplete timers at the {@link AppliedPTransform} and all
-   * upstream {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} watermarks. The value of the output
-   * synchronized processing time at any step is equal to the maximum of:
-   * <ul>
-   *   <li>The most recently returned synchronized processing output time
-   *   <li>The minimum of
-   *     <ul>
-   *       <li>The current processing time
-   *       <li>The current synchronized processing time output hold
-   *     </ul>
-   * </ul>
-   */
-  private static class SynchronizedProcessingTimeOutputWatermark implements Watermark {
-    private final SynchronizedProcessingTimeInputWatermark inputWm;
-    // The reference is never reassigned; only the Instant it holds is replaced on refresh.
-    private final AtomicReference<Instant> latestRefresh;
-
-    public SynchronizedProcessingTimeOutputWatermark(
-        SynchronizedProcessingTimeInputWatermark inputWm) {
-      this.inputWm = inputWm;
-      this.latestRefresh = new AtomicReference<>(BoundedWindow.TIMESTAMP_MIN_VALUE);
-    }
-
-    /**
-     * Returns the value computed by the most recent call to {@link #refresh()}, or
-     * {@link BoundedWindow#TIMESTAMP_MIN_VALUE} if this watermark has never been refreshed.
-     */
-    @Override
-    public Instant get() {
-      return latestRefresh.get();
-    }
-
-    /**
-     * {@inheritDoc}.
-     *
-     * <p>When refresh is called, the value of the {@link SynchronizedProcessingTimeOutputWatermark}
-     * becomes equal to the minimum value of:
-     * <ul>
-     *   <li>the current input watermark.
-     *   <li>all {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} timers that are based on the input
-     *       watermark.
-     *   <li>all {@link TimeDomain#PROCESSING_TIME} timers that are based on the input watermark.
-     * </ul>
-     *
-     * <p>Note that this value is not monotonic, but the returned value for the synchronized
-     * processing time must be.
-     */
-    @Override
-    public synchronized WatermarkUpdate refresh() {
-      // Hold the output synchronized processing time to the input watermark, which takes into
-      // account buffered bundles, and the earliest pending timer, which determines what to hold
-      // downstream timers to.
-      Instant oldRefresh = latestRefresh.get();
-      Instant newTimestamp =
-          INSTANT_ORDERING.min(inputWm.get(), inputWm.getEarliestTimerTimestamp());
-      latestRefresh.set(newTimestamp);
-      return WatermarkUpdate.fromTimestamps(oldRefresh, newTimestamp);
-    }
-
-    @Override
-    public synchronized String toString() {
-      return MoreObjects.toStringHelper(SynchronizedProcessingTimeOutputWatermark.class)
-          .add("latestRefresh", latestRefresh)
-          .toString();
-    }
-  }
-
-  /**
-   * A {@code Watermark} fixed at {@link BoundedWindow#TIMESTAMP_MAX_VALUE}, which is after the
-   * latest time that can be represented in the global window. It is the distinguished value used
-   * to represent a complete {@link PTransform}.
-   */
-  private static final Watermark THE_END_OF_TIME = new Watermark() {
-        @Override
-        public Instant get() {
-          return BoundedWindow.TIMESTAMP_MAX_VALUE;
-        }
-
-        @Override
-        public WatermarkUpdate refresh() {
-          // This watermark never moves, so refreshing it never reports a change.
-          return WatermarkUpdate.NO_CHANGE;
-        }
-      };
-
-  /**
-   * The natural ordering of {@link Instant Instants}, used to take the minimum or maximum of two
-   * timestamps.
-   */
-  private static final Ordering<Instant> INSTANT_ORDERING = Ordering.natural();
-
-  /**
-   * Sorts {@link WindowedValue WindowedValues} by timestamp and breaks ties arbitrarily, so that
-   * two distinct {@link WindowedValue WindowedValues} sharing a timestamp never compare as equal.
-   */
-  private static final Ordering<WindowedValue<?>> PENDING_ELEMENT_COMPARATOR =
-      new WindowedValueByTimestampComparator().compound(Ordering.arbitrary());
-
-  /**
-   * Removes, for every key of the provided map, each timer whose timestamp is strictly before
-   * {@code latestTime} and collects it in the result under the same key. Keys whose timer set is
-   * empty afterwards are removed from the provided map.
-   *
-   * <p>Within each key the returned timers keep the order of the set, earliest first.
-   *
-   * @param latestTime the exclusive upper bound on the timestamps of extracted timers
-   * @param objectTimers the per-key timer sets; mutated by this call
-   * @return the extracted timers by key, containing only keys with at least one extracted timer
-   */
-  private static Map<Object, List<TimerData>> extractFiredTimers(
-      Instant latestTime, Map<Object, NavigableSet<TimerData>> objectTimers) {
-    Map<Object, List<TimerData>> firedByKey = new HashMap<>();
-    Set<Object> drainedKeys = new HashSet<>();
-    for (Map.Entry<Object, NavigableSet<TimerData>> keyTimers : objectTimers.entrySet()) {
-      NavigableSet<TimerData> queue = keyTimers.getValue();
-      List<TimerData> fired = null;
-      while (!queue.isEmpty() && queue.first().getTimestamp().isBefore(latestTime)) {
-        if (fired == null) {
-          // Register the key lazily so that keys without fired timers never appear in the result.
-          fired = new ArrayList<>();
-          firedByKey.put(keyTimers.getKey(), fired);
-        }
-        fired.add(queue.pollFirst());
-      }
-      if (queue.isEmpty()) {
-        drainedKeys.add(keyTimers.getKey());
-      }
-    }
-    // Removal is deferred until after the loop to avoid modifying the map while iterating it.
-    objectTimers.keySet().removeAll(drainedKeys);
-    return firedByKey;
-  }
-
-  ////////////////////////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * The {@link Clock} providing the current time in the {@link TimeDomain#PROCESSING_TIME} domain.
-   */
-  private final Clock clock;
-
-  /**
-   * A map from each {@link PValue} to all {@link AppliedPTransform PTransform applications}
-   * that consume that {@link PValue}. Supplied by the caller at construction time.
-   */
-  private final Map<PValue, Collection<AppliedPTransform<?, ?, ?>>> consumers;
-
-  /**
-   * The {@link TransformWatermarks} (input and output watermarks) of each
-   * {@link AppliedPTransform}. Entries are created on first request by
-   * {@code getTransformWatermark}.
-   */
-  private final Map<AppliedPTransform<?, ?, ?>, TransformWatermarks> transformToWatermarks;
-
-  /**
-   * Builds an {@link InMemoryWatermarkManager} for a {@link Pipeline}. Every watermark in the new
-   * manager starts at {@link BoundedWindow#TIMESTAMP_MIN_VALUE}, the minimum watermark, and there
-   * are no watermark holds or pending elements.
-   *
-   * @param clock the source of the current time in the {@link TimeDomain#PROCESSING_TIME} domain
-   * @param rootTransforms the root-level transforms of the {@link Pipeline}
-   * @param consumers a mapping between each {@link PCollection} in the {@link Pipeline} to the
-   *                  transforms that consume it as a part of their input
-   * @return the newly created manager
-   */
-  public static InMemoryWatermarkManager create(
-      Clock clock,
-      Collection<AppliedPTransform<?, ?, ?>> rootTransforms,
-      Map<PValue, Collection<AppliedPTransform<?, ?, ?>>> consumers) {
-    InMemoryWatermarkManager manager =
-        new InMemoryWatermarkManager(clock, rootTransforms, consumers);
-    return manager;
-  }
-
-  private InMemoryWatermarkManager(
-      Clock clock,
-      Collection<AppliedPTransform<?, ?, ?>> rootTransforms,
-      Map<PValue, Collection<AppliedPTransform<?, ?, ?>>> consumers) {
-    this.clock = clock;
-    this.consumers = consumers;
-    this.transformToWatermarks = new HashMap<>();
-
-    // Eagerly create the watermarks of every root transform...
-    for (AppliedPTransform<?, ?, ?> root : rootTransforms) {
-      getTransformWatermark(root);
-    }
-    // ...and of every transform that consumes some value.
-    for (Collection<AppliedPTransform<?, ?, ?>> valueConsumers : consumers.values()) {
-      for (AppliedPTransform<?, ?, ?> consumer : valueConsumers) {
-        getTransformWatermark(consumer);
-      }
-    }
-  }
-
-  private TransformWatermarks getTransformWatermark(AppliedPTransform<?, ?, ?> transform) {
-    TransformWatermarks existing = transformToWatermarks.get(transform);
-    if (existing != null) {
-      return existing;
-    }
-
-    // Event-time watermarks: the output watermark is derived from the input watermark.
-    AppliedPTransformInputWatermark eventInput =
-        new AppliedPTransformInputWatermark(getInputWatermarks(transform));
-    AppliedPTransformOutputWatermark eventOutput =
-        new AppliedPTransformOutputWatermark(eventInput);
-
-    // Synchronized processing time watermarks, built the same way.
-    SynchronizedProcessingTimeInputWatermark processingInput =
-        new SynchronizedProcessingTimeInputWatermark(getInputProcessingWatermarks(transform));
-    SynchronizedProcessingTimeOutputWatermark processingOutput =
-        new SynchronizedProcessingTimeOutputWatermark(processingInput);
-
-    TransformWatermarks created =
-        new TransformWatermarks(eventInput, eventOutput, processingInput, processingOutput);
-    transformToWatermarks.put(transform, created);
-    return created;
-  }
-
-  private Collection<Watermark> getInputProcessingWatermarks(
-      AppliedPTransform<?, ?, ?> transform) {
-    Collection<? extends PValue> inputs = transform.getInput().expand();
-    ImmutableList.Builder<Watermark> upstream = ImmutableList.builder();
-    if (inputs.isEmpty()) {
-      // A transform without inputs is never held back by an upstream watermark.
-      upstream.add(THE_END_OF_TIME);
-    }
-    for (PValue input : inputs) {
-      TransformWatermarks producerWatermarks =
-          getTransformWatermark(input.getProducingTransformInternal());
-      upstream.add(producerWatermarks.synchronizedProcessingOutputWatermark);
-    }
-    return upstream.build();
-  }
-
-  private List<Watermark> getInputWatermarks(AppliedPTransform<?, ?, ?> transform) {
-    Collection<? extends PValue> inputs = transform.getInput().expand();
-    ImmutableList.Builder<Watermark> upstream = ImmutableList.builder();
-    if (inputs.isEmpty()) {
-      // A transform without inputs is never held back by an upstream watermark.
-      upstream.add(THE_END_OF_TIME);
-    }
-    for (PValue input : inputs) {
-      TransformWatermarks producerWatermarks =
-          getTransformWatermark(input.getProducingTransformInternal());
-      upstream.add(producerWatermarks.outputWatermark);
-    }
-    return upstream.build();
-  }
-
-  ////////////////////////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Gets the input and output watermarks for an {@link AppliedPTransform}. If the
-   * {@link AppliedPTransform PTransform} has not processed any elements, return a watermark of
-   * {@link BoundedWindow#TIMESTAMP_MIN_VALUE}.
-   *
-   * @return a snapshot of the input watermark and output watermark for the provided transform
-   */
-  public TransformWatermarks getWatermarks(AppliedPTransform<?, ?, ?> transform) {
-    return transformToWatermarks.get(transform);
-  }
-
-  /**
-   * Records the result of a transform processing one input and refreshes its watermarks.
-   *
-   * <p>Each transform has two monotonically increasing watermarks. The input watermark can, at any
-   * time, be updated to equal:
-   * <pre>
-   * MAX(CurrentInputWatermark, MIN(PendingElements, InputPCollectionWatermarks))
-   * </pre>
-   * The output watermark can, at any time, be updated to equal:
-   * <pre>
-   * MAX(CurrentOutputWatermark, MIN(InputWatermark, WatermarkHolds))
-   * </pre>
-   *
-   * @param completed the input that has completed, or {@code null} if there was no input bundle
-   * @param transform the transform that has completed processing the input
-   * @param timerUpdate the timers that were completed, set, and deleted while processing
-   * @param outputs the bundles the transform has output
-   * @param earliestHold the earliest watermark hold in the transform's state. {@code null} if there
-   *                     is no hold
-   */
-  public void updateWatermarks(
-      @Nullable CommittedBundle<?> completed,
-      AppliedPTransform<?, ?, ?> transform,
-      TimerUpdate timerUpdate,
-      Iterable<? extends CommittedBundle<?>> outputs,
-      @Nullable Instant earliestHold) {
-    updatePending(completed, transform, timerUpdate, outputs);
-    // The hold is recorded under the key of the completed bundle, or null without a bundle.
-    Object holdKey = completed == null ? null : completed.getKey();
-    transformToWatermarks.get(transform).setEventTimeHold(holdKey, earliestHold);
-    refreshWatermarks(transform);
-  }
-
-  private void refreshWatermarks(AppliedPTransform<?, ?, ?> transform) {
-    WatermarkUpdate update = transformToWatermarks.get(transform).refresh();
-    if (!update.isAdvanced()) {
-      // Nothing moved, so downstream transforms cannot have changed either.
-      return;
-    }
-    for (PValue output : transform.getOutput().expand()) {
-      Collection<AppliedPTransform<?, ?, ?>> downstream = consumers.get(output);
-      if (downstream == null) {
-        continue;
-      }
-      for (AppliedPTransform<?, ?, ?> consumer : downstream) {
-        refreshWatermarks(consumer);
-      }
-    }
-  }
-
-  /**
-   * Removes all of the completed Timers from the collection of pending timers, adds all new timers,
-   * and removes all deleted timers. Removes all elements consumed by the input bundle from the
-   * {@link PTransform PTransform's} collection of pending elements, and adds all elements produced
-   * by the {@link PTransform} to the pending queue of each consumer.
-   *
-   * @param input the bundle that was consumed, or {@code null} if there was none
-   * @param transform the transform that consumed the input and produced the outputs
-   * @param timerUpdate the timers completed, set, and deleted by the transform
-   * @param outputs the bundles produced by the transform
-   */
-  private void updatePending(
-      @Nullable CommittedBundle<?> input,
-      AppliedPTransform<?, ?, ?> transform,
-      TimerUpdate timerUpdate,
-      Iterable<? extends CommittedBundle<?>> outputs) {
-    TransformWatermarks completedTransform = transformToWatermarks.get(transform);
-    completedTransform.updateTimers(timerUpdate);
-    if (input != null) {
-      completedTransform.removePending(input);
-    }
-
-    for (CommittedBundle<?> bundle : outputs) {
-      Collection<AppliedPTransform<?, ?, ?>> bundleConsumers =
-          consumers.get(bundle.getPCollection());
-      if (bundleConsumers == null) {
-        // No transform consumes this PCollection, so there is nowhere to enqueue the bundle.
-        // refreshWatermarks guards the same lookup in the same way.
-        continue;
-      }
-      for (AppliedPTransform<?, ?, ?> consumer : bundleConsumers) {
-        TransformWatermarks watermarks = transformToWatermarks.get(consumer);
-        watermarks.addPending(bundle);
-      }
-    }
-  }
-
-  /**
-   * Extracts the fired timers of every {@link PTransform} tracked by this
-   * {@link InMemoryWatermarkManager}. Transforms for which no timers fired are left out of the
-   * result.
-   *
-   * @return for each transform with fired timers, those timers grouped by key
-   */
-  public Map<AppliedPTransform<?, ?, ?>, Map<Object, FiredTimers>> extractFiredTimers() {
-    Map<AppliedPTransform<?, ?, ?>, Map<Object, FiredTimers>> result = new HashMap<>();
-    for (Map.Entry<AppliedPTransform<?, ?, ?>, TransformWatermarks> entry :
-        transformToWatermarks.entrySet()) {
-      Map<Object, FiredTimers> firedByKey = entry.getValue().extractFiredTimers();
-      if (firedByKey.isEmpty()) {
-        continue;
-      }
-      result.put(entry.getKey(), firedByKey);
-    }
-    return result;
-  }
-
-  /**
-   * A watermark hold paired with the key that owns it. Holds are per-key while the watermark is
-   * global, so the watermark manager has to track holds, and their release, key by key.
-   *
-   * <p>{@link #compareTo(KeyedHold)} is not consistent with equals: after the timestamp, keys are
-   * ordered arbitrarily by identity rather than by object equality.
-   */
-  private static final class KeyedHold implements Comparable<KeyedHold> {
-    private static final Ordering<Object> KEY_ORDERING = Ordering.arbitrary().nullsLast();
-
-    private final Object key;
-    private final Instant timestamp;
-
-    private KeyedHold(Object key, Instant timestamp) {
-      this.key = key;
-      this.timestamp = timestamp;
-    }
-
-    /**
-     * Creates a {@link KeyedHold} for the key. A {@code null} timestamp is replaced with the end
-     * of time.
-     */
-    public static KeyedHold of(Object key, Instant timestamp) {
-      Instant effectiveHold = MoreObjects.firstNonNull(timestamp, THE_END_OF_TIME.get());
-      return new KeyedHold(key, effectiveHold);
-    }
-
-    /**
-     * Returns the timestamp of this {@link KeyedHold}.
-     */
-    public Instant getTimestamp() {
-      return timestamp;
-    }
-
-    @Override
-    public int compareTo(KeyedHold that) {
-      // Earliest hold first; the arbitrary key ordering only separates equal timestamps.
-      return ComparisonChain.start()
-          .compare(this.timestamp, that.timestamp)
-          .compare(this.key, that.key, KEY_ORDERING)
-          .result();
-    }
-
-    @Override
-    public boolean equals(Object other) {
-      // instanceof is false for null, so no separate null check is needed.
-      if (!(other instanceof KeyedHold)) {
-        return false;
-      }
-      KeyedHold that = (KeyedHold) other;
-      return Objects.equals(this.timestamp, that.timestamp) && Objects.equals(this.key, that.key);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(timestamp, key);
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(KeyedHold.class)
-          .add("key", key)
-          .add("hold", timestamp)
-          .toString();
-    }
-  }
-
-  /**
-   * Tracks at most one {@link KeyedHold} per key and gives access to the minimum hold across all
-   * keys.
-   *
-   * <p>{@code keyedHolds} finds the current hold of a key; {@code allHolds} orders the same holds
-   * by timestamp. Both collections must always contain exactly the same holds.
-   */
-  private static class PerKeyHolds {
-    private final Map<Object, KeyedHold> keyedHolds;
-    private final PriorityQueue<KeyedHold> allHolds;
-
-    private PerKeyHolds() {
-      this.keyedHolds = new HashMap<>();
-      this.allHolds = new PriorityQueue<>();
-    }
-
-    /**
-     * Gets the minimum hold across all keys in this {@link PerKeyHolds}, or THE_END_OF_TIME if
-     * there are no holds within this {@link PerKeyHolds}.
-     */
-    public Instant getMinHold() {
-      return allHolds.isEmpty() ? THE_END_OF_TIME.get() : allHolds.peek().getTimestamp();
-    }
-
-    /**
-     * Updates the hold of the provided key to the provided value, removing any other holds for
-     * the same key.
-     *
-     * @param key the key owning the hold; may be {@code null}
-     * @param newHold the new hold; {@link KeyedHold#of} replaces {@code null} with the end of time
-     */
-    public void updateHold(@Nullable Object key, Instant newHold) {
-      removeHold(key);
-      KeyedHold newKeyedHold = KeyedHold.of(key, newHold);
-      keyedHolds.put(key, newKeyedHold);
-      allHolds.offer(newKeyedHold);
-    }
-
-    /**
-     * Removes the hold of the provided key, if there is one, from both internal collections.
-     */
-    public void removeHold(Object key) {
-      // Remove (rather than get) the map entry so that no stale hold is left behind for the key.
-      KeyedHold oldHold = keyedHolds.remove(key);
-      if (oldHold != null) {
-        allHolds.remove(oldHold);
-      }
-    }
-  }
-
-  /**
-   * A reference to the input and output watermarks of an {@link AppliedPTransform}, in both the
-   * event time and the {@link TimeDomain#SYNCHRONIZED_PROCESSING_TIME} domains.
-   */
-  public class TransformWatermarks {
-    private final AppliedPTransformInputWatermark inputWatermark;
-    private final AppliedPTransformOutputWatermark outputWatermark;
-
-    private final SynchronizedProcessingTimeInputWatermark synchronizedProcessingInputWatermark;
-    private final SynchronizedProcessingTimeOutputWatermark synchronizedProcessingOutputWatermark;
-
-    // The most recently returned synchronized processing times; these only ever move forward.
-    private Instant latestSynchronizedInputWm;
-    private Instant latestSynchronizedOutputWm;
-
-    private TransformWatermarks(
-        AppliedPTransformInputWatermark inputWatermark,
-        AppliedPTransformOutputWatermark outputWatermark,
-        SynchronizedProcessingTimeInputWatermark inputSynchProcessingWatermark,
-        SynchronizedProcessingTimeOutputWatermark outputSynchProcessingWatermark) {
-      this.inputWatermark = inputWatermark;
-      this.outputWatermark = outputWatermark;
-
-      this.synchronizedProcessingInputWatermark = inputSynchProcessingWatermark;
-      this.synchronizedProcessingOutputWatermark = outputSynchProcessingWatermark;
-      this.latestSynchronizedInputWm = BoundedWindow.TIMESTAMP_MIN_VALUE;
-      this.latestSynchronizedOutputWm = BoundedWindow.TIMESTAMP_MIN_VALUE;
-    }
-
-    /**
-     * Returns the input watermark of the {@link AppliedPTransform}.
-     */
-    public Instant getInputWatermark() {
-      return inputWatermark.get();
-    }
-
-    /**
-     * Returns the output watermark of the {@link AppliedPTransform}.
-     */
-    public Instant getOutputWatermark() {
-      return outputWatermark.get();
-    }
-
-    /**
-     * Returns the synchronized processing input time of the {@link AppliedPTransform}.
-     *
-     * <p>The returned value is guaranteed to be monotonically increasing, and outside of the
-     * presence of holds, will increase as the system time progresses.
-     */
-    public synchronized Instant getSynchronizedProcessingInputTime() {
-      // Never report a time earlier than one that has already been returned.
-      latestSynchronizedInputWm = INSTANT_ORDERING.max(
-          latestSynchronizedInputWm,
-          INSTANT_ORDERING.min(clock.now(), synchronizedProcessingInputWatermark.get()));
-      return latestSynchronizedInputWm;
-    }
-
-    /**
-     * Returns the synchronized processing output time of the {@link AppliedPTransform}.
-     *
-     * <p>The returned value is guaranteed to be monotonically increasing, and outside of the
-     * presence of holds, will increase as the system time progresses.
-     */
-    public synchronized Instant getSynchronizedProcessingOutputTime() {
-      // Never report a time earlier than one that has already been returned.
-      latestSynchronizedOutputWm = INSTANT_ORDERING.max(
-          latestSynchronizedOutputWm,
-          INSTANT_ORDERING.min(clock.now(), synchronizedProcessingOutputWatermark.get()));
-      return latestSynchronizedOutputWm;
-    }
-
-    /**
-     * Refreshes all four watermarks, inputs before outputs, and returns the union of the two
-     * output watermark updates.
-     */
-    private WatermarkUpdate refresh() {
-      inputWatermark.refresh();
-      synchronizedProcessingInputWatermark.refresh();
-      WatermarkUpdate eventOutputUpdate = outputWatermark.refresh();
-      WatermarkUpdate syncOutputUpdate = synchronizedProcessingOutputWatermark.refresh();
-      return eventOutputUpdate.union(syncOutputUpdate);
-    }
-
-    private void setEventTimeHold(Object key, Instant newHold) {
-      outputWatermark.updateHold(key, newHold);
-    }
-
-    private void removePending(CommittedBundle<?> bundle) {
-      inputWatermark.removePendingElements(bundle.getElements());
-      synchronizedProcessingInputWatermark.removePending(bundle);
-    }
-
-    private void addPending(CommittedBundle<?> bundle) {
-      inputWatermark.addPendingElements(bundle.getElements());
-      synchronizedProcessingInputWatermark.addPending(bundle);
-    }
-
-    /**
-     * Extracts the fired timers of all three time domains and groups them by key.
-     *
-     * <p>Once the input watermark has reached {@link BoundedWindow#TIMESTAMP_MAX_VALUE}, the
-     * processing-time domains are extracted with that maximum timestamp instead of the clock.
-     */
-    private Map<Object, FiredTimers> extractFiredTimers() {
-      Map<Object, List<TimerData>> eventTimeTimers = inputWatermark.extractFiredEventTimeTimers();
-      Map<Object, List<TimerData>> processingTimers;
-      Map<Object, List<TimerData>> synchronizedTimers;
-      if (inputWatermark.get().equals(BoundedWindow.TIMESTAMP_MAX_VALUE)) {
-        processingTimers = synchronizedProcessingInputWatermark.extractFiredDomainTimers(
-            TimeDomain.PROCESSING_TIME, BoundedWindow.TIMESTAMP_MAX_VALUE);
-        // Extract the SYNCHRONIZED_PROCESSING_TIME domain here. Asking for PROCESSING_TIME a
-        // second time would find that domain already drained and never fire synchronized timers.
-        synchronizedTimers = synchronizedProcessingInputWatermark.extractFiredDomainTimers(
-            TimeDomain.SYNCHRONIZED_PROCESSING_TIME, BoundedWindow.TIMESTAMP_MAX_VALUE);
-      } else {
-        processingTimers = synchronizedProcessingInputWatermark.extractFiredDomainTimers(
-            TimeDomain.PROCESSING_TIME, clock.now());
-        synchronizedTimers = synchronizedProcessingInputWatermark.extractFiredDomainTimers(
-            TimeDomain.SYNCHRONIZED_PROCESSING_TIME, getSynchronizedProcessingInputTime());
-      }
-      Map<Object, Map<TimeDomain, List<TimerData>>> groupedTimers = new HashMap<>();
-      groupFiredTimers(groupedTimers, eventTimeTimers, processingTimers, synchronizedTimers);
-
-      Map<Object, FiredTimers> keyFiredTimers = new HashMap<>();
-      for (Map.Entry<Object, Map<TimeDomain, List<TimerData>>> firedTimers :
-          groupedTimers.entrySet()) {
-        keyFiredTimers.put(firedTimers.getKey(), new FiredTimers(firedTimers.getValue()));
-      }
-      return keyFiredTimers;
-    }
-
-    /**
-     * Merges each per-key timer map into {@code groupedToMutate}, filing every list under the
-     * {@link TimeDomain} of its first timer.
-     *
-     * <p>Each list must be non-empty and hold timers of a single domain. The processing-time maps
-     * satisfy the non-empty requirement because keys are only added along with a fired timer.
-     * NOTE(review): the event-time map is presumed to behave the same way; its producer is not
-     * visible here.
-     */
-    @SafeVarargs
-    private final void groupFiredTimers(
-        Map<Object, Map<TimeDomain, List<TimerData>>> groupedToMutate,
-        Map<Object, List<TimerData>>... timersToGroup) {
-      for (Map<Object, List<TimerData>> subGroup : timersToGroup) {
-        for (Map.Entry<Object, List<TimerData>> newTimers : subGroup.entrySet()) {
-          Map<TimeDomain, List<TimerData>> grouped = groupedToMutate.get(newTimers.getKey());
-          if (grouped == null) {
-            grouped = new HashMap<>();
-            groupedToMutate.put(newTimers.getKey(), grouped);
-          }
-          grouped.put(newTimers.getValue().get(0).getDomain(), newTimers.getValue());
-        }
-      }
-    }
-
-    private void updateTimers(TimerUpdate update) {
-      inputWatermark.updateTimers(update);
-      synchronizedProcessingInputWatermark.updateTimers(update);
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(TransformWatermarks.class)
-          .add("inputWatermark", inputWatermark)
-          .add("outputWatermark", outputWatermark)
-          .add("inputProcessingTime", synchronizedProcessingInputWatermark)
-          .add("outputProcessingTime", synchronizedProcessingOutputWatermark)
-          .toString();
-    }
-  }
-
-  /**
-   * The timers that were set, deleted, and completed by one executed step, together with the key
-   * they belong to.
-   *
-   * <p>setTimers and deletedTimers are the {@link TimerData} that were added to the
-   * {@link TimerInternals} of the executed step. completedTimers are the timers that were
-   * delivered as the input to the executed step.
-   */
-  public static class TimerUpdate {
-    private final Object key;
-    private final Iterable<? extends TimerData> completedTimers;
-
-    private final Iterable<? extends TimerData> setTimers;
-    private final Iterable<? extends TimerData> deletedTimers;
-
-    /**
-     * Returns a TimerUpdate that has a null key and contains no timers.
-     */
-    public static TimerUpdate empty() {
-      List<TimerData> noCompleted = Collections.<TimerData>emptyList();
-      List<TimerData> noSet = Collections.<TimerData>emptyList();
-      List<TimerData> noDeleted = Collections.<TimerData>emptyList();
-      return new TimerUpdate(null, noCompleted, noSet, noDeleted);
-    }
-
-    /**
-     * Creates a new {@link TimerUpdate} builder for the provided key. The builder starts without
-     * any timers; completed, set, and deleted timers are added through its methods.
-     */
-    public static TimerUpdateBuilder builder(Object key) {
-      return new TimerUpdateBuilder(key);
-    }
-
-    /**
-     * A builder of {@link TimerUpdate TimerUpdates} that accumulates completed, set, and deleted
-     * timers for a single key.
-     */
-    public static final class TimerUpdateBuilder {
-      private final Object key;
-      private final Collection<TimerData> completedTimers;
-      private final Collection<TimerData> setTimers;
-      private final Collection<TimerData> deletedTimers;
-
-      private TimerUpdateBuilder(Object key) {
-        this.key = key;
-        this.completedTimers = new HashSet<>();
-        this.setTimers = new HashSet<>();
-        this.deletedTimers = new HashSet<>();
-      }
-
-      /**
-       * Adds every provided timer to the completed timers. Returns this
-       * {@link TimerUpdateBuilder}.
-       */
-      public TimerUpdateBuilder withCompletedTimers(Iterable<TimerData> completedTimers) {
-        Iterables.addAll(this.completedTimers, completedTimers);
-        return this;
-      }
-
-      /**
-       * Records the provided timer as set, cancelling an earlier deletion of the same timer.
-       * Returns this {@link TimerUpdateBuilder}.
-       */
-      public TimerUpdateBuilder setTimer(TimerData setTimer) {
-        deletedTimers.remove(setTimer);
-        setTimers.add(setTimer);
-        return this;
-      }
-
-      /**
-       * Records the provided timer as deleted, cancelling an earlier set of the same timer.
-       * Returns this {@link TimerUpdateBuilder}.
-       */
-      public TimerUpdateBuilder deletedTimer(TimerData deletedTimer) {
-        deletedTimers.add(deletedTimer);
-        setTimers.remove(deletedTimer);
-        return this;
-      }
-
-      /**
-       * Returns a new {@link TimerUpdate} holding immutable copies of the timers accumulated so
-       * far.
-       */
-      public TimerUpdate build() {
-        ImmutableSet<TimerData> completedCopy = ImmutableSet.copyOf(completedTimers);
-        ImmutableSet<TimerData> setCopy = ImmutableSet.copyOf(setTimers);
-        ImmutableSet<TimerData> deletedCopy = ImmutableSet.copyOf(deletedTimers);
-        return new TimerUpdate(key, completedCopy, setCopy, deletedCopy);
-      }
-    }
-
-    private TimerUpdate(
-        Object key,
-        Iterable<? extends TimerData> completedTimers,
-        Iterable<? extends TimerData> setTimers,
-        Iterable<? extends TimerData> deletedTimers) {
-      this.key = key;
-      this.completedTimers = completedTimers;
-      this.setTimers = setTimers;
-      this.deletedTimers = deletedTimers;
-    }
-
-    @VisibleForTesting
-    Object getKey() {
-      return key;
-    }
-
-    @VisibleForTesting
-    Iterable<? extends TimerData> getCompletedTimers() {
-      return completedTimers;
-    }
-
-    @VisibleForTesting
-    Iterable<? extends TimerData> getSetTimers() {
-      return setTimers;
-    }
-
-    @VisibleForTesting
-    Iterable<? extends TimerData> getDeletedTimers() {
-      return deletedTimers;
-    }
-
-    /**
-     * Returns a copy of this {@link TimerUpdate} whose completed timers are replaced by the
-     * provided ones. The key, set timers, and deleted timers are carried over unchanged.
-     */
-    public TimerUpdate withCompletedTimers(Iterable<TimerData> completedTimers) {
-      return new TimerUpdate(this.key, completedTimers, setTimers, deletedTimers);
-    }
-
-    @Override
-    public boolean equals(Object other) {
-      // instanceof is false for null, so no separate null check is needed.
-      if (!(other instanceof TimerUpdate)) {
-        return false;
-      }
-      TimerUpdate that = (TimerUpdate) other;
-      return Objects.equals(this.key, that.key)
-          && Objects.equals(this.completedTimers, that.completedTimers)
-          && Objects.equals(this.setTimers, that.setTimers)
-          && Objects.equals(this.deletedTimers, that.deletedTimers);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(key, completedTimers, setTimers, deletedTimers);
-    }
-  }
-
-  /**
-   * A pair of {@link TimerData} and key which can be delivered to the appropriate
-   * {@link AppliedPTransform}. A timer fires at the transform that set it with a specific key when
-   * the time domain in which it lives progresses past a specified time, as determined by the
-   * {@link InMemoryWatermarkManager}.
-   */
-  public static class FiredTimers {
-    private final Map<TimeDomain, ? extends Collection<TimerData>> timers;
-
-    private FiredTimers(Map<TimeDomain, ? extends Collection<TimerData>> timers) {
-      this.timers = timers;
-    }
-
-    /**
-     * Gets all of the timers that have fired within the provided {@link TimeDomain}. If no timers
-     * fired within the provided domain, return an empty collection.
-     *
-     * <p>Timers within a {@link TimeDomain} are guaranteed to be in order of increasing timestamp.
-     */
-    public Collection<TimerData> getTimers(TimeDomain domain) {
-      Collection<TimerData> domainTimers = timers.get(domain);
-      if (domainTimers == null) {
-        return Collections.emptyList();
-      }
-      return domainTimers;
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(FiredTimers.class).add("timers", timers).toString();
-    }
-  }
-
-  private static class WindowedValueByTimestampComparator extends Ordering<WindowedValue<?>> {
-    @Override
-    public int compare(WindowedValue<?> o1, WindowedValue<?> o2) {
-      return o1.getTimestamp().compareTo(o2.getTimestamp());
-    }
-  }
-
-  public Set<AppliedPTransform<?, ?, ?>> getCompletedTransforms() {
-    Set<AppliedPTransform<?, ?, ?>> result = new HashSet<>();
-    for (Map.Entry<AppliedPTransform<?, ?, ?>, TransformWatermarks> wms :
-        transformToWatermarks.entrySet()) {
-      if (wms.getValue().getOutputWatermark().equals(THE_END_OF_TIME.get())) {
-        result.add(wms.getKey());
-      }
-    }
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessBundle.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessBundle.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessBundle.java
deleted file mode 100644
index 112ba17..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessBundle.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.MoreObjects;
-import com.google.common.collect.ImmutableList;
-
-import org.joda.time.Instant;
-
-import javax.annotation.Nullable;
-
-/**
- * A {@link UncommittedBundle} that buffers elements in memory.
- */
-public final class InProcessBundle<T> implements UncommittedBundle<T> {
-  private final PCollection<T> pcollection;
-  private final boolean keyed;
-  private final Object key;
-  private boolean committed = false;
-  private ImmutableList.Builder<WindowedValue<T>> elements;
-
-  /**
-   * Create a new {@link InProcessBundle} for the specified {@link PCollection} without a key.
-   */
-  public static <T> InProcessBundle<T> unkeyed(PCollection<T> pcollection) {
-    return new InProcessBundle<T>(pcollection, false, null);
-  }
-
-  /**
-   * Create a new {@link InProcessBundle} for the specified {@link PCollection} with the specified
-   * key.
-   *
-   * See {@link CommittedBundle#getKey()} and {@link CommittedBundle#isKeyed()} for more
-   * information.
-   */
-  public static <T> InProcessBundle<T> keyed(PCollection<T> pcollection, Object key) {
-    return new InProcessBundle<T>(pcollection, true, key);
-  }
-
-  private InProcessBundle(PCollection<T> pcollection, boolean keyed, Object key) {
-    this.pcollection = pcollection;
-    this.keyed = keyed;
-    this.key = key;
-    this.elements = ImmutableList.builder();
-  }
-
-  @Override
-  public PCollection<T> getPCollection() {
-    return pcollection;
-  }
-
-  @Override
-  public InProcessBundle<T> add(WindowedValue<T> element) {
-    checkState(!committed, "Can't add element %s to committed bundle %s", element, this);
-    elements.add(element);
-    return this;
-  }
-
-  @Override
-  public CommittedBundle<T> commit(final Instant synchronizedCompletionTime) {
-    checkState(!committed, "Can't commit already committed bundle %s", this);
-    committed = true;
-    final Iterable<WindowedValue<T>> committedElements = elements.build();
-    return new CommittedBundle<T>() {
-      @Override
-      @Nullable
-      public Object getKey() {
-        return key;
-      }
-
-      @Override
-      public boolean isKeyed() {
-        return keyed;
-      }
-
-      @Override
-      public Iterable<WindowedValue<T>> getElements() {
-        return committedElements;
-      }
-
-      @Override
-      public PCollection<T> getPCollection() {
-        return pcollection;
-      }
-
-      @Override
-      public Instant getSynchronizedProcessingOutputWatermark() {
-        return synchronizedCompletionTime;
-      }
-
-      @Override
-      public String toString() {
-        return MoreObjects.toStringHelper(this)
-            .omitNullValues()
-            .add("pcollection", pcollection)
-            .add("key", key)
-            .add("elements", committedElements)
-            .toString();
-      }
-    };
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessBundleOutputManager.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessBundleOutputManager.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessBundleOutputManager.java
deleted file mode 100644
index 406e2d4..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessBundleOutputManager.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.util.DoFnRunners.OutputManager;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import java.util.Map;
-
-/**
- * An {@link OutputManager} that outputs to {@link CommittedBundle Bundles} used by the
- * {@link InProcessPipelineRunner}.
- */
-public class InProcessBundleOutputManager implements OutputManager {
-  private final Map<TupleTag<?>, UncommittedBundle<?>> bundles;
-
-  public static InProcessBundleOutputManager create(
-      Map<TupleTag<?>, UncommittedBundle<?>> outputBundles) {
-    return new InProcessBundleOutputManager(outputBundles);
-  }
-
-  public InProcessBundleOutputManager(Map<TupleTag<?>, UncommittedBundle<?>> bundles) {
-    this.bundles = bundles;
-  }
-
-  @SuppressWarnings("unchecked")
-  @Override
-  public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
-    @SuppressWarnings("rawtypes")
-    UncommittedBundle bundle = bundles.get(tag);
-    bundle.add(output);
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessCreate.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessCreate.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessCreate.java
deleted file mode 100644
index 9023b7b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessCreate.java
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.io.BoundedSource;
-import com.google.cloud.dataflow.sdk.io.Read;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.Create.Values;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Optional;
-import com.google.common.base.Throwables;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterators;
-import com.google.common.collect.PeekingIterator;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-import javax.annotation.Nullable;
-
-/**
- * An in-process implementation of the {@link Values Create.Values} {@link PTransform}, implemented
- * using a {@link BoundedSource}.
- *
- * The coder is inferred via the {@link Values#getDefaultOutputCoder(PInput)} method on the original
- * transform.
- */
-class InProcessCreate<T> extends ForwardingPTransform<PInput, PCollection<T>> {
-  private final Create.Values<T> original;
-
-  public static <T> InProcessCreate<T> from(Create.Values<T> original) {
-    return new InProcessCreate<>(original);
-  }
-
-  private InProcessCreate(Values<T> original) {
-    this.original = original;
-  }
-
-  @Override
-  public PCollection<T> apply(PInput input) {
-    Coder<T> elementCoder;
-    try {
-      elementCoder = original.getDefaultOutputCoder(input);
-    } catch (CannotProvideCoderException e) {
-      throw new IllegalArgumentException(
-          "Unable to infer a coder and no Coder was specified. "
-          + "Please set a coder by invoking Create.withCoder() explicitly.",
-          e);
-    }
-    InMemorySource<T> source;
-    try {
-      source = new InMemorySource<>(original.getElements(), elementCoder);
-    } catch (IOException e) {
-      throw Throwables.propagate(e);
-    }
-    PCollection<T> result = input.getPipeline().apply(Read.from(source));
-    result.setCoder(elementCoder);
-    return result;
-  }
-
-  @Override
-  public PTransform<PInput, PCollection<T>> delegate() {
-    return original;
-  }
-
-  @VisibleForTesting
-  static class InMemorySource<T> extends BoundedSource<T> {
-    private final Collection<byte[]> allElementsBytes;
-    private final long totalSize;
-    private final Coder<T> coder;
-
-    public InMemorySource(Iterable<T> elements, Coder<T> elemCoder)
-        throws CoderException, IOException {
-      allElementsBytes = new ArrayList<>();
-      long totalSize = 0L;
-      for (T element : elements) {
-        byte[] bytes = CoderUtils.encodeToByteArray(elemCoder, element);
-        allElementsBytes.add(bytes);
-        totalSize += bytes.length;
-      }
-      this.totalSize = totalSize;
-      this.coder = elemCoder;
-    }
-
-    /**
-     * Create a new source with the specified bytes. The new source owns the input element bytes,
-     * which must not be modified after this constructor is called.
-     */
-    private InMemorySource(Collection<byte[]> elementBytes, long totalSize, Coder<T> coder) {
-      this.allElementsBytes = ImmutableList.copyOf(elementBytes);
-      this.totalSize = totalSize;
-      this.coder = coder;
-    }
-
-    @Override
-    public List<? extends BoundedSource<T>> splitIntoBundles(
-        long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
-      ImmutableList.Builder<InMemorySource<T>> resultBuilder = ImmutableList.builder();
-      long currentSourceSize = 0L;
-      List<byte[]> currentElems = new ArrayList<>();
-      for (byte[] elemBytes : allElementsBytes) {
-        currentElems.add(elemBytes);
-        currentSourceSize += elemBytes.length;
-        if (currentSourceSize >= desiredBundleSizeBytes) {
-          resultBuilder.add(new InMemorySource<>(currentElems, currentSourceSize, coder));
-          currentElems.clear();
-          currentSourceSize = 0L;
-        }
-      }
-      if (!currentElems.isEmpty()) {
-        resultBuilder.add(new InMemorySource<>(currentElems, currentSourceSize, coder));
-      }
-      return resultBuilder.build();
-    }
-
-    @Override
-    public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
-      return totalSize;
-    }
-
-    @Override
-    public boolean producesSortedKeys(PipelineOptions options) throws Exception {
-      return false;
-    }
-
-    @Override
-    public BoundedSource.BoundedReader<T> createReader(PipelineOptions options) throws IOException {
-      return new BytesReader();
-    }
-
-    @Override
-    public void validate() {}
-
-    @Override
-    public Coder<T> getDefaultOutputCoder() {
-      return coder;
-    }
-
-    private class BytesReader extends BoundedReader<T> {
-      private final PeekingIterator<byte[]> iter;
-      /**
-       * Use an optional to distinguish between null next element (as Optional.absent()) and no next
-       * element (next is null).
-       */
-      @Nullable private Optional<T> next;
-
-      public BytesReader() {
-        this.iter = Iterators.peekingIterator(allElementsBytes.iterator());
-      }
-
-      @Override
-      public BoundedSource<T> getCurrentSource() {
-        return InMemorySource.this;
-      }
-
-      @Override
-      public boolean start() throws IOException {
-        return advance();
-      }
-
-      @Override
-      public boolean advance() throws IOException {
-        boolean hasNext = iter.hasNext();
-        if (hasNext) {
-          next = Optional.fromNullable(CoderUtils.decodeFromByteArray(coder, iter.next()));
-        } else {
-          next = null;
-        }
-        return hasNext;
-      }
-
-      @Override
-      @Nullable
-      public T getCurrent() throws NoSuchElementException {
-        if (next == null) {
-          throw new NoSuchElementException();
-        }
-        return next.orNull();
-      }
-
-      @Override
-      public void close() throws IOException {}
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessEvaluationContext.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessEvaluationContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessEvaluationContext.java
deleted file mode 100644
index 4aeb0d3..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/inprocess/InProcessEvaluationContext.java
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.runners.inprocess;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.runners.inprocess.GroupByKeyEvaluatorFactory.InProcessGroupByKeyOnly;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.FiredTimers;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InMemoryWatermarkManager.TransformWatermarks;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.CommittedBundle;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.PCollectionViewWriter;
-import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner.UncommittedBundle;
-import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger;
-import com.google.cloud.dataflow.sdk.util.ExecutionContext;
-import com.google.cloud.dataflow.sdk.util.SideInputReader;
-import com.google.cloud.dataflow.sdk.util.TimerInternals.TimerData;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.util.state.CopyOnAccessInMemoryStateInternals;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentMap;
-
-import javax.annotation.Nullable;
-
-/**
- * The evaluation context for a specific pipeline being executed by the
- * {@link InProcessPipelineRunner}. Contains state shared within the execution across all
- * transforms.
- *
- * <p>{@link InProcessEvaluationContext} contains shared state for an execution of the
- * {@link InProcessPipelineRunner} that can be used while evaluating a {@link PTransform}. This
- * consists of views into underlying state and watermark implementations, access to read and write
- * {@link PCollectionView PCollectionViews}, and constructing {@link CounterSet CounterSets} and
- * {@link ExecutionContext ExecutionContexts}. This includes executing callbacks asynchronously when
- * state changes to the appropriate point (e.g. when a {@link PCollectionView} is requested and
- * known to be empty).
- *
- * <p>{@link InProcessEvaluationContext} also handles results by committing finalizing bundles based
- * on the current global state and updating the global state appropriately. This includes updating
- * the per-{@link StepAndKey} state, updating global watermarks, and executing any callbacks that
- * can be executed.
- */
-class InProcessEvaluationContext {
-  /** The step name for each {@link AppliedPTransform} in the {@link Pipeline}. */
-  private final Map<AppliedPTransform<?, ?, ?>, String> stepNames;
-
-  /** The options that were used to create this {@link Pipeline}. */
-  private final InProcessPipelineOptions options;
-
-  /** The current processing time and event time watermarks and timers. */
-  private final InMemoryWatermarkManager watermarkManager;
-
-  /** Executes callbacks based on the progression of the watermark. */
-  private final WatermarkCallbackExecutor callbackExecutor;
-
-  /** The stateInternals of the world, by applied PTransform and key. */
-  private final ConcurrentMap<StepAndKey, CopyOnAccessInMemoryStateInternals<?>>
-      applicationStateInternals;
-
-  private final InProcessSideInputContainer sideInputContainer;
-
-  private final CounterSet mergedCounters;
-
-  public static InProcessEvaluationContext create(
-      InProcessPipelineOptions options,
-      Collection<AppliedPTransform<?, ?, ?>> rootTransforms,
-      Map<PValue, Collection<AppliedPTransform<?, ?, ?>>> valueToConsumers,
-      Map<AppliedPTransform<?, ?, ?>, String> stepNames,
-      Collection<PCollectionView<?>> views) {
-    return new InProcessEvaluationContext(
-        options, rootTransforms, valueToConsumers, stepNames, views);
-  }
-
-  private InProcessEvaluationContext(
-      InProcessPipelineOptions options,
-      Collection<AppliedPTransform<?, ?, ?>> rootTransforms,
-      Map<PValue, Collection<AppliedPTransform<?, ?, ?>>> valueToConsumers,
-      Map<AppliedPTransform<?, ?, ?>, String> stepNames,
-      Collection<PCollectionView<?>> views) {
-    this.options = checkNotNull(options);
-    checkNotNull(rootTransforms);
-    checkNotNull(valueToConsumers);
-    checkNotNull(stepNames);
-    checkNotNull(views);
-    this.stepNames = stepNames;
-
-    this.watermarkManager =
-        InMemoryWatermarkManager.create(
-            NanosOffsetClock.create(), rootTransforms, valueToConsumers);
-    this.sideInputContainer = InProcessSideInputContainer.create(this, views);
-
-    this.applicationStateInternals = new ConcurrentHashMap<>();
-    this.mergedCounters = new CounterSet();
-
-    this.callbackExecutor = WatermarkCallbackExecutor.create();
-  }
-
-  /**
-   * Handle the provided {@link InProcessTransformResult}, produced after evaluating the provided
-   * {@link CommittedBundle} (potentially null, if the result of a root {@link PTransform}).
-   *
-   * <p>The result is the output of running the transform contained in the
-   * {@link InProcessTransformResult} on the contents of the provided bundle.
-   *
-   * @param completedBundle the bundle that was processed to produce the result. Potentially
-   *                        {@code null} if the transform that produced the result is a root
-   *                        transform
-   * @param completedTimers the timers that were delivered to produce the {@code completedBundle},
-   *                        or an empty iterable if no timers were delivered
-   * @param result the result of evaluating the input bundle
-   * @return the committed bundles contained within the handled {@code result}
-   */
-  public synchronized Iterable<? extends CommittedBundle<?>> handleResult(
-      @Nullable CommittedBundle<?> completedBundle,
-      Iterable<TimerData> completedTimers,
-      InProcessTransformResult result) {
-    Iterable<? extends CommittedBundle<?>> committedBundles =
-        commitBundles(result.getOutputBundles());
-    // Update watermarks and timers
-    watermarkManager.updateWatermarks(
-        completedBundle,
-        result.getTransform(),
-        result.getTimerUpdate().withCompletedTimers(completedTimers),
-        committedBundles,
-        result.getWatermarkHold());
-    fireAllAvailableCallbacks();
-    // Update counters
-    if (result.getCounters() != null) {
-      mergedCounters.merge(result.getCounters());
-    }
-    // Update state internals
-    CopyOnAccessInMemoryStateInternals<?> theirState = result.getState();
-    if (theirState != null) {
-      CopyOnAccessInMemoryStateInternals<?> committedState = theirState.commit();
-      StepAndKey stepAndKey =
-          StepAndKey.of(
-              result.getTransform(), completedBundle == null ? null : completedBundle.getKey());
-      if (!committedState.isEmpty()) {
-        applicationStateInternals.put(stepAndKey, committedState);
-      } else {
-        applicationStateInternals.remove(stepAndKey);
-      }
-    }
-    return committedBundles;
-  }
-
-  private Iterable<? extends CommittedBundle<?>> commitBundles(
-      Iterable<? extends UncommittedBundle<?>> bundles) {
-    ImmutableList.Builder<CommittedBundle<?>> completed = ImmutableList.builder();
-    for (UncommittedBundle<?> inProgress : bundles) {
-      AppliedPTransform<?, ?, ?> producing =
-          inProgress.getPCollection().getProducingTransformInternal();
-      TransformWatermarks watermarks = watermarkManager.getWatermarks(producing);
-      CommittedBundle<?> committed =
-          inProgress.commit(watermarks.getSynchronizedProcessingOutputTime());
-      // Empty bundles don't impact watermarks and shouldn't trigger downstream execution, so
-      // filter them out
-      if (!Iterables.isEmpty(committed.getElements())) {
-        completed.add(committed);
-      }
-    }
-    return completed.build();
-  }
-
-  private void fireAllAvailableCallbacks() {
-    for (AppliedPTransform<?, ?, ?> transform : stepNames.keySet()) {
-      fireAvailableCallbacks(transform);
-    }
-  }
-
-  private void fireAvailableCallbacks(AppliedPTransform<?, ?, ?> producingTransform) {
-    TransformWatermarks watermarks = watermarkManager.getWatermarks(producingTransform);
-    callbackExecutor.fireForWatermark(producingTransform, watermarks.getOutputWatermark());
-  }
-
-  /**
-   * Create a {@link UncommittedBundle} for use by a source.
-   */
-  public <T> UncommittedBundle<T> createRootBundle(PCollection<T> output) {
-    return InProcessBundle.unkeyed(output);
-  }
-
-  /**
-   * Create a {@link UncommittedBundle} whose elements belong to the specified {@link
-   * PCollection}.
-   */
-  public <T> UncommittedBundle<T> createBundle(CommittedBundle<?> input, PCollection<T> output) {
-    return input.isKeyed()
-        ? InProcessBundle.keyed(output, input.getKey())
-        : InProcessBundle.unkeyed(output);
-  }
-
-  /**
-   * Create a {@link UncommittedBundle} with the specified keys at the specified step. For use by
-   * {@link InProcessGroupByKeyOnly} {@link PTransform PTransforms}.
-   */
-  public <T> UncommittedBundle<T> createKeyedBundle(
-      CommittedBundle<?> input, Object key, PCollection<T> output) {
-    return InProcessBundle.keyed(output, key);
-  }
-
-  /**
-   * Create a {@link PCollectionViewWriter}, whose elements will be used in the provided
-   * {@link PCollectionView}.
-   */
-  public <ElemT, ViewT> PCollectionViewWriter<ElemT, ViewT> createPCollectionViewWriter(
-      PCollection<Iterable<ElemT>> input, final PCollectionView<ViewT> output) {
-    return new PCollectionViewWriter<ElemT, ViewT>() {
-      @Override
-      public void add(Iterable<WindowedValue<ElemT>> values) {
-        sideInputContainer.write(output, values);
-      }
-    };
-  }
-
-  /**
-   * Schedule a callback to be executed after output would be produced for the given window
-   * if there had been input.
-   *
-   * <p>Output would be produced when the watermark for a {@link PValue} passes the point at
-   * which the trigger for the specified window (with the specified windowing strategy) must have
-   * fired from the perspective of that {@link PValue}, as specified by the value of
-   * {@link Trigger#getWatermarkThatGuaranteesFiring(BoundedWindow)} for the trigger of the
-   * {@link WindowingStrategy}. When the callback has fired, either values will have been produced
-   * for a key in that window, the window is empty, or all elements in the window are late. The
-   * callback will be executed regardless of whether values have been produced.
-   */
-  public void scheduleAfterOutputWouldBeProduced(
-      PValue value,
-      BoundedWindow window,
-      WindowingStrategy<?, ?> windowingStrategy,
-      Runnable runnable) {
-    AppliedPTransform<?, ?, ?> producing = getProducing(value);
-    callbackExecutor.callOnGuaranteedFiring(producing, window, windowingStrategy, runnable);
-
-    fireAvailableCallbacks(lookupProducing(value));
-  }
-
-  private AppliedPTransform<?, ?, ?> getProducing(PValue value) {
-    if (value.getProducingTransformInternal() != null) {
-      return value.getProducingTransformInternal();
-    }
-    return lookupProducing(value);
-  }
-
-  private AppliedPTransform<?, ?, ?> lookupProducing(PValue value) {
-    for (AppliedPTransform<?, ?, ?> transform : stepNames.keySet()) {
-      if (transform.getOutput().equals(value) || transform.getOutput().expand().contains(value)) {
-        return transform;
-      }
-    }
-    return null;
-  }
-
-  /**
-   * Get the options used by this {@link Pipeline}.
-   */
-  public InProcessPipelineOptions getPipelineOptions() {
-    return options;
-  }
-
-  /**
-   * Get an {@link ExecutionContext} for the provided {@link AppliedPTransform} and key.
-   */
-  public InProcessExecutionContext getExecutionContext(
-      AppliedPTransform<?, ?, ?> application, Object key) {
-    StepAndKey stepAndKey = StepAndKey.of(application, key);
-    return new InProcessExecutionContext(
-        options.getClock(),
-        key,
-        (CopyOnAccessInMemoryStateInternals<Object>) applicationStateInternals.get(stepAndKey),
-        watermarkManager.getWatermarks(application));
-  }
-
-  /**
-   * Get all of the steps used in this {@link Pipeline}.
-   */
-  public Collection<AppliedPTransform<?, ?, ?>> getSteps() {
-    return stepNames.keySet();
-  }
-
-  /**
-   * Get the Step Name for the provided application.
-   */
-  public String getStepName(AppliedPTransform<?, ?, ?> application) {
-    return stepNames.get(application);
-  }
-
-  /**
-   * Returns a {@link SideInputReader} capable of reading the provided
-   * {@link PCollectionView PCollectionViews}.
-   * @param sideInputs the {@link PCollectionView PCollectionViews} the result should be able to
-   *                   read
-   * @return a {@link SideInputReader} that can read all of the provided
-   *         {@link PCollectionView PCollectionViews}
-   */
-  public SideInputReader createSideInputReader(final List<PCollectionView<?>> sideInputs) {
-    return sideInputContainer.createReaderForViews(sideInputs);
-  }
-
-  /**
-   * Create a {@link CounterSet} for this {@link Pipeline}. The {@link CounterSet} is independent
-   * of all other {@link CounterSet CounterSets} created by this call.
-   *
-   * The {@link InProcessEvaluationContext} is responsible for unifying the counters present in
-   * all created {@link CounterSet CounterSets} when the transforms that call this method
-   * complete.
-   */
-  public CounterSet createCounterSet() {
-    return new CounterSet();
-  }
-
-  /**
-   * Returns all of the counters that have been merged into this context via calls to
-   * {@link CounterSet#merge(CounterSet)}.
-   */
-  public CounterSet getCounters() {
-    return mergedCounters;
-  }
-
-  /**
-   * Extracts all timers that have been fired and have not already been extracted.
-   *
-   * <p>This is a destructive operation. Timers will only appear in the result of this method once
-   * for each time they are set.
-   */
-  public Map<AppliedPTransform<?, ?, ?>, Map<Object, FiredTimers>> extractFiredTimers() {
-    return watermarkManager.extractFiredTimers();
-  }
-
-  /**
-   * Returns true if the step will not produce additional output.
-   *
-   * <p>If the provided transform produces only {@link IsBounded#BOUNDED}
-   * {@link PCollection PCollections}, returns true if the watermark is at
-   * {@link BoundedWindow#TIMESTAMP_MAX_VALUE positive infinity}.
-   *
-   * <p>If the provided transform produces any {@link IsBounded#UNBOUNDED}
-   * {@link PCollection PCollections}, returns the value of
-   * {@link InProcessPipelineOptions#isShutdownUnboundedProducersWithMaxWatermark()}.
-   */
-  public boolean isDone(AppliedPTransform<?, ?, ?> transform) {
-    // if the PTransform's watermark isn't at the max value, it isn't done
-    if (watermarkManager
-        .getWatermarks(transform)
-        .getOutputWatermark()
-        .isBefore(BoundedWindow.TIMESTAMP_MAX_VALUE)) {
-      return false;
-    }
-    // If the PTransform has any unbounded outputs, and unbounded producers should not be shut down,
-    // the PTransform may produce additional output. It is not done.
-    for (PValue output : transform.getOutput().expand()) {
-      if (output instanceof PCollection) {
-        IsBounded bounded = ((PCollection<?>) output).isBounded();
-        if (bounded.equals(IsBounded.UNBOUNDED)
-            && !options.isShutdownUnboundedProducersWithMaxWatermark()) {
-          return false;
-        }
-      }
-    }
-    // The PTransform's watermark was at positive infinity and all of its outputs are known to be
-    // done. It is done.
-    return true;
-  }
-
-  /**
-   * Returns true if all steps are done.
-   */
-  public boolean isDone() {
-    for (AppliedPTransform<?, ?, ?> transform : stepNames.keySet()) {
-      if (!isDone(transform)) {
-        return false;
-      }
-    }
-    return true;
-  }
-}

[37/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java
deleted file mode 100644
index 4781d1c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsFactory.java
+++ /dev/null
@@ -1,1537 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.options.Validation.Required;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunnerRegistrar;
-import com.google.cloud.dataflow.sdk.util.StringUtils;
-import com.google.cloud.dataflow.sdk.util.common.ReflectHelpers;
-import com.google.common.base.Function;
-import com.google.common.base.Joiner;
-import com.google.common.base.Optional;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Predicate;
-import com.google.common.base.Strings;
-import com.google.common.base.Throwables;
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.Collections2;
-import com.google.common.collect.FluentIterable;
-import com.google.common.collect.ImmutableListMultimap;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Iterators;
-import com.google.common.collect.ListMultimap;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-import com.google.common.collect.SortedSetMultimap;
-import com.google.common.collect.TreeMultimap;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.databind.JavaType;
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.beans.BeanInfo;
-import java.beans.IntrospectionException;
-import java.beans.Introspector;
-import java.beans.PropertyDescriptor;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.lang.annotation.Annotation;
-import java.lang.reflect.Method;
-import java.lang.reflect.Modifier;
-import java.lang.reflect.Proxy;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.NoSuchElementException;
-import java.util.ServiceLoader;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TreeSet;
-
-import javax.annotation.Nullable;
-
-/**
- * Constructs a {@link PipelineOptions} or any derived interface that is composable to any other
- * derived interface of {@link PipelineOptions} via the {@link PipelineOptions#as} method. Being
- * able to compose one derived interface of {@link PipelineOptions} to another has the following
- * restrictions:
- * <ul>
- *   <li>Any property with the same name must have the same return type for all derived interfaces
- *       of {@link PipelineOptions}.
- *   <li>Every bean property of any interface derived from {@link PipelineOptions} must have a
- *       getter and setter method.
- *   <li>Every method must conform to being a getter or setter for a JavaBean.
- *   <li>The derived interface of {@link PipelineOptions} must be composable with every interface
- *       registered with this factory.
- * </ul>
- *
- * <p>See the <a
- * href="http://www.oracle.com/technetwork/java/javase/documentation/spec-136004.html">JavaBeans
- * specification</a> for more details as to what constitutes a property.
- */
-public class PipelineOptionsFactory {
-  /**
-   * Builds a default {@link PipelineOptions} instance without parsing any arguments.
-   *
-   * <p>The {@link ApplicationNameOptions#getAppName() "appName"} is initialised to the
-   * {@link Class#getSimpleName() simple name} of the calling class.
-   *
-   * @return An object that implements {@link PipelineOptions}.
-   */
-  public static PipelineOptions create() {
-    Builder builder = new Builder();
-    return builder.as(PipelineOptions.class);
-  }
-
-  /**
-   * Builds a default instance of the requested options interface {@code <T>} without parsing
-   * any arguments.
-   *
-   * <p>The {@link ApplicationNameOptions#getAppName() "appName"} is initialised to the
-   * {@link Class#getSimpleName() simple name} of the calling class.
-   *
-   * <p>{@code <T>} must be composable with every interface registered with this factory; see
-   * {@link PipelineOptionsFactory#validateWellFormed(Class, Set)} for the rules.
-   *
-   * @param klass The options interface to implement.
-   * @return An object that implements {@code <T>}.
-   */
-  public static <T extends PipelineOptions> T as(Class<T> klass) {
-    Builder builder = new Builder();
-    return builder.as(klass);
-  }
-
-  /**
-   * Starts a {@link Builder} that will parse the given command line arguments when the
-   * {@link PipelineOptions} are constructed.
-   *
-   * <p>Arguments use the GNU style, for example:
-   * <pre>
-   *   --project=MyProject (simple property, sets the "project" property to "MyProject")
-   *   --readOnly=true (boolean property, sets the "readOnly" property to "true")
-   *   --readOnly (boolean shorthand, sets the "readOnly" property to "true")
-   *   --x=1 --x=2 --x=3 (list style simple property, sets the "x" property to [1, 2, 3])
-   *   --x=1,2,3 (list shorthand, sets the "x" property to [1, 2, 3])
-   *   --complexObject='{"key1":"value1",...}' (JSON format for all other complex types)
-   * </pre>
-   *
-   * <p>Simple properties can be bound to {@link String}, {@link Class}, enums and the Java
-   * primitives {@code boolean}, {@code byte}, {@code short}, {@code int}, {@code long},
-   * {@code float}, {@code double} together with their wrapper classes.
-   *
-   * <p>Simple list style properties can be bound to {@code boolean[]}, {@code char[]},
-   * {@code short[]}, {@code int[]}, {@code long[]}, {@code float[]}, {@code double[]},
-   * {@code Class[]}, enum arrays, {@code String[]}, and {@code List<String>}.
-   *
-   * <p>Every other type must be supplied in JSON format.
-   *
-   * <p>Strict parsing is on by default: each argument must look like either
-   * {@code --booleanArgName} or {@code --argName=argValue}. Call
-   * {@link Builder#withoutStrictParsing()} to relax this. Empty or null arguments are ignored
-   * regardless of the strict parsing setting.
-   *
-   * <p>Passing {@code --help} prints help to {@link System#out} and then exits the application.
-   * A bare {@code --help} lists the
-   * {@link PipelineOptionsFactory#getRegisteredOptions() registered options} through
-   * {@link PipelineOptionsFactory#printHelp(PrintStream)}, whereas
-   * {@code --help=PipelineOptionsClassName} prints detailed usage for that one interface through
-   * {@link PipelineOptionsFactory#printHelp(PrintStream, Class)}.
-   *
-   * @param args The command line arguments; must not be null.
-   * @return A builder configured with {@code args}.
-   */
-  public static Builder fromArgs(String[] args) {
-    Builder builder = new Builder();
-    return builder.fromArgs(args);
-  }
-
-  /**
-   * Starts a {@link Builder} that validates the options it creates.
-   *
-   * <p>After creation, the resulting object is checked against all the validation criteria of
-   * {@code <T>}. See {@link PipelineOptionsValidator#validate(Class, PipelineOptions)} for more
-   * details about validation.
-   *
-   * <p>This is a static factory method like {@link #create()}, {@link #as(Class)} and
-   * {@link #fromArgs(String[])}, so it can be called as
-   * {@code PipelineOptionsFactory.withValidation()} without an instance of the factory.
-   *
-   * @return A builder with validation enabled.
-   */
-  public static Builder withValidation() {
-    return new Builder().withValidation();
-  }
-
-  /**
-   * A fluent {@link PipelineOptions} builder.
-   *
-   * <p>Instances are immutable: every configuration method returns a new {@code Builder} that
-   * carries the updated setting.
-   */
-  public static class Builder {
-    private final String defaultAppName;
-    private final String[] args;
-    private final boolean validation;
-    private final boolean strictParsing;
-
-    /** Starts with no arguments, validation off and strict parsing on. Not for direct use. */
-    private Builder() {
-      this(null, false, true);
-    }
-
-    /** Captures the settings and records the calling class's simple name as the default name. */
-    private Builder(String[] commandLine, boolean validate, boolean strict) {
-      this.args = commandLine;
-      this.validation = validate;
-      this.strictParsing = strict;
-      this.defaultAppName = findCallersClassName();
-    }
-
-    /**
-     * Returns a builder that will parse the given command line arguments when the
-     * {@link PipelineOptions} are constructed.
-     *
-     * <p>Arguments use the GNU style, for example:
-     * <pre>
-     *   --project=MyProject (simple property, sets the "project" property to "MyProject")
-     *   --readOnly=true (boolean property, sets the "readOnly" property to "true")
-     *   --readOnly (boolean shorthand, sets the "readOnly" property to "true")
-     *   --x=1 --x=2 --x=3 (list style simple property, sets the "x" property to [1, 2, 3])
-     *   --x=1,2,3 (list shorthand, sets the "x" property to [1, 2, 3])
-     *   --complexObject='{"key1":"value1",...}' (JSON format for all other complex types)
-     * </pre>
-     *
-     * <p>Simple properties can be bound to {@link String}, {@link Class}, enums and the Java
-     * primitives {@code boolean}, {@code byte}, {@code short}, {@code int}, {@code long},
-     * {@code float}, {@code double} together with their wrapper classes.
-     *
-     * <p>Simple list style properties can be bound to {@code boolean[]}, {@code char[]},
-     * {@code short[]}, {@code int[]}, {@code long[]}, {@code float[]}, {@code double[]},
-     * {@code Class[]}, enum arrays, {@code String[]}, and {@code List<String>}.
-     *
-     * <p>Every other type must be supplied in JSON format.
-     *
-     * <p>Strict parsing is on by default: each argument must look like either
-     * {@code --booleanArgName} or {@code --argName=argValue}. Call
-     * {@link Builder#withoutStrictParsing()} to relax this. Empty or null arguments are ignored
-     * regardless of the strict parsing setting.
-     *
-     * <p>Passing {@code --help} prints help to {@link System#out} and then exits the
-     * application. A bare {@code --help} lists the
-     * {@link PipelineOptionsFactory#getRegisteredOptions() registered options} through
-     * {@link PipelineOptionsFactory#printHelp(PrintStream)}, whereas
-     * {@code --help=PipelineOptionsClassName} prints detailed usage for that one interface
-     * through {@link PipelineOptionsFactory#printHelp(PrintStream, Class)}.
-     *
-     * @param args The command line arguments; must not be null.
-     */
-    public Builder fromArgs(String[] args) {
-      Preconditions.checkNotNull(args, "Arguments should not be null.");
-      Builder withArgs = new Builder(args, validation, strictParsing);
-      return withArgs;
-    }
-
-    /**
-     * Returns a builder that, after creation, validates that the {@link PipelineOptions}
-     * conform to all the validation criteria from {@code <T>}. See
-     * {@link PipelineOptionsValidator#validate(Class, PipelineOptions)} for more details about
-     * validation.
-     */
-    public Builder withValidation() {
-      Builder validating = new Builder(args, true, strictParsing);
-      return validating;
-    }
-
-    /**
-     * Returns a builder that skips over improperly formatted and unknown arguments while
-     * parsing instead of rejecting them.
-     */
-    public Builder withoutStrictParsing() {
-      Builder lenient = new Builder(args, validation, false);
-      return lenient;
-    }
-
-    /**
-     * Creates an object implementing {@link PipelineOptions} from the values configured on this
-     * builder.
-     *
-     * @return An object that implements {@link PipelineOptions}.
-     */
-    public PipelineOptions create() {
-      return as(PipelineOptions.class);
-    }
-
-    /**
-     * Creates an object implementing {@code <T>} from the values configured on this builder.
-     *
-     * <p>{@code <T>} must be composable with every interface registered with this factory; see
-     * {@link PipelineOptionsFactory#validateWellFormed(Class, Set)} for the rules.
-     *
-     * @param klass The options interface to implement.
-     * @return An object that implements {@code <T>}.
-     */
-    public <T extends PipelineOptions> T as(Class<T> klass) {
-      // Without arguments the proxy starts from an empty set of options; otherwise the command
-      // line is parsed (handling any --help request) into the initial option values.
-      Map<String, Object> initialOptions;
-      if (args == null) {
-        initialOptions = Maps.newHashMap();
-      } else {
-        ListMultimap<String, String> parsedArgs = parseCommandLine(args, strictParsing);
-        LOG.debug("Provided Arguments: {}", parsedArgs);
-        printHelpUsageAndExitIfNeeded(parsedArgs, System.out, true /* exit */);
-        initialOptions = parseObjects(klass, parsedArgs, strictParsing);
-      }
-
-      // Back the requested interface with a proxy holding the initial values.
-      ProxyInvocationHandler handler = new ProxyInvocationHandler(initialOptions);
-      T proxy = handler.as(klass);
-
-      // Fall back to the caller-derived application name when none was supplied.
-      ApplicationNameOptions nameView = proxy.as(ApplicationNameOptions.class);
-      if (nameView.getAppName() == null) {
-        nameView.setAppName(defaultAppName);
-      }
-
-      if (validation) {
-        PipelineOptionsValidator.validate(klass, proxy);
-      }
-      return proxy;
-    }
-  }
-
-  /**
-   * Handles a {@code --help} request found among the parsed options.
-   *
-   * <p>A generic {@code --help} is served by
-   * {@link PipelineOptionsFactory#printHelp(PrintStream)}; help for a specific class is served
-   * by {@link PipelineOptionsFactory#printHelp(PrintStream, Class)}. Output goes to the
-   * specified {@link PrintStream}, and the JVM exits afterwards if requested.
-   *
-   * <p>Visible for testing.
-   * {@code printStream} and {@code exit} used for testing.
-   *
-   * @return {@code true} if help was printed (and {@code exit} was false), {@code false} if no
-   *     help was requested.
-   */
-  @SuppressWarnings("unchecked")
-  static boolean printHelpUsageAndExitIfNeeded(ListMultimap<String, String> options,
-      PrintStream printStream, boolean exit) {
-    if (!options.containsKey("help")) {
-      return false;
-    }
-    final String helpOption = Iterables.getOnlyElement(options.get("help"));
-
-    // Only --help was specified: print the generic help.
-    if (Boolean.TRUE.toString().equals(helpOption)) {
-      printHelp(printStream);
-      if (!exit) {
-        return true;
-      }
-      System.exit(0);
-    }
-
-    // Otherwise treat the value as a class name and try to print help for exactly that class.
-    try {
-      Class<?> requested = Class.forName(helpOption);
-      if (!PipelineOptions.class.isAssignableFrom(requested)) {
-        throw new ClassNotFoundException("PipelineOptions of type " + requested + " not found.");
-      }
-      printHelp(printStream, (Class<? extends PipelineOptions>) requested);
-    } catch (ClassNotFoundException e) {
-      // No exact match: search the registered options by qualified-name suffix when the value
-      // contains a dot, or by simple name otherwise.
-      Predicate<Class<? extends PipelineOptions>> nameMatches =
-          new Predicate<Class<? extends PipelineOptions>>() {
-            @Override
-            public boolean apply(Class<? extends PipelineOptions> input) {
-              return helpOption.contains(".")
-                  ? input.getName().endsWith(helpOption)
-                  : input.getSimpleName().equals(helpOption);
-            }
-          };
-      Iterable<Class<? extends PipelineOptions>> matches =
-          Iterables.filter(getRegisteredOptions(), nameMatches);
-      try {
-        printHelp(printStream, Iterables.getOnlyElement(matches));
-      } catch (NoSuchElementException exception) {
-        // Nothing matched: say so and fall back to the generic help.
-        printStream.format("Unable to find option %s.%n", helpOption);
-        printHelp(printStream);
-      } catch (IllegalArgumentException exception) {
-        // Ambiguous name: list the candidates and fall back to the generic help.
-        printStream.format("Multiple matches found for %s: %s.%n", helpOption,
-            Iterables.transform(matches, ReflectHelpers.CLASS_NAME));
-        printHelp(printStream);
-      }
-    }
-
-    if (!exit) {
-      return true;
-    }
-    System.exit(0);
-    return false;
-  }
-
-  /**
-   * Derives the simple name of the class that called into this factory by walking the current
-   * thread's stack trace.
-   *
-   * @return The caller's simple class name, or {@code "unknown"} if no caller frame is found or
-   *     its class cannot be loaded.
-   */
-  private static String findCallersClassName() {
-    StackTraceElement[] frames = Thread.currentThread().getStackTrace();
-
-    // Locate the first frame that belongs to the PipelineOptionsFactory/Builder classes.
-    int index = 0;
-    while (index < frames.length
-        && !PIPELINE_OPTIONS_FACTORY_CLASSES.contains(frames[index].getClassName())) {
-      index++;
-    }
-
-    // Starting just after it, the first frame outside those classes is the caller.
-    for (int i = index + 1; i < frames.length; i++) {
-      String frameClassName = frames[i].getClassName();
-      if (PIPELINE_OPTIONS_FACTORY_CLASSES.contains(frameClassName)) {
-        continue;
-      }
-      try {
-        return Class.forName(frameClassName).getSimpleName();
-      } catch (ClassNotFoundException e) {
-        // The caller's class cannot be loaded here; give up and use the placeholder name.
-        return "unknown";
-      }
-    }
-
-    return "unknown";
-  }
-
-  /**
-   * Pairs a generated proxy class with the property descriptors that were computed for it.
-   *
-   * @param <T> The type of the proxyClass.
-   */
-  static class Registration<T extends PipelineOptions> {
-    private final Class<T> proxyClass;
-    private final List<PropertyDescriptor> propertyDescriptors;
-
-    /**
-     * @param proxyClass The generated proxy class.
-     * @param beanInfo The property descriptors associated with {@code proxyClass}.
-     */
-    public Registration(Class<T> proxyClass, List<PropertyDescriptor> beanInfo) {
-      this.propertyDescriptors = beanInfo;
-      this.proxyClass = proxyClass;
-    }
-
-    /** Returns the generated proxy class. */
-    Class<T> getProxyClass() {
-      return this.proxyClass;
-    }
-
-    /** Returns the property descriptors stored alongside the proxy class. */
-    List<PropertyDescriptor> getPropertyDescriptors() {
-      return this.propertyDescriptors;
-    }
-  }
-
-  /**
-   * A fixed set of primitive types, their wrapper classes, {@link String} and {@link Class}.
-   *
-   * <p>NOTE(review): {@code byte}/{@code Byte} are not in this set although the
-   * {@code fromArgs} Javadoc lists {@code byte} among the simple property types; confirm whether
-   * the omission is intentional.
-   */
-  private static final Set<Class<?>> SIMPLE_TYPES = ImmutableSet.<Class<?>>builder()
-      .add(boolean.class)
-      .add(Boolean.class)
-      .add(char.class)
-      .add(Character.class)
-      .add(short.class)
-      .add(Short.class)
-      .add(int.class)
-      .add(Integer.class)
-      .add(long.class)
-      .add(Long.class)
-      .add(float.class)
-      .add(Float.class)
-      .add(double.class)
-      .add(Double.class)
-      .add(String.class)
-      .add(Class.class).build();
-  private static final Logger LOG = LoggerFactory.getLogger(PipelineOptionsFactory.class);
-  /** Zero-length array used as the type token for {@code toArray} on sets of interfaces. */
-  @SuppressWarnings("rawtypes")
-  private static final Class<?>[] EMPTY_CLASS_ARRAY = new Class[0];
-  /** Shared Jackson {@link ObjectMapper} instance. */
-  private static final ObjectMapper MAPPER = new ObjectMapper();
-  /**
-   * Maps a runner's simple class name to its class. Assigned exactly once by the static
-   * initializer from the runners reported by the discovered {@link PipelineRunnerRegistrar}s.
-   */
-  private static final Map<String, Class<? extends PipelineRunner<?>>> SUPPORTED_PIPELINE_RUNNERS;
-
-  /** Classes that are used as the boundary in the stack trace to find the callers class name. */
-  private static final Set<String> PIPELINE_OPTIONS_FACTORY_CLASSES =
-      ImmutableSet.of(PipelineOptionsFactory.class.getName(), Builder.class.getName());
-
-  /** Methods that are ignored when validating the proxy class. */
-  private static final Set<Method> IGNORED_METHODS;
-
-  /**
-   * A predicate that accepts only methods that are <em>not</em> synthetic, as reported by
-   * {@link Method#isSynthetic()}.
-   */
-  private static final Predicate<Method> NOT_SYNTHETIC_PREDICATE =
-      new Predicate<Method>() {
-        @Override
-        public boolean apply(Method input) {
-          return !input.isSynthetic();
-        }
-      };
-
-  /** The set of options that have been registered and visible to the user. */
-  private static final Set<Class<? extends PipelineOptions>> REGISTERED_OPTIONS =
-      Sets.newConcurrentHashSet();
-
-  /** A cache storing a mapping from a given interface to its registration record. */
-  private static final Map<Class<? extends PipelineOptions>, Registration<?>> INTERFACE_CACHE =
-      Maps.newConcurrentMap();
-
-  /** A cache storing a mapping from a set of interfaces to its registration record. */
-  private static final Map<Set<Class<? extends PipelineOptions>>, Registration<?>> COMBINED_CACHE =
-      Maps.newConcurrentMap();
-
-  /** The width at which options should be output. */
-  private static final int TERMINAL_WIDTH = 80;
-
-  /**
-   * Picks the {@code ClassLoader} to hand to {@link ServiceLoader#load}. By default that call
-   * would use the context {@code ClassLoader}, which can be null, so the candidates are tried
-   * in this order: the context ClassLoader, the ClassLoader of this class, and finally the
-   * system ClassLoader.
-   *
-   * @return The first non-null candidate, or the system ClassLoader as the last resort.
-   */
-  static ClassLoader findClassLoader() {
-    ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
-    if (contextLoader != null) {
-      return contextLoader;
-    }
-    ClassLoader ownLoader = PipelineOptionsFactory.class.getClassLoader();
-    if (ownLoader != null) {
-      return ownLoader;
-    }
-    return ClassLoader.getSystemClassLoader();
-  }
-
-  // One-time class initialization: resolves the methods to ignore during proxy validation,
-  // discovers the available pipeline runners, and registers the known PipelineOptions
-  // interfaces.
-  static {
-    try {
-      // Reflective handles to the Object/Proxy methods that are skipped when validating a
-      // proxy class.
-      IGNORED_METHODS = ImmutableSet.<Method>builder()
-          .add(Object.class.getMethod("getClass"))
-          .add(Object.class.getMethod("wait"))
-          .add(Object.class.getMethod("wait", long.class))
-          .add(Object.class.getMethod("wait", long.class, int.class))
-          .add(Object.class.getMethod("notify"))
-          .add(Object.class.getMethod("notifyAll"))
-          .add(Proxy.class.getMethod("getInvocationHandler", Object.class))
-          .build();
-    } catch (NoSuchMethodException | SecurityException e) {
-      // Without these handles the class cannot work; log and abort class initialization.
-      LOG.error("Unable to find expected method", e);
-      throw new ExceptionInInitializerError(e);
-    }
-
-    ClassLoader classLoader = findClassLoader();
-
-    // Store the list of all available pipeline runners.
-    ImmutableMap.Builder<String, Class<? extends PipelineRunner<?>>> builder =
-            ImmutableMap.builder();
-    // NOTE(review): registrars are collected into a TreeSet ordered by ObjectsClassComparator,
-    // presumably so that they are visited in a deterministic order regardless of the order in
-    // which ServiceLoader reports them - confirm. A TreeSet also keeps only one of any
-    // registrars the comparator considers equal.
-    Set<PipelineRunnerRegistrar> pipelineRunnerRegistrars =
-        Sets.newTreeSet(ObjectsClassComparator.INSTANCE);
-    pipelineRunnerRegistrars.addAll(
-        Lists.newArrayList(ServiceLoader.load(PipelineRunnerRegistrar.class, classLoader)));
-    for (PipelineRunnerRegistrar registrar : pipelineRunnerRegistrars) {
-      for (Class<? extends PipelineRunner<?>> klass : registrar.getPipelineRunners()) {
-        // Runners are keyed by their simple class name.
-        // NOTE(review): ImmutableMap.Builder rejects duplicate keys when built, so two runners
-        // sharing a simple name would presumably fail initialization - confirm this is intended.
-        builder.put(klass.getSimpleName(), klass);
-      }
-    }
-    SUPPORTED_PIPELINE_RUNNERS = builder.build();
-
-    // Load and register the list of all classes that extend PipelineOptions.
-    // PipelineOptions itself is registered first, followed by every interface contributed by a
-    // discovered PipelineOptionsRegistrar.
-    register(PipelineOptions.class);
-    Set<PipelineOptionsRegistrar> pipelineOptionsRegistrars =
-        Sets.newTreeSet(ObjectsClassComparator.INSTANCE);
-    pipelineOptionsRegistrars.addAll(
-        Lists.newArrayList(ServiceLoader.load(PipelineOptionsRegistrar.class, classLoader)));
-    for (PipelineOptionsRegistrar registrar : pipelineOptionsRegistrars) {
-      for (Class<? extends PipelineOptions> klass : registrar.getPipelineOptions()) {
-        register(klass);
-      }
-    }
-  }
-
-  /**
-   * Registers the given interface with this factory; registering an interface that is already
-   * registered is a no-op. The interface must satisfy these restrictions:
-   * <ul>
-   *   <li>Any property with the same name must have the same return type for all derived
-   *       interfaces of {@link PipelineOptions}.
-   *   <li>Every bean property of any interface derived from {@link PipelineOptions} must have a
-   *       getter and setter method.
-   *   <li>Every method must conform to being a getter or setter for a JavaBean.
-   *   <li>The derived interface of {@link PipelineOptions} must be composable with every interface
-   *       registered with this factory.
-   * </ul>
-   *
-   * @param iface The interface object to manually register; must be a non-null interface type.
-   */
-  public static synchronized void register(Class<? extends PipelineOptions> iface) {
-    Preconditions.checkNotNull(iface);
-    checkArgument(iface.isInterface(), "Only interface types are supported.");
-
-    // Validate against everything registered so far before making the interface visible.
-    if (!REGISTERED_OPTIONS.contains(iface)) {
-      validateWellFormed(iface, REGISTERED_OPTIONS);
-      REGISTERED_OPTIONS.add(iface);
-    }
-  }
-
-  /**
-   * Checks that the interface is well formed, both on its own and when combined with the given
-   * set of already validated interfaces. The rules are:
-   * <ul>
-   *   <li>Any property with the same name must have the same return type for all derived
-   *       interfaces of {@link PipelineOptions}.
-   *   <li>Every bean property of any interface derived from {@link PipelineOptions} must have a
-   *       getter and setter method.
-   *   <li>Every method must conform to being a getter or setter for a JavaBean.
-   *   <li>The derived interface of {@link PipelineOptions} must be composable with every interface
-   *       part of allPipelineOptionsClasses.
-   *   <li>Only getters may be annotated with {@link JsonIgnore @JsonIgnore}.
-   *   <li>If any getter is annotated with {@link JsonIgnore @JsonIgnore}, then all getters for
-   *       this property must be annotated with {@link JsonIgnore @JsonIgnore}.
-   * </ul>
-   *
-   * <p>Results are cached, so each interface and each combination is validated at most once.
-   *
-   * @param iface The interface to validate.
-   * @param validatedPipelineOptionsInterfaces The set of validated pipeline options interfaces to
-   *        validate against.
-   * @return A registration record containing the proxy class and bean info for iface.
-   */
-  static synchronized <T extends PipelineOptions> Registration<T> validateWellFormed(
-      Class<T> iface, Set<Class<? extends PipelineOptions>> validatedPipelineOptionsInterfaces) {
-    Preconditions.checkArgument(iface.isInterface(), "Only interface types are supported.");
-
-    // Combined view: every previously validated interface plus the one being checked.
-    @SuppressWarnings("unchecked")
-    Set<Class<? extends PipelineOptions>> allInterfaces =
-        FluentIterable.from(validatedPipelineOptionsInterfaces).append(iface).toSet();
-    if (!COMBINED_CACHE.containsKey(allInterfaces)) {
-      @SuppressWarnings("unchecked")
-      Class<T> combinedProxy =
-          (Class<T>) Proxy.getProxyClass(
-              PipelineOptionsFactory.class.getClassLoader(),
-              allInterfaces.toArray(EMPTY_CLASS_ARRAY));
-      List<PropertyDescriptor> combinedDescriptors;
-      try {
-        combinedDescriptors =
-            validateClass(iface, validatedPipelineOptionsInterfaces, combinedProxy);
-      } catch (IntrospectionException e) {
-        throw Throwables.propagate(e);
-      }
-      COMBINED_CACHE.put(allInterfaces, new Registration<T>(combinedProxy, combinedDescriptors));
-    }
-
-    // Local view: the interface on its own.
-    if (!INTERFACE_CACHE.containsKey(iface)) {
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      Class<T> localProxy = (Class<T>) Proxy.getProxyClass(
-          PipelineOptionsFactory.class.getClassLoader(), new Class[] {iface});
-      List<PropertyDescriptor> localDescriptors;
-      try {
-        localDescriptors = validateClass(iface, validatedPipelineOptionsInterfaces, localProxy);
-      } catch (IntrospectionException e) {
-        throw Throwables.propagate(e);
-      }
-      INTERFACE_CACHE.put(iface, new Registration<T>(localProxy, localDescriptors));
-    }
-
-    @SuppressWarnings("unchecked")
-    Registration<T> registration = (Registration<T>) INTERFACE_CACHE.get(iface);
-    return registration;
-  }
-
-  /**
-   * Returns a read-only view of the options interfaces currently registered with this factory.
-   */
-  public static Set<Class<? extends PipelineOptions>> getRegisteredOptions() {
-    Set<Class<? extends PipelineOptions>> readOnlyView =
-        Collections.unmodifiableSet(REGISTERED_OPTIONS);
-    return readOnlyView;
-  }
-
-  /**
-   * Prints the names of all options interfaces registered with the PipelineOptionsFactory,
-   * followed by a hint on how to request detailed help for one of them. The output is meant to
-   * be human readable and to fit a terminal window.
-   *
-   * @param out The stream to print to; must not be null.
-   */
-  public static void printHelp(PrintStream out) {
-    Preconditions.checkNotNull(out);
-    out.println("The set of registered options are:");
-
-    // Copy into a set ordered by ClassNameComparator before listing.
-    Set<Class<? extends PipelineOptions>> ordered = new TreeSet<>(ClassNameComparator.INSTANCE);
-    ordered.addAll(REGISTERED_OPTIONS);
-    for (Class<? extends PipelineOptions> registered : ordered) {
-      String className = registered.getName();
-      out.format("  %s%n", className);
-    }
-
-    out.format("%nUse --help=<OptionsName> for detailed help. For example:%n"
-        + "  --help=DataflowPipelineOptions <short names valid for registered options>%n"
-        + "  --help=com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions%n");
-  }
-
-  /**
-   * Prints, in a human readable form, the options that can be set on the passed in
-   * {@link PipelineOptions} interface, grouped by the interface that declares them. The format
-   * is:
-   * <pre>
-   * OptionGroup:
-   *     ... option group description ...
-   *
-   *  --option1={@code <type>} or list of valid enum choices
-   *     Default: value (if available, see {@link Default})
-   *     ... option description ... (if available, see {@link Description})
-   *     Required groups (if available, see {@link Required})
-   *  --option2={@code <type>} or list of valid enum choices
-   *     Default: value (if available, see {@link Default})
-   *     ... option description ... (if available, see {@link Description})
-   *     Required groups (if available, see {@link Required})
-   * </pre>
-   * Methods and interfaces annotated with {@link Hidden} are left out. This method will attempt
-   * to format its output to be compatible with a terminal window.
-   *
-   * @param out The stream to print to; must not be null.
-   * @param iface The options interface to describe; must not be null.
-   */
-  public static void printHelp(PrintStream out, Class<? extends PipelineOptions> iface) {
-    Preconditions.checkNotNull(out);
-    Preconditions.checkNotNull(iface);
-    validateWellFormed(iface, REGISTERED_OPTIONS);
-
-    // Group every non-synthetic, non-hidden method by the interface that declares it.
-    ListMultimap<Class<?>, Method> methodsByDeclarer = ArrayListMultimap.create();
-    Iterable<Method> candidates =
-        Iterables.filter(
-            ReflectHelpers.getClosureOfMethodsOnInterface(iface), NOT_SYNTHETIC_PREDICATE);
-    for (Method candidate : candidates) {
-      if (candidate.getAnnotation(Hidden.class) != null) {
-        continue;
-      }
-      methodsByDeclarer.put(candidate.getDeclaringClass(), candidate);
-    }
-
-    // Keep only the declaring interfaces that are not hidden, ordered by ClassNameComparator.
-    SortedSet<Class<?>> visibleIfaces = new TreeSet<>(ClassNameComparator.INSTANCE);
-    for (Class<?> declarer : methodsByDeclarer.keySet()) {
-      if (declarer.getAnnotation(Hidden.class) == null) {
-        visibleIfaces.add(declarer);
-      }
-    }
-
-    for (Class<?> group : visibleIfaces) {
-      Map<String, Method> gettersByProperty =
-          getPropertyNamesToGetters(methodsByDeclarer.get(group));
-      if (gettersByProperty.isEmpty()) {
-        // Nothing to show for an interface that defines no options.
-        continue;
-      }
-      SortedSetMultimap<String, String> requiredGroups =
-          getRequiredGroupNamesToProperties(gettersByProperty);
-
-      // Group header: interface name and its description, followed by a blank line.
-      out.format("%s:%n", group.getName());
-      prettyPrintDescription(out, group.getAnnotation(Description.class));
-      out.println();
-
-      // One entry per property, in case-insensitive alphabetical order.
-      List<String> propertyNames = Lists.newArrayList(gettersByProperty.keySet());
-      Collections.sort(propertyNames, String.CASE_INSENSITIVE_ORDER);
-      for (String propertyName : propertyNames) {
-        Method getter = gettersByProperty.get(propertyName);
-        Class<?> returnType = getter.getReturnType();
-        // Enums are shown as the list of their constants, everything else by simple type name.
-        String printableType = returnType.isEnum()
-            ? Joiner.on(" | ").join(returnType.getEnumConstants())
-            : returnType.getSimpleName();
-        out.format("  --%s=<%s>%n", propertyName, printableType);
-
-        Optional<String> defaultValue = getDefaultValueFromAnnotation(getter);
-        if (defaultValue.isPresent()) {
-          out.format("    Default: %s%n", defaultValue.get());
-        }
-        prettyPrintDescription(out, getter.getAnnotation(Description.class));
-        prettyPrintRequiredGroups(out, getter.getAnnotation(Validation.Required.class),
-            requiredGroups);
-      }
-      out.println();
-    }
-  }
-
-  /**
-   * Prints, for each requirement group the property belongs to, which properties satisfy that
-   * group. Long lines are broken on white space characters in an attempt to honor a line limit
-   * of {@code TERMINAL_WIDTH}.
-   *
-   * @param out The stream to print to.
-   * @param annotation The property's {@link Required} annotation; nothing is printed if it is
-   *     null or has no groups.
-   * @param requiredGroupNameToProperties The members of each requirement group, by group name.
-   */
-  private static void prettyPrintRequiredGroups(PrintStream out, Required annotation,
-      SortedSetMultimap<String, String> requiredGroupNameToProperties) {
-    if (annotation == null || annotation.groups() == null) {
-      return;
-    }
-    for (String groupName : annotation.groups()) {
-      SortedSet<String> members = requiredGroupNameToProperties.get(groupName);
-      // A single-member group names that member; larger groups list all alternatives.
-      String requirement = members.size() == 1
-          ? Iterables.getOnlyElement(members) + " is required."
-          : "At least one of " + members + " is required";
-      String[] words = requirement.split("\\s+");
-      terminalPrettyPrint(out, words);
-    }
-  }
-
-  /**
-   * Prints the text of {@code description}, split on white space and wrapped by
-   * {@code terminalPrettyPrint}. Does nothing when the annotation or its value is {@code null}.
-   */
-  private static void prettyPrintDescription(PrintStream out, Description description) {
-    if (description != null) {
-      String text = description.value();
-      if (text != null) {
-        terminalPrettyPrint(out, text.split("\\s+"));
-      }
-    }
-  }
-
-  /**
-   * Prints {@code words} separated by single spaces, with every output line starting with a
-   * three space indent. A line break is inserted before a word that would push the current line
-   * past {@code TERMINAL_WIDTH}. Prints nothing for an empty array.
-   */
-  private static void terminalPrettyPrint(PrintStream out, String[] words) {
-    final String indent = "   ";
-
-    if (words.length == 0) {
-      return;
-    }
-
-    final int lastIndex = words.length - 1;
-    out.print(indent);
-    int column = indent.length();
-    for (int idx = 0; idx <= lastIndex; idx++) {
-      String word = words[idx];
-      out.print(" ");
-      out.print(word);
-      column += word.length() + 1;
-
-      // Look ahead: wrap now if the following word (plus its separator) would not fit.
-      boolean hasNext = idx < lastIndex;
-      if (hasNext && column + 1 + words[idx + 1].length() > TERMINAL_WIDTH) {
-        out.println();
-        out.print(indent);
-        column = indent.length();
-      }
-    }
-    out.println();
-  }
-
-  /**
-   * Returns the printable form of the first {@link Default} annotation found on {@code method},
-   * or an absent {@link Optional} when the method carries none of the recognized
-   * {@link Default} annotations.
-   *
-   * <p>Class valued defaults ({@link Default.Class}, {@link Default.InstanceFactory}) are rendered
-   * using the simple name of the class; all other defaults use their string form.
-   */
-  private static Optional<String> getDefaultValueFromAnnotation(Method method) {
-    for (Annotation candidate : method.getAnnotations()) {
-      if (candidate instanceof Default.Class) {
-        return Optional.of(((Default.Class) candidate).value().getSimpleName());
-      }
-      if (candidate instanceof Default.String) {
-        return Optional.of(((Default.String) candidate).value());
-      }
-      if (candidate instanceof Default.Boolean) {
-        return Optional.of(String.valueOf(((Default.Boolean) candidate).value()));
-      }
-      if (candidate instanceof Default.Character) {
-        return Optional.of(String.valueOf(((Default.Character) candidate).value()));
-      }
-      if (candidate instanceof Default.Byte) {
-        return Optional.of(String.valueOf(((Default.Byte) candidate).value()));
-      }
-      if (candidate instanceof Default.Short) {
-        return Optional.of(String.valueOf(((Default.Short) candidate).value()));
-      }
-      if (candidate instanceof Default.Integer) {
-        return Optional.of(String.valueOf(((Default.Integer) candidate).value()));
-      }
-      if (candidate instanceof Default.Long) {
-        return Optional.of(String.valueOf(((Default.Long) candidate).value()));
-      }
-      if (candidate instanceof Default.Float) {
-        return Optional.of(String.valueOf(((Default.Float) candidate).value()));
-      }
-      if (candidate instanceof Default.Double) {
-        return Optional.of(String.valueOf(((Default.Double) candidate).value()));
-      }
-      if (candidate instanceof Default.Enum) {
-        return Optional.of(((Default.Enum) candidate).value());
-      }
-      if (candidate instanceof Default.InstanceFactory) {
-        return Optional.of(((Default.InstanceFactory) candidate).value().getSimpleName());
-      }
-    }
-    return Optional.absent();
-  }
-
-  /**
-   * Returns the {@code SUPPORTED_PIPELINE_RUNNERS} map itself (no defensive copy is made here).
-   *
-   * <p>NOTE(review): keys are presumably the runner names accepted by the "runner" option;
-   * confirm against where the map is populated.
-   */
-  static Map<String, Class<? extends PipelineRunner<?>>> getRegisteredRunners() {
-    return SUPPORTED_PIPELINE_RUNNERS;
-  }
-
-  /**
-   * Returns the property descriptors stored in {@code COMBINED_CACHE} for the given set of
-   * interfaces.
-   *
-   * <p>NOTE(review): the cache lookup result is dereferenced without a null check, so this
-   * presumably requires the interface set to have been validated/registered beforehand; confirm
-   * against the callers.
-   */
-  static List<PropertyDescriptor> getPropertyDescriptors(
-      Set<Class<? extends PipelineOptions>> interfaces) {
-    return COMBINED_CACHE.get(interfaces).getPropertyDescriptors();
-  }
-
-  /**
-   * Creates a set of Dataflow worker harness options based on a set of known system
-   * properties. This is meant to only be used from the Dataflow worker harness as a method to
-   * bootstrap the worker harness.
-   *
-   * <p>For internal use only. This simply delegates to {@link #createFromSystemProperties()}.
-   *
-   * @return A {@link DataflowWorkerHarnessOptions} object configured for the
-   *         Dataflow worker harness.
-   * @throws IOException propagated from {@link #createFromSystemProperties()}.
-   */
-  public static DataflowWorkerHarnessOptions createFromSystemPropertiesInternal()
-      throws IOException {
-    return createFromSystemProperties();
-  }
-
-  /**
-   * Creates a set of {@link DataflowWorkerHarnessOptions} based on a set of known system
-   * properties. This is meant to only be used from the Dataflow worker harness as a method to
-   * bootstrap the worker harness.
-   *
-   * <p>The following system properties are consulted:
-   * <ul>
-   *   <li>{@code sdk_pipeline_options}: when present, its value is deserialized as JSON into the
-   *       options; otherwise a default instance is created.
-   *   <li>{@code worker_id}: when present, copied onto the options via {@code setWorkerId}.
-   *   <li>{@code job_id}: when present, copied onto the options via {@code setJobId}.
-   * </ul>
-   *
-   * @return A {@link DataflowWorkerHarnessOptions} object configured for the
-   *         Dataflow worker harness.
-   * @throws IOException if the serialized options cannot be deserialized.
-   * @deprecated for internal use only
-   */
-  @Deprecated
-  public static DataflowWorkerHarnessOptions createFromSystemProperties() throws IOException {
-    DataflowWorkerHarnessOptions options;
-    if (System.getProperties().containsKey("sdk_pipeline_options")) {
-      String serializedOptions = System.getProperty("sdk_pipeline_options");
-      // Parameterized logging, consistent with the other LOG calls in this class.
-      LOG.info("Worker harness starting with: {}", serializedOptions);
-      // The mapper is only needed on this branch, so it is created here rather than up front.
-      options = new ObjectMapper().readValue(serializedOptions, PipelineOptions.class)
-          .as(DataflowWorkerHarnessOptions.class);
-    } else {
-      options = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class);
-    }
-
-    // These values will not be known at job submission time and must be provided.
-    if (System.getProperties().containsKey("worker_id")) {
-      options.setWorkerId(System.getProperty("worker_id"));
-    }
-    if (System.getProperties().containsKey("job_id")) {
-      options.setJobId(System.getProperty("job_id"));
-    }
-
-    return options;
-  }
-
-  /**
-   * Builds the list of {@link PropertyDescriptor}s for {@code beanClass}, emulating
-   * {@link Introspector#getBeanInfo(Class, int)}.
-   *
-   * <p>Every non-synthetic public method is inspected. Each setter (name starts with "set", one
-   * parameter, {@code void} return) yields a descriptor paired with the getter of the same
-   * property name, if one exists. Getters left without a setter yield a descriptor with a
-   * {@code null} write method.
-   *
-   * <p>TODO: Swap back to using Introspector once the proxy class issue with AppEngine is
-   * resolved.
-   *
-   * @throws IllegalArgumentException if a getter and setter of the same property disagree on the
-   *         property type.
-   */
-  private static List<PropertyDescriptor> getPropertyDescriptors(Class<?> beanClass)
-      throws IntrospectionException {
-    // Sorted so that the order of the produced descriptors is stable.
-    SortedSet<Method> sortedMethods = Sets.newTreeSet(MethodComparator.INSTANCE);
-    sortedMethods.addAll(
-        Collections2.filter(Arrays.asList(beanClass.getMethods()), NOT_SYNTHETIC_PREDICATE));
-    SortedMap<String, Method> unpairedGetters = getPropertyNamesToGetters(sortedMethods);
-    List<PropertyDescriptor> result = Lists.newArrayList();
-    List<TypeMismatch> typeMismatches = new ArrayList<>();
-
-    // Pair each setter with its getter; a paired getter is removed from the unpaired map.
-    for (Method candidate : sortedMethods) {
-      String candidateName = candidate.getName();
-      Class<?>[] parameterTypes = candidate.getParameterTypes();
-      boolean looksLikeSetter = candidateName.startsWith("set")
-          && parameterTypes.length == 1
-          && candidate.getReturnType() == void.class;
-      if (!looksLikeSetter) {
-        continue;
-      }
-      String propertyName = Introspector.decapitalize(candidateName.substring(3));
-      Method getter = unpairedGetters.remove(propertyName);
-
-      // A getter/setter pair must agree on the exact property type.
-      if (getter != null && getter.getReturnType() != parameterTypes[0]) {
-        TypeMismatch mismatch = new TypeMismatch();
-        mismatch.propertyName = propertyName;
-        mismatch.getterPropertyType = getter.getReturnType();
-        mismatch.setterPropertyType = parameterTypes[0];
-        typeMismatches.add(mismatch);
-        continue;
-      }
-
-      result.add(new PropertyDescriptor(propertyName, getter, candidate));
-    }
-    throwForTypeMismatches(typeMismatches);
-
-    // Whatever is left are getters that have no matching setter.
-    for (Map.Entry<String, Method> leftover : unpairedGetters.entrySet()) {
-      result.add(new PropertyDescriptor(leftover.getKey(), leftover.getValue(), null));
-    }
-    return result;
-  }
-
-  /** Records a property whose getter return type differs from its setter parameter type. */
-  private static class TypeMismatch {
-    // Bean property name derived from the setter's method name.
-    private String propertyName;
-    // Return type of the getter.
-    private Class<?> getterPropertyType;
-    // Type of the setter's single parameter.
-    private Class<?> setterPropertyType;
-  }
-
-  /**
-   * Throws an {@link IllegalArgumentException} describing the given getter/setter type
-   * mismatches. A single mismatch produces a one line message; several mismatches produce a
-   * header followed by one bullet per property. Returns normally when the list is empty.
-   */
-  private static void throwForTypeMismatches(List<TypeMismatch> mismatches) {
-    if (mismatches.size() == 1) {
-      TypeMismatch mismatch = mismatches.get(0);
-      throw new IllegalArgumentException(String.format(
-          "Type mismatch between getter and setter methods for property [%s]. "
-          + "Getter is of type [%s] whereas setter is of type [%s].",
-          mismatch.propertyName,
-          mismatch.getterPropertyType.getName(),
-          mismatch.setterPropertyType.getName()));
-    } else if (mismatches.size() > 1) {
-      // The header is a constant, so it is used directly rather than run through String.format.
-      StringBuilder builder =
-          new StringBuilder("Type mismatches between getters and setters detected:");
-      for (TypeMismatch mismatch : mismatches) {
-        builder.append(String.format(
-            "%n  - Property [%s]: Getter is of type [%s] whereas setter is of type [%s].",
-            mismatch.propertyName,
-            mismatch.getterPropertyType.getName(),
-            mismatch.setterPropertyType.getName()));
-      }
-      throw new IllegalArgumentException(builder.toString());
-    }
-  }
-
-  /**
-   * Maps bean property names to their getter methods.
-   *
-   * <p>A method counts as a getter when its name starts with "get" or "is", it takes no
-   * parameters, and it does not return {@code void}. The property name is the method name with
-   * the prefix removed and decapitalized. When several methods map to the same property name,
-   * the one encountered last in {@code methods} wins.
-   */
-  private static SortedMap<String, Method> getPropertyNamesToGetters(Iterable<Method> methods) {
-    SortedMap<String, Method> gettersByProperty = Maps.newTreeMap();
-    for (Method candidate : methods) {
-      String name = candidate.getName();
-      boolean hasIsPrefix = name.startsWith("is");
-      boolean hasGetterPrefix = hasIsPrefix || name.startsWith("get");
-      boolean looksLikeGetter = hasGetterPrefix
-          && candidate.getParameterTypes().length == 0
-          && candidate.getReturnType() != void.class;
-      if (looksLikeGetter) {
-        // "is" has two characters to strip, "get" has three.
-        int prefixLength = hasIsPrefix ? 2 : 3;
-        gettersByProperty.put(
-            Introspector.decapitalize(name.substring(prefixLength)), candidate);
-      }
-    }
-    return gettersByProperty;
-  }
-
-  /**
-   * Builds a sorted multimap from each required group name to the names of the properties whose
-   * getter is annotated with {@link Validation.Required} and lists that group.
-   */
-  private static SortedSetMultimap<String, String> getRequiredGroupNamesToProperties(
-      Map<String, Method> propertyNamesToGetters) {
-    SortedSetMultimap<String, String> groupsToProperties = TreeMultimap.create();
-    for (Map.Entry<String, Method> entry : propertyNamesToGetters.entrySet()) {
-      Required required = entry.getValue().getAnnotation(Validation.Required.class);
-      if (required == null) {
-        continue;
-      }
-      String propertyName = entry.getKey();
-      for (String group : required.groups()) {
-        groupsToProperties.put(group, propertyName);
-      }
-    }
-    return groupsToProperties;
-  }
-
-  /**
-   * Validates that a given class conforms to the following properties:
-   * <ul>
-   *   <li>Any property with the same name must have the same return type for all derived
-   *       interfaces of {@link PipelineOptions}.
-   *   <li>Every bean property of any interface derived from {@link PipelineOptions} must have a
-   *       getter and setter method.
-   *   <li>Every method must conform to being a getter or setter for a JavaBean.
-   *   <li>Only getters may be annotated with {@link JsonIgnore @JsonIgnore}.
-   *   <li>If any getter is annotated with {@link JsonIgnore @JsonIgnore}, then all getters for
-   *       this property must be annotated with {@link JsonIgnore @JsonIgnore}.
-   * </ul>
-   *
-   * <p>The checks run in a fixed order and the first failing category is the one reported:
-   * conflicting return types, getter/setter type mismatches, inconsistent
-   * {@link JsonIgnore @JsonIgnore} on getters, {@link JsonIgnore @JsonIgnore} on setters, missing
-   * getters/setters, and finally methods that are not bean properties.
-   *
-   * @param iface The interface to validate.
-   * @param validatedPipelineOptionsInterfaces The set of validated pipeline options interfaces to
-   *        validate against.
-   * @param klass The proxy class representing the interface.
-   * @return A list of {@link PropertyDescriptor}s representing all valid bean properties of
-   *         {@code iface}.
-   * @throws IntrospectionException if a property descriptor cannot be constructed.
-   * @throws IllegalArgumentException if any of the validations listed above fails.
-   */
-  private static List<PropertyDescriptor> validateClass(Class<? extends PipelineOptions> iface,
-      Set<Class<? extends PipelineOptions>> validatedPipelineOptionsInterfaces,
-      Class<?> klass) throws IntrospectionException {
-    // "methods" accumulates every method that is accounted for; whatever remains on klass
-    // outside of this set at the end is reported as not being a bean property.
-    Set<Method> methods = Sets.newHashSet(IGNORED_METHODS);
-    // Ignore static methods, "equals", "hashCode", "toString" and "as" on the generated class.
-    // Ignore synthetic methods
-    for (Method method : klass.getMethods()) {
-      if (Modifier.isStatic(method.getModifiers()) || method.isSynthetic()) {
-        methods.add(method);
-      }
-    }
-    try {
-      methods.add(klass.getMethod("equals", Object.class));
-      methods.add(klass.getMethod("hashCode"));
-      methods.add(klass.getMethod("toString"));
-      methods.add(klass.getMethod("as", Class.class));
-      methods.add(klass.getMethod("cloneAs", Class.class));
-    } catch (NoSuchMethodException | SecurityException e) {
-      throw Throwables.propagate(e);
-    }
-
-    // Verify that there are no methods with the same name with two different return types.
-    Iterable<Method> interfaceMethods = FluentIterable
-        .from(ReflectHelpers.getClosureOfMethodsOnInterface(iface))
-        .filter(NOT_SYNTHETIC_PREDICATE)
-        .toSortedSet(MethodComparator.INSTANCE);
-    // Keys are compared by method name only, so all methods sharing a name land under one key.
-    SortedSetMultimap<Method, Method> methodNameToMethodMap =
-        TreeMultimap.create(MethodNameComparator.INSTANCE, MethodComparator.INSTANCE);
-    for (Method method : interfaceMethods) {
-      methodNameToMethodMap.put(method, method);
-    }
-    List<MultipleDefinitions> multipleDefinitions = Lists.newArrayList();
-    for (Map.Entry<Method, Collection<Method>> entry
-        : methodNameToMethodMap.asMap().entrySet()) {
-      Set<Class<?>> returnTypes = FluentIterable.from(entry.getValue())
-          .transform(ReturnTypeFetchingFunction.INSTANCE).toSet();
-      SortedSet<Method> collidingMethods = FluentIterable.from(entry.getValue())
-          .toSortedSet(MethodComparator.INSTANCE);
-      // More than one distinct return type for the same method name is a conflict.
-      if (returnTypes.size() > 1) {
-        MultipleDefinitions defs = new MultipleDefinitions();
-        defs.method = entry.getKey();
-        defs.collidingMethods = collidingMethods;
-        multipleDefinitions.add(defs);
-      }
-    }
-    throwForMultipleDefinitions(iface, multipleDefinitions);
-
-    // Verify that there is no getter with a mixed @JsonIgnore annotation and verify
-    // that no setter has @JsonIgnore.
-    // This considers the methods of the already validated interfaces as well as those of iface.
-    Iterable<Method> allInterfaceMethods =
-        FluentIterable.from(
-                ReflectHelpers.getClosureOfMethodsOnInterfaces(
-                    validatedPipelineOptionsInterfaces))
-            .append(ReflectHelpers.getClosureOfMethodsOnInterface(iface))
-            .filter(NOT_SYNTHETIC_PREDICATE)
-            .toSortedSet(MethodComparator.INSTANCE);
-    SortedSetMultimap<Method, Method> methodNameToAllMethodMap =
-        TreeMultimap.create(MethodNameComparator.INSTANCE, MethodComparator.INSTANCE);
-    for (Method method : allInterfaceMethods) {
-      methodNameToAllMethodMap.put(method, method);
-    }
-
-    // May itself throw IllegalArgumentException for getter/setter type mismatches.
-    List<PropertyDescriptor> descriptors = getPropertyDescriptors(klass);
-
-    List<InconsistentlyIgnoredGetters> incompletelyIgnoredGetters = new ArrayList<>();
-    List<IgnoredSetter> ignoredSetters = new ArrayList<>();
-
-    for (PropertyDescriptor descriptor : descriptors) {
-      // Properties lacking a getter or setter are reported by the missing-method check below;
-      // ignored methods are exempt from the @JsonIgnore checks.
-      if (descriptor.getReadMethod() == null
-          || descriptor.getWriteMethod() == null
-          || IGNORED_METHODS.contains(descriptor.getReadMethod())
-          || IGNORED_METHODS.contains(descriptor.getWriteMethod())) {
-        continue;
-      }
-      // All declarations sharing the getter's name, and the subset carrying @JsonIgnore.
-      SortedSet<Method> getters = methodNameToAllMethodMap.get(descriptor.getReadMethod());
-      SortedSet<Method> gettersWithJsonIgnore = Sets.filter(getters, JsonIgnorePredicate.INSTANCE);
-
-      Iterable<String> getterClassNames = FluentIterable.from(getters)
-          .transform(MethodToDeclaringClassFunction.INSTANCE)
-          .transform(ReflectHelpers.CLASS_NAME);
-      Iterable<String> gettersWithJsonIgnoreClassNames = FluentIterable.from(gettersWithJsonIgnore)
-          .transform(MethodToDeclaringClassFunction.INSTANCE)
-          .transform(ReflectHelpers.CLASS_NAME);
-
-      // Consistent means: either no getter declaration is ignored, or all of them are.
-      if (!(gettersWithJsonIgnore.isEmpty() || getters.size() == gettersWithJsonIgnore.size())) {
-        InconsistentlyIgnoredGetters err = new InconsistentlyIgnoredGetters();
-        err.descriptor = descriptor;
-        err.getterClassNames = getterClassNames;
-        err.gettersWithJsonIgnoreClassNames = gettersWithJsonIgnoreClassNames;
-        incompletelyIgnoredGetters.add(err);
-      }
-      // Once any getter inconsistency has been recorded, setter checks are skipped for the
-      // remaining properties: the getter error is thrown first after this loop.
-      if (!incompletelyIgnoredGetters.isEmpty()) {
-        continue;
-      }
-
-      SortedSet<Method> settersWithJsonIgnore =
-          Sets.filter(methodNameToAllMethodMap.get(descriptor.getWriteMethod()),
-              JsonIgnorePredicate.INSTANCE);
-
-      Iterable<String> settersWithJsonIgnoreClassNames = FluentIterable.from(settersWithJsonIgnore)
-              .transform(MethodToDeclaringClassFunction.INSTANCE)
-              .transform(ReflectHelpers.CLASS_NAME);
-
-      if (!settersWithJsonIgnore.isEmpty()) {
-        IgnoredSetter ignored = new IgnoredSetter();
-        ignored.descriptor = descriptor;
-        ignored.settersWithJsonIgnoreClassNames = settersWithJsonIgnoreClassNames;
-        ignoredSetters.add(ignored);
-      }
-    }
-    throwForGettersWithInconsistentJsonIgnore(incompletelyIgnoredGetters);
-    throwForSettersWithJsonIgnore(ignoredSetters);
-
-    List<MissingBeanMethod> missingBeanMethods = new ArrayList<>();
-    // Verify that each property has a matching read and write method.
-    for (PropertyDescriptor propertyDescriptor : descriptors) {
-      // A missing getter is tolerated only when the setter is one of the ignored methods.
-      if (!(IGNORED_METHODS.contains(propertyDescriptor.getWriteMethod())
-        || propertyDescriptor.getReadMethod() != null)) {
-        MissingBeanMethod method = new MissingBeanMethod();
-        method.property = propertyDescriptor;
-        method.methodType = "getter";
-        missingBeanMethods.add(method);
-        continue;
-      }
-      // A missing setter is tolerated only when the getter is one of the ignored methods.
-      if (!(IGNORED_METHODS.contains(propertyDescriptor.getReadMethod())
-              || propertyDescriptor.getWriteMethod() != null)) {
-        MissingBeanMethod method = new MissingBeanMethod();
-        method.property = propertyDescriptor;
-        method.methodType = "setter";
-        missingBeanMethods.add(method);
-        continue;
-      }
-      // Both accessors are accounted for; mark them as known methods.
-      methods.add(propertyDescriptor.getReadMethod());
-      methods.add(propertyDescriptor.getWriteMethod());
-    }
-    throwForMissingBeanMethod(iface, missingBeanMethods);
-
-    // Verify that no additional methods are on an interface that aren't a bean property.
-    SortedSet<Method> unknownMethods = new TreeSet<>(MethodComparator.INSTANCE);
-    unknownMethods.addAll(
-        Sets.filter(
-            Sets.difference(Sets.newHashSet(klass.getMethods()), methods),
-            NOT_SYNTHETIC_PREDICATE));
-    Preconditions.checkArgument(unknownMethods.isEmpty(),
-        "Methods %s on [%s] do not conform to being bean properties.",
-        FluentIterable.from(unknownMethods).transform(ReflectHelpers.METHOD_FORMATTER),
-        iface.getName());
-
-    return descriptors;
-  }
-
-  /** Records a method name that is declared with more than one return type. */
-  private static class MultipleDefinitions {
-    // Representative method for the colliding name (only its name is reported).
-    private Method method;
-    // All declarations sharing that method name.
-    private SortedSet<Method> collidingMethods;
-  }
-
-  /**
-   * Throws an {@link IllegalArgumentException} listing the methods of {@code iface} that are
-   * declared with conflicting return types. Returns normally when {@code definitions} is empty.
-   */
-  private static void throwForMultipleDefinitions(
-      Class<? extends PipelineOptions> iface, List<MultipleDefinitions> definitions) {
-    if (definitions.isEmpty()) {
-      return;
-    }
-    String ifaceName = iface.getName();
-    if (definitions.size() == 1) {
-      MultipleDefinitions only = definitions.get(0);
-      throw new IllegalArgumentException(String.format(
-          "Method [%s] has multiple definitions %s with different return types for [%s].",
-          only.method.getName(), only.collidingMethods, ifaceName));
-    }
-    // Several conflicts: a header line followed by one bullet per method.
-    StringBuilder message = new StringBuilder(String.format(
-        "Interface [%s] has Methods with multiple definitions with different return types:",
-        ifaceName));
-    for (MultipleDefinitions conflict : definitions) {
-      message.append(String.format(
-          "%n  - Method [%s] has multiple definitions %s",
-          conflict.method.getName(),
-          conflict.collidingMethods));
-    }
-    throw new IllegalArgumentException(message.toString());
-  }
-
-  /** Records a property whose getters are only partially annotated with @JsonIgnore. */
-  private static class InconsistentlyIgnoredGetters {
-    // The affected property.
-    PropertyDescriptor descriptor;
-    // Names of the classes declaring any getter for the property.
-    Iterable<String> getterClassNames;
-    // Names of the classes whose getter declaration carries @JsonIgnore.
-    Iterable<String> gettersWithJsonIgnoreClassNames;
-  }
-
-  /**
-   * Throws an {@link IllegalArgumentException} describing properties whose getters are not
-   * uniformly annotated with @JsonIgnore. Returns normally when {@code getters} is empty.
-   */
-  private static void throwForGettersWithInconsistentJsonIgnore(
-      List<InconsistentlyIgnoredGetters> getters) {
-    if (getters.isEmpty()) {
-      return;
-    }
-    if (getters.size() == 1) {
-      InconsistentlyIgnoredGetters only = getters.get(0);
-      throw new IllegalArgumentException(String.format(
-          "Expected getter for property [%s] to be marked with @JsonIgnore on all %s, "
-          + "found only on %s",
-          only.descriptor.getName(), only.getterClassNames,
-          only.gettersWithJsonIgnoreClassNames));
-    }
-    // Several offenders: a header line followed by one bullet per property.
-    StringBuilder message =
-        new StringBuilder("Property getters are inconsistently marked with @JsonIgnore:");
-    for (InconsistentlyIgnoredGetters offender : getters) {
-      message.append(
-          String.format("%n  - Expected for property [%s] to be marked on all %s, "
-              + "found only on %s",
-              offender.descriptor.getName(), offender.getterClassNames,
-              offender.gettersWithJsonIgnoreClassNames));
-    }
-    throw new IllegalArgumentException(message.toString());
-  }
-
-  /** Records a property that has at least one setter annotated with @JsonIgnore. */
-  private static class IgnoredSetter {
-    // The affected property.
-    PropertyDescriptor descriptor;
-    // Names of the classes whose setter declaration carries @JsonIgnore.
-    Iterable<String> settersWithJsonIgnoreClassNames;
-  }
-
-  /**
-   * Throws an {@link IllegalArgumentException} describing setters that are annotated with
-   * @JsonIgnore. Returns normally when {@code setters} is empty.
-   */
-  private static void throwForSettersWithJsonIgnore(List<IgnoredSetter> setters) {
-    if (setters.isEmpty()) {
-      return;
-    }
-    if (setters.size() == 1) {
-      IgnoredSetter only = setters.get(0);
-      throw new IllegalArgumentException(
-          String.format("Expected setter for property [%s] to not be marked with @JsonIgnore on %s",
-              only.descriptor.getName(), only.settersWithJsonIgnoreClassNames));
-    }
-    // Several offenders: a header line followed by one bullet per property.
-    StringBuilder message = new StringBuilder("Found setters marked with @JsonIgnore:");
-    for (IgnoredSetter offender : setters) {
-      message.append(
-          String.format("%n  - Setter for property [%s] should not be marked with @JsonIgnore "
-              + "on %s",
-              offender.descriptor.getName(), offender.settersWithJsonIgnoreClassNames));
-    }
-    throw new IllegalArgumentException(message.toString());
-  }
-
-  /** Records a bean property that lacks either its getter or its setter. */
-  private static class MissingBeanMethod {
-    // Which accessor is missing: "getter" or "setter".
-    String methodType;
-    // The property the accessor is missing from.
-    PropertyDescriptor property;
-  }
-
-  /**
-   * Throws an {@link IllegalArgumentException} describing the properties of {@code iface} that
-   * lack a getter or a setter. Returns normally when {@code missingBeanMethods} is empty.
-   */
-  private static void throwForMissingBeanMethod(
-      Class<? extends PipelineOptions> iface, List<MissingBeanMethod> missingBeanMethods) {
-    if (missingBeanMethods.isEmpty()) {
-      return;
-    }
-    if (missingBeanMethods.size() == 1) {
-      MissingBeanMethod only = missingBeanMethods.get(0);
-      throw new IllegalArgumentException(
-          String.format("Expected %s for property [%s] of type [%s] on [%s].",
-              only.methodType, only.property.getName(),
-              only.property.getPropertyType().getName(), iface.getName()));
-    }
-    // Several offenders: a header line followed by one bullet per property.
-    StringBuilder message = new StringBuilder(String.format(
-        "Found missing property methods on [%s]:", iface.getName()));
-    for (MissingBeanMethod missing : missingBeanMethods) {
-      message.append(
-          String.format("%n  - Expected %s for property [%s] of type [%s]", missing.methodType,
-              missing.property.getName(), missing.property.getPropertyType().getName()));
-    }
-    throw new IllegalArgumentException(message.toString());
-  }
-
-  /** A {@link Comparator} that uses the classes name to compare them. */
-  private static class ClassNameComparator implements Comparator<Class<?>> {
-    static final ClassNameComparator INSTANCE = new ClassNameComparator();
-    @Override
-    public int compare(Class<?> o1, Class<?> o2) {
-      return o1.getName().compareTo(o2.getName());
-    }
-  }
-
-  /** A {@link Comparator} that uses the object's classes canonical name to compare them. */
-  private static class ObjectsClassComparator implements Comparator<Object> {
-    static final ObjectsClassComparator INSTANCE = new ObjectsClassComparator();
-    @Override
-    public int compare(Object o1, Object o2) {
-      return o1.getClass().getCanonicalName().compareTo(o2.getClass().getCanonicalName());
-    }
-  }
-
-  /** A {@link Comparator} that uses the generic method signature to sort them. */
-  private static class MethodComparator implements Comparator<Method> {
-    static final MethodComparator INSTANCE = new MethodComparator();
-    @Override
-    public int compare(Method o1, Method o2) {
-      return o1.toGenericString().compareTo(o2.toGenericString());
-    }
-  }
-
-  /** A {@link Comparator} that uses the methods name to compare them. */
-  static class MethodNameComparator implements Comparator<Method> {
-    static final MethodNameComparator INSTANCE = new MethodNameComparator();
-    @Override
-    public int compare(Method o1, Method o2) {
-      return o1.getName().compareTo(o2.getName());
-    }
-  }
-
-  /** A {@link Function} that gets the method's return type. */
-  private static class ReturnTypeFetchingFunction implements Function<Method, Class<?>> {
-    static final ReturnTypeFetchingFunction INSTANCE = new ReturnTypeFetchingFunction();
-    @Override
-    public Class<?> apply(Method input) {
-      return input.getReturnType();
-    }
-  }
-
-  /** A {@link Function} with returns the declaring class for the method. */
-  private static class MethodToDeclaringClassFunction implements Function<Method, Class<?>> {
-    static final MethodToDeclaringClassFunction INSTANCE = new MethodToDeclaringClassFunction();
-    @Override
-    public Class<?> apply(Method input) {
-      return input.getDeclaringClass();
-    }
-  }
-
-  /**
-   * A {@link Predicate} that accepts exactly those methods that carry a
-   * {@link JsonIgnore @JsonIgnore} annotation.
-   */
-  static class JsonIgnorePredicate implements Predicate<Method> {
-    static final JsonIgnorePredicate INSTANCE = new JsonIgnorePredicate();
-    @Override
-    public boolean apply(Method method) {
-      return method.getAnnotation(JsonIgnore.class) != null;
-    }
-  }
-
-  /**
-   * Splits string arguments based upon expected pattern of --argName=value.
-   *
-   * <p>Example GNU style command line arguments:
-   *
-   * <pre>
-   *   --project=MyProject (simple property, will set the "project" property to "MyProject")
-   *   --readOnly=true (for boolean properties, will set the "readOnly" property to "true")
-   *   --readOnly (shorthand for boolean properties, will set the "readOnly" property to "true")
-   *   --x=1 --x=2 --x=3 (list style simple property, will set the "x" property to [1, 2, 3])
-   *   --x=1,2,3 (shorthand list style simple property, will set the "x" property to [1, 2, 3])
-   *   --complexObject='{"key1":"value1",...}' (JSON format for all other complex types)
-   * </pre>
-   *
-   * <p>Simple properties are able to bound to {@link String}, {@link Class}, enums and Java
-   * primitives {@code boolean}, {@code byte}, {@code short}, {@code int}, {@code long},
-   * {@code float}, {@code double} and their primitive wrapper classes.
-   *
-   * <p>Simple list style properties are able to be bound to {@code boolean[]}, {@code char[]},
-   * {@code short[]}, {@code int[]}, {@code long[]}, {@code float[]}, {@code double[]},
-   * {@code Class[]}, enum arrays, {@code String[]}, and {@code List<String>}.
-   *
-   * <p>JSON format is required for all other types.
-   *
-   * <p>An argument must start with '--' and must not have an empty name (i.e. must not start
-   * with '--='). If strict parsing is enabled a violation is rethrown as an
-   * {@link IllegalArgumentException}; otherwise the offending argument is logged and skipped.
-   * An argument without '=' is recorded with the value "true", and an argument ending in '=' is
-   * recorded with an empty value. Empty or null arguments will be ignored whether or not strict
-   * parsing is enabled.
-   */
-  private static ListMultimap<String, String> parseCommandLine(
-      String[] args, boolean strictParsing) {
-    ImmutableListMultimap.Builder<String, String> parsed = ImmutableListMultimap.builder();
-    for (String rawArg : args) {
-      if (Strings.isNullOrEmpty(rawArg)) {
-        continue;
-      }
-      try {
-        Preconditions.checkArgument(rawArg.startsWith("--"),
-            "Argument '%s' does not begin with '--'", rawArg);
-        int equalsAt = rawArg.indexOf("=");
-        // An '=' directly after the leading '--' would mean an empty argument name.
-        Preconditions.checkArgument(equalsAt != 2,
-            "Argument '%s' starts with '--=', empty argument name not allowed", rawArg);
-        if (equalsAt <= 0) {
-          // No '=' present: treat the argument as a boolean flag.
-          parsed.put(rawArg.substring(2), "true");
-        } else {
-          String name = rawArg.substring(2, equalsAt);
-          String value = rawArg.substring(equalsAt + 1);
-          parsed.put(name, value);
-        }
-      } catch (IllegalArgumentException e) {
-        if (strictParsing) {
-          throw e;
-        }
-        LOG.warn("Strict parsing is disabled, ignoring option '{}' because {}",
-            rawArg, e.getMessage());
-      }
-    }
-    return parsed.build();
-  }
-
-  /**
-   * Using the parsed string arguments, we convert the strings to the expected
-   * return type of the methods that are found on the passed-in class.
-   *
-   * <p>For any return type that is expected to be an array or a collection, we further
-   * split up each string on ','.
-   *
-   * <p>We special case the "runner" option. It is mapped to the class of the {@link PipelineRunner}
-   * based off of the {@link PipelineRunner PipelineRunners} simple class name. If the provided
-   * runner name is not registered via a {@link PipelineRunnerRegistrar}, we attempt to obtain the
-   * class that the name represents using {@link Class#forName(String)} and use the result class if
-   * it subclasses {@link PipelineRunner}.
-   *
-   * <p>If strict parsing is disabled, unknown options or options that cannot be converted to
-   * the expected java type using an {@link ObjectMapper} will be ignored.
-   */
-  private static <T extends PipelineOptions> Map<String, Object> parseObjects(
-      Class<T> klass, ListMultimap<String, String> options, boolean strictParsing) {
-    Map<String, Method> propertyNamesToGetters = Maps.newHashMap();
-    PipelineOptionsFactory.validateWellFormed(klass, REGISTERED_OPTIONS);
-    @SuppressWarnings("unchecked")
-    Iterable<PropertyDescriptor> propertyDescriptors =
-        PipelineOptionsFactory.getPropertyDescriptors(
-            FluentIterable.from(getRegisteredOptions()).append(klass).toSet());
-    for (PropertyDescriptor descriptor : propertyDescriptors) {
-      propertyNamesToGetters.put(descriptor.getName(), descriptor.getReadMethod());
-    }
-    Map<String, Object> convertedOptions = Maps.newHashMap();
-    for (final Map.Entry<String, Collection<String>> entry : options.asMap().entrySet()) {
-      try {
-        // Search for close matches for missing properties.
-        // Either off by one or off by two character errors.
-        if (!propertyNamesToGetters.containsKey(entry.getKey())) {
-          SortedSet<String> closestMatches = new TreeSet<String>(
-              Sets.filter(propertyNamesToGetters.keySet(), new Predicate<String>() {
-                @Override
-                public boolean apply(@Nullable String input) {
-                  return StringUtils.getLevenshteinDistance(entry.getKey(), input) <= 2;
-                }
-          }));
-          switch (closestMatches.size()) {
-            case 0:
-              throw new IllegalArgumentException(
-                  String.format("Class %s missing a property named '%s'.",
-                      klass, entry.getKey()));
-            case 1:
-              throw new IllegalArgumentException(
-                  String.format("Class %s missing a property named '%s'. Did you mean '%s'?",
-                      klass, entry.getKey(), Iterables.getOnlyElement(closestMatches)));
-            default:
-              throw new IllegalArgumentException(
-                  String.format("Class %s missing a property named '%s'. Did you mean one of %s?",
-                      klass, entry.getKey(), closestMatches));
-          }
-        }
-
-        Method method = propertyNamesToGetters.get(entry.getKey());
-        // Only allow empty argument values for String, String Array, and Collection.
-        Class<?> returnType = method.getReturnType();
-        JavaType type = MAPPER.getTypeFactory().constructType(method.getGenericReturnType());
-        if ("runner".equals(entry.getKey())) {
-          String runner = Iterables.getOnlyElement(entry.getValue());
-          if (SUPPORTED_PIPELINE_RUNNERS.containsKey(runner)) {
-            convertedOptions.put("runner", SUPPORTED_PIPELINE_RUNNERS.get(runner));
-          } else {
-            try {
-              Class<?> runnerClass = Class.forName(runner);
-              checkArgument(
-                  PipelineRunner.class.isAssignableFrom(runnerClass),
-                  "Class '%s' does not implement PipelineRunner. Supported pipeline runners %s",
-                  runner,
-                  Sets.newTreeSet(SUPPORTED_PIPELINE_RUNNERS.keySet()));
-              convertedOptions.put("runner", runnerClass);
-            } catch (ClassNotFoundException e) {
-              String msg =
-                  String.format(
-                      "Unknown 'runner' specified '%s', supported pipeline runners %s",
-                      runner,
-                      Sets.newTreeSet(SUPPORTED_PIPELINE_RUNNERS.keySet()));
-                throw new IllegalArgumentException(msg, e);
-            }
-          }
-        } else if ((returnType.isArray() && (SIMPLE_TYPES.contains(returnType.getComponentType())
-                || returnType.getComponentType().isEnum()))
-            || Collection.class.isAssignableFrom(returnType)) {
-          // Split any strings with ","
-          List<String> values = FluentIterable.from(entry.getValue())
-              .transformAndConcat(new Function<String, Iterable<String>>() {
-                @Override
-                public Iterable<String> apply(String input) {
-                  return Arrays.asList(input.split(","));
-                }
-          }).toList();
-
-          if (returnType.isArray() && !returnType.getComponentType().equals(String.class)) {
-            for (String value : values) {
-              Preconditions.checkArgument(!value.isEmpty(),
-                  "Empty argument value is only allowed for String, String Array, and Collection,"
-                  + " but received: " + returnType);
-            }
-          }
-          convertedOptions.put(entry.getKey(), MAPPER.convertValue(values, type));
-        } else if (SIMPLE_TYPES.contains(returnType) || returnType.isEnum()) {
-          String value = Iterables.getOnlyElement(entry.getValue());
-          Preconditions.checkArgument(returnType.equals(String.class) || !value.isEmpty(),
-              "Empty argument value is only allowed for String, String Array, and Collection,"
-               + " but received: " + returnType);
-          convertedOptions.put(entry.getKey(), MAPPER.convertValue(value, type));
-        } else {
-          String value = Iterables.getOnlyElement(entry.getValue());
-          Preconditions.checkArgument(returnType.equals(String.class) || !value.isEmpty(),
-              "Empty argument value is only allowed for String, String Array, and Collection,"
-               + " but received: " + returnType);
-          try {
-            convertedOptions.put(entry.getKey(), MAPPER.readValue(value, type));
-          } catch (IOException e) {
-            throw new IllegalArgumentException("Unable to parse JSON value " + value, e);
-          }
-        }
-      } catch (IllegalArgumentException e) {
-        if (strictParsing) {
-          throw e;
-        } else {
-          LOG.warn("Strict parsing is disabled, ignoring option '{}' with value '{}' because {}",
-              entry.getKey(), entry.getValue(), e.getMessage());
-        }
-      }
-    }
-    return convertedOptions;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java
deleted file mode 100644
index 1678541..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsRegistrar.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.auto.service.AutoService;
-
-import java.util.ServiceLoader;
-
-/**
- * {@link PipelineOptions} creators have the ability to automatically have their
- * {@link PipelineOptions} registered with this SDK by creating a {@link ServiceLoader} entry
- * and a concrete implementation of this interface.
- *
- * <p>Note that automatic registration of any {@link PipelineOptions} requires users
- * conform to the limitations discussed on {@link PipelineOptionsFactory#register(Class)}.
- *
- * <p>It is optional but recommended to use one of the many build time tools such as
- * {@link AutoService} to generate the necessary META-INF files automatically.
- */
-public interface PipelineOptionsRegistrar {
-  Iterable<Class<? extends PipelineOptions>> getPipelineOptions();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java
deleted file mode 100644
index b5612c4..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptionsValidator.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.cloud.dataflow.sdk.options.Validation.Required;
-import com.google.cloud.dataflow.sdk.util.common.ReflectHelpers;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Collections2;
-import com.google.common.collect.Ordering;
-import com.google.common.collect.SortedSetMultimap;
-import com.google.common.collect.TreeMultimap;
-
-import java.lang.reflect.Method;
-import java.lang.reflect.Proxy;
-import java.util.Collection;
-
-/**
- * Validates that the {@link PipelineOptions} conforms to all the {@link Validation} criteria.
- */
-public class PipelineOptionsValidator {
-  /**
-   * Validates that the passed {@link PipelineOptions} conforms to all the validation criteria from
-   * the passed in interface.
-   *
-   * <p>Note that the interface requested must conform to the validation criteria specified on
-   * {@link PipelineOptions#as(Class)}.
-   *
-   * @param klass The interface to fetch validation criteria from.
-   * @param options The {@link PipelineOptions} to validate.
-   * @return The passed in {@code options} viewed as the {@code klass} interface.
-   */
-  public static <T extends PipelineOptions> T validate(Class<T> klass, PipelineOptions options) {
-    Preconditions.checkNotNull(klass);
-    Preconditions.checkNotNull(options);
-    Preconditions.checkArgument(Proxy.isProxyClass(options.getClass()));
-    Preconditions.checkArgument(Proxy.getInvocationHandler(options)
-        instanceof ProxyInvocationHandler);
-
-    // Ensure the methods for T are registered on the ProxyInvocationHandler
-    T asClassOptions = options.as(klass);
-
-    ProxyInvocationHandler handler =
-        (ProxyInvocationHandler) Proxy.getInvocationHandler(asClassOptions);
-
-    SortedSetMultimap<String, Method> requiredGroups = TreeMultimap.create(
-        Ordering.natural(), PipelineOptionsFactory.MethodNameComparator.INSTANCE);
-    for (Method method : ReflectHelpers.getClosureOfMethodsOnInterface(klass)) {
-      Required requiredAnnotation = method.getAnnotation(Validation.Required.class);
-      if (requiredAnnotation != null) {
-        if (requiredAnnotation.groups().length > 0) {
-          for (String requiredGroup : requiredAnnotation.groups()) {
-            requiredGroups.put(requiredGroup, method);
-          }
-        } else {
-          Preconditions.checkArgument(handler.invoke(asClassOptions, method, null) != null,
-              "Missing required value for [" + method + ", \"" + getDescription(method) + "\"]. ");
-        }
-      }
-    }
-
-    for (String requiredGroup : requiredGroups.keySet()) {
-      if (!verifyGroup(handler, asClassOptions, requiredGroups.get(requiredGroup))) {
-        throw new IllegalArgumentException("Missing required value for group [" + requiredGroup
-            + "]. At least one of the following properties "
-            + Collections2.transform(
-                requiredGroups.get(requiredGroup), ReflectHelpers.METHOD_FORMATTER)
-            + " required. Run with --help=" + klass.getSimpleName() + " for more information.");
-      }
-    }
-
-    return asClassOptions;
-  }
-
-  private static boolean verifyGroup(ProxyInvocationHandler handler, PipelineOptions options,
-      Collection<Method> requiredGroup) {
-    for (Method m : requiredGroup) {
-      if (handler.invoke(options, m, null) != null) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  private static String getDescription(Method method) {
-    Description description = method.getAnnotation(Description.class);
-    return description == null ? "" : description.value();
-  }
-}

[42/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java
deleted file mode 100644
index 5d32a9d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/FileBasedSource.java
+++ /dev/null
@@ -1,648 +0,0 @@
-/*
- * Copyright (C) 2014 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.util.IOChannelFactory;
-import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-import com.google.common.util.concurrent.Futures;
-import com.google.common.util.concurrent.ListenableFuture;
-import com.google.common.util.concurrent.ListeningExecutorService;
-import com.google.common.util.concurrent.MoreExecutors;
-
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.nio.channels.ReadableByteChannel;
-import java.nio.channels.SeekableByteChannel;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.ListIterator;
-import java.util.NoSuchElementException;
-import java.util.concurrent.Callable;
-import java.util.concurrent.Executors;
-
-/**
- * A common base class for all file-based {@link Source}s. Extend this class to implement your own
- * file-based custom source.
- *
- * <p>A file-based {@code Source} is a {@code Source} backed by a file pattern defined as a Java
- * glob, a single file, or an offset range for a single file. See {@link OffsetBasedSource} and
- * {@link com.google.cloud.dataflow.sdk.io.range.RangeTracker} for semantics of offset ranges.
- *
- * <p>This source stores a {@code String} that is an {@link IOChannelFactory} specification for a
- * file or file pattern. There should be an {@code IOChannelFactory} defined for the file
- * specification provided. Please refer to {@link IOChannelUtils} and {@link IOChannelFactory} for
- * more information on this.
- *
- * <p>In addition to the methods left abstract from {@code BoundedSource}, subclasses must implement
- * methods to create a sub-source and a reader for a range of a single file -
- * {@link #createForSubrangeOfFile} and {@link #createSingleFileReader}. Please refer to
- * {@link XmlSource} for an example implementation of {@code FileBasedSource}.
- *
- * @param <T> Type of records represented by the source.
- */
-public abstract class FileBasedSource<T> extends OffsetBasedSource<T> {
-  private static final Logger LOG = LoggerFactory.getLogger(FileBasedSource.class);
-  private static final float FRACTION_OF_FILES_TO_STAT = 0.01f;
-
-  // Package-private for testing
-  static final int MAX_NUMBER_OF_FILES_FOR_AN_EXACT_STAT = 100;
-
-  // Size of the thread pool to be used for performing file operations in parallel.
-  // Package-private for testing.
-  static final int THREAD_POOL_SIZE = 128;
-
-  private final String fileOrPatternSpec;
-  private final Mode mode;
-
-  /**
-   * A given {@code FileBasedSource} represents a file resource of one of these types.
-   */
-  public enum Mode {
-    FILEPATTERN,
-    SINGLE_FILE_OR_SUBRANGE
-  }
-
-  /**
-   * Create a {@code FileBasedSource} based on a file or a file pattern specification. This
-   * constructor must be used when creating a new {@code FileBasedSource} for a file pattern.
-   *
-   * <p>See {@link OffsetBasedSource} for a detailed description of {@code minBundleSize}.
-   *
-   * @param fileOrPatternSpec {@link IOChannelFactory} specification of file or file pattern
-   *        represented by the {@link FileBasedSource}.
-   * @param minBundleSize minimum bundle size in bytes.
-   */
-  public FileBasedSource(String fileOrPatternSpec, long minBundleSize) {
-    super(0, Long.MAX_VALUE, minBundleSize);
-    mode = Mode.FILEPATTERN;
-    this.fileOrPatternSpec = fileOrPatternSpec;
-  }
-
-  /**
-   * Create a {@code FileBasedSource} based on a single file. This constructor must be used when
-   * creating a new {@code FileBasedSource} for a subrange of a single file.
-   * Additionally, this constructor must be used to create new {@code FileBasedSource}s when
-   * subclasses implement the method {@link #createForSubrangeOfFile}.
-   *
-   * <p>See {@link OffsetBasedSource} for detailed descriptions of {@code minBundleSize},
-   * {@code startOffset}, and {@code endOffset}.
-   *
-   * @param fileName {@link IOChannelFactory} specification of the file represented by the
-   *        {@link FileBasedSource}.
-   * @param minBundleSize minimum bundle size in bytes.
-   * @param startOffset starting byte offset.
-   * @param endOffset ending byte offset. If the specified value {@code >= #getMaxEndOffset()} it
-   *        implies {@code #getMaxEndOffset()}.
-   */
-  public FileBasedSource(String fileName, long minBundleSize,
-      long startOffset, long endOffset) {
-    super(startOffset, endOffset, minBundleSize);
-    mode = Mode.SINGLE_FILE_OR_SUBRANGE;
-    this.fileOrPatternSpec = fileName;
-  }
-
-  public final String getFileOrPatternSpec() {
-    return fileOrPatternSpec;
-  }
-
-  public final Mode getMode() {
-    return mode;
-  }
-
-  @Override
-  public final FileBasedSource<T> createSourceForSubrange(long start, long end) {
-    Preconditions.checkArgument(mode != Mode.FILEPATTERN,
-        "Cannot split a file pattern based source based on positions");
-    Preconditions.checkArgument(start >= getStartOffset(), "Start offset value " + start
-        + " of the subrange cannot be smaller than the start offset value " + getStartOffset()
-        + " of the parent source");
-    Preconditions.checkArgument(end <= getEndOffset(), "End offset value " + end
-        + " of the subrange cannot be larger than the end offset value " + getEndOffset()
-        + " of the parent source");
-
-    FileBasedSource<T> source = createForSubrangeOfFile(fileOrPatternSpec, start, end);
-    if (start > 0 || end != Long.MAX_VALUE) {
-      Preconditions.checkArgument(source.getMode() == Mode.SINGLE_FILE_OR_SUBRANGE,
-          "Source created for the range [" + start + "," + end + ")"
-          + " must be a subrange source");
-    }
-    return source;
-  }
-
-  /**
-   * Creates and returns a new {@code FileBasedSource} of the same type as the current
-   * {@code FileBasedSource} backed by a given file and an offset range. When the current source is
-   * being split, this method is used to generate new sub-sources. When creating the source,
-   * subclasses must call the constructor {@link #FileBasedSource(String, long, long, long)} of
-   * {@code FileBasedSource} with corresponding parameter values passed here.
-   *
-   * @param fileName file backing the new {@code FileBasedSource}.
-   * @param start starting byte offset of the new {@code FileBasedSource}.
-   * @param end ending byte offset of the new {@code FileBasedSource}. May be Long.MAX_VALUE,
-   *        in which case it will be inferred using {@link #getMaxEndOffset}.
-   */
-  protected abstract FileBasedSource<T> createForSubrangeOfFile(
-      String fileName, long start, long end);
-
-  /**
-   * Creates and returns an instance of a {@code FileBasedReader} implementation for the current
-   * source assuming the source represents a single file. File patterns will be handled by
-   * {@code FileBasedSource} implementation automatically.
-   */
-  protected abstract FileBasedReader<T> createSingleFileReader(
-      PipelineOptions options);
-
-  @Override
-  public final long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
-    // This implementation of method getEstimatedSizeBytes is provided to simplify subclasses. Here
-    // we perform the size estimation of files and file patterns using the interface provided by
-    // IOChannelFactory.
-
-    IOChannelFactory factory = IOChannelUtils.getFactory(fileOrPatternSpec);
-    if (mode == Mode.FILEPATTERN) {
-      // TODO Implement a more efficient parallel/batch size estimation mechanism for file patterns.
-      long startTime = System.currentTimeMillis();
-      long totalSize = 0;
-      Collection<String> inputs = factory.match(fileOrPatternSpec);
-      if (inputs.size() <= MAX_NUMBER_OF_FILES_FOR_AN_EXACT_STAT) {
-        totalSize = getExactTotalSizeOfFiles(inputs, factory);
-        LOG.debug("Size estimation of all files of pattern " + fileOrPatternSpec + " took "
-           + (System.currentTimeMillis() - startTime) + " ms");
-      } else {
-        totalSize = getEstimatedSizeOfFilesBySampling(inputs, factory);
-        LOG.debug("Size estimation of pattern " + fileOrPatternSpec + " by sampling took "
-           + (System.currentTimeMillis() - startTime) + " ms");
-      }
-      return totalSize;
-    } else {
-      long start = getStartOffset();
-      long end = Math.min(getEndOffset(), getMaxEndOffset(options));
-      return end - start;
-    }
-  }
-
-  // Get the exact total size of the given set of files.
-  // Invokes multiple requests for size estimation in parallel using a thread pool.
-  // TODO: replace this with bulk request API when it is available. Will require updates
-  // to IOChannelFactory interface.
-  private static long getExactTotalSizeOfFiles(
-      Collection<String> files, IOChannelFactory ioChannelFactory) throws Exception {
-    List<ListenableFuture<Long>> futures = new ArrayList<>();
-    ListeningExecutorService service =
-        MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(THREAD_POOL_SIZE));
-    long totalSize = 0;
-    try {
-      for (String file : files) {
-        futures.add(createFutureForSizeEstimation(file, ioChannelFactory, service));
-      }
-
-      for (Long val : Futures.allAsList(futures).get()) {
-        totalSize += val;
-      }
-
-      return totalSize;
-    } finally {
-      service.shutdown();
-    }
-  }
-
-  private static ListenableFuture<Long> createFutureForSizeEstimation(
-      final String file,
-      final IOChannelFactory ioChannelFactory,
-      ListeningExecutorService service) {
-    return service.submit(
-        new Callable<Long>() {
-          @Override
-          public Long call() throws Exception {
-            return ioChannelFactory.getSizeBytes(file);
-          }
-        });
-  }
-
-  // Estimate the total size of the given set of files through sampling and extrapolation.
-  // Currently we use uniform sampling which requires a linear sampling size for a reasonable
-  // estimate.
-  // TODO: Implement a more efficient sampling mechanism.
-  private static long getEstimatedSizeOfFilesBySampling(
-      Collection<String> files, IOChannelFactory ioChannelFactory) throws Exception {
-    int sampleSize = (int) (FRACTION_OF_FILES_TO_STAT * files.size());
-    sampleSize = Math.max(MAX_NUMBER_OF_FILES_FOR_AN_EXACT_STAT, sampleSize);
-
-    List<String> selectedFiles = new ArrayList<String>(files);
-    Collections.shuffle(selectedFiles);
-    selectedFiles = selectedFiles.subList(0, sampleSize);
-
-    return files.size() * getExactTotalSizeOfFiles(selectedFiles, ioChannelFactory)
-        / selectedFiles.size();
-  }
-
-  private ListenableFuture<List<? extends FileBasedSource<T>>> createFutureForFileSplit(
-      final String file,
-      final long desiredBundleSizeBytes,
-      final PipelineOptions options,
-      ListeningExecutorService service) {
-    return service.submit(new Callable<List<? extends FileBasedSource<T>>>() {
-      @Override
-      public List<? extends FileBasedSource<T>> call() throws Exception {
-        return createForSubrangeOfFile(file, 0, Long.MAX_VALUE)
-            .splitIntoBundles(desiredBundleSizeBytes, options);
-      }
-    });
-  }
-
-  @Override
-  public final List<? extends FileBasedSource<T>> splitIntoBundles(
-      long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
-    // This implementation of method splitIntoBundles is provided to simplify subclasses. Here we
-    // split a FileBasedSource based on a file pattern to FileBasedSources based on full single
-    // files. For files that can be efficiently seeked, we further split FileBasedSources based on
-    // those files to FileBasedSources based on sub ranges of single files.
-
-    if (mode == Mode.FILEPATTERN) {
-      long startTime = System.currentTimeMillis();
-      List<ListenableFuture<List<? extends FileBasedSource<T>>>> futures = new ArrayList<>();
-
-      ListeningExecutorService service =
-          MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(THREAD_POOL_SIZE));
-      try {
-        for (final String file : FileBasedSource.expandFilePattern(fileOrPatternSpec)) {
-          futures.add(createFutureForFileSplit(file, desiredBundleSizeBytes, options, service));
-        }
-        List<? extends FileBasedSource<T>> splitResults =
-            ImmutableList.copyOf(Iterables.concat(Futures.allAsList(futures).get()));
-        LOG.debug(
-            "Splitting the source based on file pattern "
-                + fileOrPatternSpec
-                + " took "
-                + (System.currentTimeMillis() - startTime)
-                + " ms");
-        return splitResults;
-      } finally {
-        service.shutdown();
-      }
-    } else {
-      if (isSplittable()) {
-        List<FileBasedSource<T>> splitResults = new ArrayList<>();
-        for (OffsetBasedSource<T> split :
-            super.splitIntoBundles(desiredBundleSizeBytes, options)) {
-          splitResults.add((FileBasedSource<T>) split);
-        }
-        return splitResults;
-      } else {
-        LOG.debug("The source for file " + fileOrPatternSpec
-            + " is not split into sub-range based sources since the file is not seekable");
-        return ImmutableList.of(this);
-      }
-    }
-  }
-
-  /**
-   * Determines whether a file represented by this source can be split into bundles.
-   *
-   * <p>By default, a file is splittable if it is on a file system that supports efficient read
-   * seeking. Subclasses may override to provide different behavior.
-   */
-  protected boolean isSplittable() throws Exception {
-    // We split a file-based source into subranges only if the file is efficiently seekable.
-    // If a file is not efficiently seekable it would be highly inefficient to create and read a
-    // source based on a subrange of that file.
-    IOChannelFactory factory = IOChannelUtils.getFactory(fileOrPatternSpec);
-    return factory.isReadSeekEfficient(fileOrPatternSpec);
-  }
-
-  @Override
-  public final BoundedReader<T> createReader(PipelineOptions options) throws IOException {
-    // Validate the current source prior to creating a reader for it.
-    this.validate();
-
-    if (mode == Mode.FILEPATTERN) {
-      long startTime = System.currentTimeMillis();
-      Collection<String> files = FileBasedSource.expandFilePattern(fileOrPatternSpec);
-      List<FileBasedReader<T>> fileReaders = new ArrayList<>();
-      for (String fileName : files) {
-        long endOffset;
-        try {
-          endOffset = IOChannelUtils.getFactory(fileName).getSizeBytes(fileName);
-        } catch (IOException e) {
-          LOG.warn("Failed to get size of " + fileName, e);
-          endOffset = Long.MAX_VALUE;
-        }
-        fileReaders.add(
-            createForSubrangeOfFile(fileName, 0, endOffset).createSingleFileReader(options));
-      }
-      LOG.debug("Creating a reader for file pattern " + fileOrPatternSpec + " took "
-          + (System.currentTimeMillis() - startTime) + " ms");
-      if (fileReaders.size() == 1) {
-        return fileReaders.get(0);
-      }
-      return new FilePatternReader(this, fileReaders);
-    } else {
-      return createSingleFileReader(options);
-    }
-  }
-
-  @Override
-  public String toString() {
-    switch (mode) {
-      case FILEPATTERN:
-        return fileOrPatternSpec;
-      case SINGLE_FILE_OR_SUBRANGE:
-        return fileOrPatternSpec + " range " + super.toString();
-      default:
-        throw new IllegalStateException("Unexpected mode: " + mode);
-    }
-  }
-
-  @Override
-  public void validate() {
-    super.validate();
-    switch (mode) {
-      case FILEPATTERN:
-        Preconditions.checkArgument(getStartOffset() == 0,
-            "FileBasedSource is based on a file pattern or a full single file "
-            + "but the starting offset proposed " + getStartOffset() + " is not zero");
-        Preconditions.checkArgument(getEndOffset() == Long.MAX_VALUE,
-            "FileBasedSource is based on a file pattern or a full single file "
-            + "but the ending offset proposed " + getEndOffset() + " is not Long.MAX_VALUE");
-        break;
-      case SINGLE_FILE_OR_SUBRANGE:
-        // Nothing more to validate.
-        break;
-      default:
-        throw new IllegalStateException("Unknown mode: " + mode);
-    }
-  }
-
-  @Override
-  public final long getMaxEndOffset(PipelineOptions options) throws Exception {
-    if (mode == Mode.FILEPATTERN) {
-      throw new IllegalArgumentException("Cannot determine the exact end offset of a file pattern");
-    }
-    if (getEndOffset() == Long.MAX_VALUE) {
-      IOChannelFactory factory = IOChannelUtils.getFactory(fileOrPatternSpec);
-      return factory.getSizeBytes(fileOrPatternSpec);
-    } else {
-      return getEndOffset();
-    }
-  }
-
-  protected static final Collection<String> expandFilePattern(String fileOrPatternSpec)
-      throws IOException {
-    IOChannelFactory factory = IOChannelUtils.getFactory(fileOrPatternSpec);
-    Collection<String> matches = factory.match(fileOrPatternSpec);
-    LOG.info("Matched {} files for pattern {}", matches.size(), fileOrPatternSpec);
-    return matches;
-  }
-
-  /**
-   * A {@link Source.Reader reader} that implements code common to readers of
-   * {@code FileBasedSource}s.
-   *
-   * <h2>Seekability</h2>
-   *
-   * <p>This reader uses a {@link ReadableByteChannel} created for the file represented by the
-   * corresponding source to efficiently move to the correct starting position defined in the
-   * source. Subclasses of this reader should implement {@link #startReading} to get access to this
-   * channel. If the source corresponding to the reader is for a subrange of a file the
-   * {@code ReadableByteChannel} provided is guaranteed to be an instance of the type
-   * {@link SeekableByteChannel}, which may be used by subclass to traverse back in the channel to
-   * determine the correct starting position.
-   *
-   * <h2>Reading Records</h2>
-   *
-   * <p>Sequential reading is implemented using {@link #readNextRecord}.
-   *
-   * <p>Then {@code FileBasedReader} implements "reading a range [A, B)" in the following way.
-   * <ol>
-   * <li>{@link #start} opens the file
-   * <li>{@link #start} seeks the {@code SeekableByteChannel} to A (reading offset ranges for
-   * non-seekable files is not supported) and calls {@code startReading()}
-   * <li>{@link #start} calls {@link #advance} once, which, via {@link #readNextRecord},
-   * locates the first record which is at a split point AND its offset is at or after A.
-   * If this record is at or after B, {@link #advance} returns false and reading is finished.
-   * <li>if the previous advance call returned {@code true} sequential reading starts and
-   * {@code advance()} will be called repeatedly
-   * </ol>
-   * {@code advance()} calls {@code readNextRecord()} on the subclass, and stops (returns false) if
-   * the new record is at a split point AND the offset of the new record is at or after B.
-   *
-   * <h2>Thread Safety</h2>
-   *
-   * <p>Since this class implements {@link Source.Reader} it guarantees thread safety. Abstract
-   * methods defined here will not be accessed by more than one thread concurrently.
-   */
-  public abstract static class FileBasedReader<T> extends OffsetBasedReader<T> {
-    private ReadableByteChannel channel = null;
-
-    /**
-     * Subclasses should not perform IO operations in the constructor. All IO operations should be
-     * delayed until the {@link #startReading} method is invoked.
-     */
-    public FileBasedReader(FileBasedSource<T> source) {
-      super(source);
-      Preconditions.checkArgument(source.getMode() != Mode.FILEPATTERN,
-          "FileBasedReader does not support reading file patterns");
-    }
-
-    @Override
-    public FileBasedSource<T> getCurrentSource() {
-      return (FileBasedSource<T>) super.getCurrentSource();
-    }
-
-    @Override
-    protected final boolean startImpl() throws IOException {
-      FileBasedSource<T> source = getCurrentSource();
-      IOChannelFactory factory = IOChannelUtils.getFactory(source.getFileOrPatternSpec());
-      this.channel = factory.open(source.getFileOrPatternSpec());
-
-      if (channel instanceof SeekableByteChannel) {
-        SeekableByteChannel seekChannel = (SeekableByteChannel) channel;
-        seekChannel.position(source.getStartOffset());
-      } else {
-        // Channel is not seekable. Must not be a subrange.
-        Preconditions.checkArgument(source.mode != Mode.SINGLE_FILE_OR_SUBRANGE,
-            "Subrange-based sources must only be defined for file types that support seekable "
-            + " read channels");
-        Preconditions.checkArgument(source.getStartOffset() == 0, "Start offset "
-            + source.getStartOffset()
-            + " is not zero but channel for reading the file is not seekable.");
-      }
-
-      startReading(channel);
-
-      // Advance once to load the first record.
-      return advanceImpl();
-    }
-
-    @Override
-    protected final boolean advanceImpl() throws IOException {
-      return readNextRecord();
-    }
-
-    /**
-     * Closes any {@link ReadableByteChannel} created for the current reader. This implementation is
-     * idempotent. Any {@code close()} method introduced by a subclass must be idempotent and must
-     * call the {@code close()} method in the {@code FileBasedReader}.
-     */
-    @Override
-    public void close() throws IOException {
-      if (channel != null) {
-        channel.close();
-      }
-    }
-
-    /**
-     * Performs any initialization of the subclass of {@code FileBasedReader} that involves IO
-     * operations. Will only be invoked once and before that invocation the base class will seek the
-     * channel to the source's starting offset.
-     *
-     * <p>Provided {@link ReadableByteChannel} is for the file represented by the source of this
-     * reader. Subclasses may use the {@code channel} to build a higher level IO abstraction, e.g.,
-     * a BufferedReader or an XML parser.
-     *
-     * <p>If the corresponding source is for a subrange of a file, {@code channel} is guaranteed to
-     * be an instance of the type {@link SeekableByteChannel}.
-     *
-     * <p>After this method is invoked the base class will not be reading data from the channel or
-     * adjusting the position of the channel. But the base class is responsible for properly closing
-     * the channel.
-     *
-     * @param channel a byte channel representing the file backing the reader.
-     */
-    protected abstract void startReading(ReadableByteChannel channel) throws IOException;
-
-    /**
-     * Reads the next record from the channel provided by {@link #startReading}. Methods
-     * {@link #getCurrent}, {@link #getCurrentOffset}, and {@link #isAtSplitPoint()} should return
-     * the corresponding information about the record read by the last invocation of this method.
-     *
-     * <p>Note that this method will be called the same way for reading the first record in the
-     * source (file or offset range in the file) and for reading subsequent records. It is up to the
-     * subclass to do anything special for locating and reading the first record, if necessary.
-     *
-     * @return {@code true} if a record was successfully read, {@code false} if the end of the
-     *         channel was reached before successfully reading a new record.
-     */
-    protected abstract boolean readNextRecord() throws IOException;
-  }
-
-  // An internal Reader implementation that concatenates a sequence of FileBasedReaders.
-  private class FilePatternReader extends BoundedReader<T> {
-    private final FileBasedSource<T> source;
-    private final List<FileBasedReader<T>> fileReaders;
-    final ListIterator<FileBasedReader<T>> fileReadersIterator;
-    FileBasedReader<T> currentReader = null;
-
-    public FilePatternReader(FileBasedSource<T> source, List<FileBasedReader<T>> fileReaders) {
-      this.source = source;
-      this.fileReaders = fileReaders;
-      this.fileReadersIterator = fileReaders.listIterator();
-    }
-
-    @Override
-    public boolean start() throws IOException {
-      return startNextNonemptyReader();
-    }
-
-    @Override
-    public boolean advance() throws IOException {
-      Preconditions.checkState(currentReader != null, "Call start() before advance()");
-      if (currentReader.advance()) {
-        return true;
-      }
-      return startNextNonemptyReader();
-    }
-
-    private boolean startNextNonemptyReader() throws IOException {
-      while (fileReadersIterator.hasNext()) {
-        currentReader = fileReadersIterator.next();
-        if (currentReader.start()) {
-          return true;
-        }
-        currentReader.close();
-      }
-      return false;
-    }
-
-    @Override
-    public T getCurrent() throws NoSuchElementException {
-      // A NoSuchElementException will be thrown by the last FileBasedReader if getCurrent() is
-      // called after advance() returns false.
-      return currentReader.getCurrent();
-    }
-
-    @Override
-    public Instant getCurrentTimestamp() throws NoSuchElementException {
-      // A NoSuchElementException will be thrown by the last FileBasedReader if
-      // getCurrentTimestamp() is called after advance() returns false.
-      return currentReader.getCurrentTimestamp();
-    }
-
-    @Override
-    public void close() throws IOException {
-      // Close all readers that may have not yet been closed.
-      // If this reader has not been started, currentReader is null.
-      if (currentReader != null) {
-        currentReader.close();
-      }
-      while (fileReadersIterator.hasNext()) {
-        fileReadersIterator.next().close();
-      }
-    }
-
-    @Override
-    public FileBasedSource<T> getCurrentSource() {
-      return source;
-    }
-
-    @Override
-    public FileBasedSource<T> splitAtFraction(double fraction) {
-      // Unsupported. TODO: implement.
-      LOG.debug("Dynamic splitting of FilePatternReader is unsupported.");
-      return null;
-    }
-
-    @Override
-    public Double getFractionConsumed() {
-      if (currentReader == null) {
-        return 0.0;
-      }
-      if (fileReaders.isEmpty()) {
-        return 1.0;
-      }
-      int index = fileReadersIterator.previousIndex();
-      int numReaders = fileReaders.size();
-      if (index == numReaders) {
-        return 1.0;
-      }
-      double before = 1.0 * index / numReaders;
-      double after = 1.0 * (index + 1) / numReaders;
-      Double fractionOfCurrentReader = currentReader.getFractionConsumed();
-      if (fractionOfCurrentReader == null) {
-        return before;
-      }
-      return before + fractionOfCurrentReader * (after - before);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java
deleted file mode 100644
index d581b80..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/OffsetBasedSource.java
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Copyright (C) 2014 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.io.range.OffsetRangeTracker;
-import com.google.cloud.dataflow.sdk.io.range.RangeTracker;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.common.base.Preconditions;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-/**
- * A {@link BoundedSource} that uses offsets to define starting and ending positions.
- *
- * <p>{@link OffsetBasedSource} is a common base class for all bounded sources where the input can
- * be represented as a single range, and an input can be efficiently processed in parallel by
- * splitting the range into a set of disjoint ranges whose union is the original range. This class
- * should be used for sources that can be cheaply read starting at any given offset.
- * {@link OffsetBasedSource} stores the range and implements splitting into bundles.
- *
- * <p>Extend {@link OffsetBasedSource} to implement your own offset-based custom source.
- * {@link FileBasedSource}, which is a subclass of this, adds additional functionality useful for
- * custom sources that are based on files. If possible implementors should start from
- * {@link FileBasedSource} instead of {@link OffsetBasedSource}.
- *
- * <p>Consult {@link RangeTracker} for important semantics common to all sources defined by a range
- * of positions of a certain type, including the semantics of split points
- * ({@link OffsetBasedReader#isAtSplitPoint}).
- *
- * @param <T> Type of records represented by the source.
- * @see BoundedSource
- * @see FileBasedSource
- * @see RangeTracker
- */
-public abstract class OffsetBasedSource<T> extends BoundedSource<T> {
-  private final long startOffset;
-  private final long endOffset;
-  private final long minBundleSize;
-
-  /**
-   * @param startOffset starting offset (inclusive) of the source. Must be non-negative.
-   *
-   * @param endOffset ending offset (exclusive) of the source. Use {@link Long#MAX_VALUE} to
-   *        indicate that the entire source after {@code startOffset} should be read. Must be
-   *        {@code > startOffset}.
-   *
-   * @param minBundleSize minimum bundle size in offset units that should be used when splitting the
-   *                      source into sub-sources. This value may not be respected if the total
-   *                      range of the source is smaller than the specified {@code minBundleSize}.
-   *                      Must be non-negative.
-   */
-  public OffsetBasedSource(long startOffset, long endOffset, long minBundleSize) {
-    this.startOffset = startOffset;
-    this.endOffset = endOffset;
-    this.minBundleSize = minBundleSize;
-  }
-
-  /**
-   * Returns the starting offset of the source.
-   */
-  public long getStartOffset() {
-    return startOffset;
-  }
-
-  /**
-   * Returns the specified ending offset of the source. Any returned value greater than or equal to
-   * {@link #getMaxEndOffset(PipelineOptions)} should be treated as
-   * {@link #getMaxEndOffset(PipelineOptions)}.
-   */
-  public long getEndOffset() {
-    return endOffset;
-  }
-
-  /**
-   * Returns the minimum bundle size that should be used when splitting the source into sub-sources.
-   * This value may not be respected if the total range of the source is smaller than the specified
-   * {@code minBundleSize}.
-   */
-  public long getMinBundleSize() {
-    return minBundleSize;
-  }
-
-  @Override
-  public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
-    long trueEndOffset = (endOffset == Long.MAX_VALUE) ? getMaxEndOffset(options) : endOffset;
-    return getBytesPerOffset() * (trueEndOffset - getStartOffset());
-  }
-
-  @Override
-  public List<? extends OffsetBasedSource<T>> splitIntoBundles(
-      long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
-    // Split the range into bundles based on the desiredBundleSizeBytes. Final bundle is adjusted to
-    // make sure that we do not end up with a too small bundle at the end. If the desired bundle
-    // size is smaller than the minBundleSize of the source then minBundleSize will be used instead.
-
-    long desiredBundleSizeOffsetUnits = Math.max(
-        Math.max(1, desiredBundleSizeBytes / getBytesPerOffset()),
-        minBundleSize);
-
-    List<OffsetBasedSource<T>> subSources = new ArrayList<>();
-    long start = startOffset;
-    long maxEnd = Math.min(endOffset, getMaxEndOffset(options));
-
-    while (start < maxEnd) {
-      long end = start + desiredBundleSizeOffsetUnits;
-      end = Math.min(end, maxEnd);
-      // Avoid having a too small bundle at the end and ensure that we respect minBundleSize.
-      long remaining = maxEnd - end;
-      if ((remaining < desiredBundleSizeOffsetUnits / 4) || (remaining < minBundleSize)) {
-        end = maxEnd;
-      }
-      subSources.add(createSourceForSubrange(start, end));
-
-      start = end;
-    }
-    return subSources;
-  }
-
-  @Override
-  public void validate() {
-    Preconditions.checkArgument(
-        this.startOffset >= 0,
-        "Start offset has value %s, must be non-negative", this.startOffset);
-    Preconditions.checkArgument(
-        this.endOffset >= 0,
-        "End offset has value %s, must be non-negative", this.endOffset);
-    Preconditions.checkArgument(
-        this.startOffset < this.endOffset,
-        "Start offset %s must be before end offset %s",
-        this.startOffset, this.endOffset);
-    Preconditions.checkArgument(
-        this.minBundleSize >= 0,
-        "minBundleSize has value %s, must be non-negative",
-        this.minBundleSize);
-  }
-
-  @Override
-  public String toString() {
-    return "[" + startOffset + ", " + endOffset + ")";
-  }
-
-  /**
-   * Returns approximately how many bytes of data correspond to a single offset in this source.
-   * Used for translation between this source's range and methods defined in terms of bytes, such
-   * as {@link #getEstimatedSizeBytes} and {@link #splitIntoBundles}.
-   *
-   * <p>Defaults to {@code 1} byte, which is the common case for, e.g., file sources.
-   */
-  public long getBytesPerOffset() {
-    return 1L;
-  }
-
-  /**
-   * Returns the actual ending offset of the current source. The value returned by this function
-   * will be used to clip the end of the range {@code [startOffset, endOffset)} such that the
-   * range used is {@code [startOffset, min(endOffset, maxEndOffset))}.
-   *
-   * <p>As an example in which {@link OffsetBasedSource} is used to implement a file source, suppose
-   * that this source was constructed with an {@code endOffset} of {@link Long#MAX_VALUE} to
-   * indicate that a file should be read to the end. Then {@link #getMaxEndOffset} should determine
-   * the actual, exact size of the file in bytes and return it.
-   */
-  public abstract long getMaxEndOffset(PipelineOptions options) throws Exception;
-
-  /**
-   * Returns an {@link OffsetBasedSource} for a subrange of the current source. The
-   * subrange {@code [start, end)} must be within the range {@code [startOffset, endOffset)} of
-   * the current source, i.e. {@code startOffset <= start < end <= endOffset}.
-   */
-  public abstract OffsetBasedSource<T> createSourceForSubrange(long start, long end);
-
-  /**
-   * Whether this source should allow dynamic splitting of the offset ranges.
-   *
-   * <p>True by default. Override this to return false if the source cannot
-   * support dynamic splitting correctly. If this returns false,
-   * {@link OffsetBasedSource.OffsetBasedReader#splitAtFraction} will refuse all split requests.
-   */
-  public boolean allowsDynamicSplitting() {
-    return true;
-  }
-
-  /**
-   * A {@link Source.Reader} that implements code common to readers of all
-   * {@link OffsetBasedSource}s.
-   *
-   * <p>Subclasses have to implement:
-   * <ul>
-   *   <li>The methods {@link #startImpl} and {@link #advanceImpl} for reading the
-   *   first or subsequent records.
-   *   <li>The methods {@link #getCurrent}, {@link #getCurrentOffset}, and optionally
-   *   {@link #isAtSplitPoint} and {@link #getCurrentTimestamp} to access properties of
-   *   the last record successfully read by {@link #startImpl} or {@link #advanceImpl}.
-   * </ul>
-   */
-  public abstract static class OffsetBasedReader<T> extends BoundedReader<T> {
-    private static final Logger LOG = LoggerFactory.getLogger(OffsetBasedReader.class);
-
-    private OffsetBasedSource<T> source;
-
-    /** The {@link OffsetRangeTracker} managing the range and current position of the source. */
-    private final OffsetRangeTracker rangeTracker;
-
-    /**
-     * @param source the {@link OffsetBasedSource} to be read by the current reader.
-     */
-    public OffsetBasedReader(OffsetBasedSource<T> source) {
-      this.source = source;
-      this.rangeTracker = new OffsetRangeTracker(source.getStartOffset(), source.getEndOffset());
-    }
-
-    /**
-     * Returns the <i>starting</i> offset of the {@link Source.Reader#getCurrent current record},
-     * which has been read by the last successful {@link Source.Reader#start} or
-     * {@link Source.Reader#advance} call.
-     * <p>If no such call has been made yet, the return value is unspecified.
-     * <p>See {@link RangeTracker} for description of offset semantics.
-     */
-    protected abstract long getCurrentOffset() throws NoSuchElementException;
-
-    /**
-     * Returns whether the current record is at a split point (i.e., whether the current record
-     * would be the first record to be read by a source with a specified start offset of
-     * {@link #getCurrentOffset}).
-     *
-     * <p>See detailed documentation about split points in {@link RangeTracker}.
-     */
-    protected boolean isAtSplitPoint() throws NoSuchElementException {
-      return true;
-    }
-
-    @Override
-    public final boolean start() throws IOException {
-      return startImpl() && rangeTracker.tryReturnRecordAt(isAtSplitPoint(), getCurrentOffset());
-    }
-
-    @Override
-    public final boolean advance() throws IOException {
-      return advanceImpl() && rangeTracker.tryReturnRecordAt(isAtSplitPoint(), getCurrentOffset());
-    }
-
-    /**
-     * Initializes the {@link OffsetBasedSource.OffsetBasedReader} and advances to the first record,
-     * returning {@code true} if there is a record available to be read. This method will be
-     * invoked exactly once and may perform expensive setup operations that are needed to
-     * initialize the reader.
-     *
-     * <p>This function is the {@code OffsetBasedReader} implementation of
-     * {@link BoundedReader#start}. The key difference is that the implementor can ignore the
-     * possibility that it should no longer produce the first record, either because it has exceeded
-     * the original {@code endOffset} assigned to the reader, or because a concurrent call to
-     * {@link #splitAtFraction} has changed the source to shrink the offset range being read.
-     *
-     * @see BoundedReader#start
-     */
-    protected abstract boolean startImpl() throws IOException;
-
-    /**
-     * Advances to the next record and returns {@code true}, or returns false if there is no next
-     * record.
-     *
-     * <p>This function is the {@code OffsetBasedReader} implementation of
-     * {@link BoundedReader#advance}. The key difference is that the implementor can ignore the
-     * possibility that it should no longer produce the next record, either because it has exceeded
-     * the original {@code endOffset} assigned to the reader, or because a concurrent call to
-     * {@link #splitAtFraction} has changed the source to shrink the offset range being read.
-     *
-     * @see BoundedReader#advance
-     */
-    protected abstract boolean advanceImpl() throws IOException;
-
-    @Override
-    public synchronized OffsetBasedSource<T> getCurrentSource() {
-      return source;
-    }
-
-    @Override
-    public Double getFractionConsumed() {
-      return rangeTracker.getFractionConsumed();
-    }
-
-    @Override
-    public final synchronized OffsetBasedSource<T> splitAtFraction(double fraction) {
-      if (!getCurrentSource().allowsDynamicSplitting()) {
-        return null;
-      }
-      if (rangeTracker.getStopPosition() == Long.MAX_VALUE) {
-        LOG.debug(
-            "Refusing to split unbounded OffsetBasedReader {} at fraction {}",
-            rangeTracker, fraction);
-        return null;
-      }
-      long splitOffset = rangeTracker.getPositionForFractionConsumed(fraction);
-      LOG.debug(
-          "Proposing to split OffsetBasedReader {} at fraction {} (offset {})",
-          rangeTracker, fraction, splitOffset);
-      if (!rangeTracker.trySplitAtPosition(splitOffset)) {
-        return null;
-      }
-      long start = source.getStartOffset();
-      long end = source.getEndOffset();
-      OffsetBasedSource<T> primary = source.createSourceForSubrange(start, splitOffset);
-      OffsetBasedSource<T> residual = source.createSourceForSubrange(splitOffset, end);
-      this.source = primary;
-      return residual;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java
deleted file mode 100644
index 653b31f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/PubsubIO.java
+++ /dev/null
@@ -1,1044 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import static com.google.common.base.MoreObjects.firstNonNull;
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.api.client.util.Clock;
-import com.google.api.client.util.DateTime;
-import com.google.api.services.pubsub.Pubsub;
-import com.google.api.services.pubsub.model.AcknowledgeRequest;
-import com.google.api.services.pubsub.model.PublishRequest;
-import com.google.api.services.pubsub.model.PubsubMessage;
-import com.google.api.services.pubsub.model.PullRequest;
-import com.google.api.services.pubsub.model.PullResponse;
-import com.google.api.services.pubsub.model.ReceivedMessage;
-import com.google.api.services.pubsub.model.Subscription;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.util.Transport;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Throwables;
-import com.google.common.collect.ImmutableMap;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.annotation.Nullable;
-
-/**
- * Read and Write {@link PTransform}s for Cloud Pub/Sub streams. These transforms create
- * and consume unbounded {@link PCollection PCollections}.
- *
- * <h3>Permissions</h3>
- * <p>Permission requirements depend on the {@link PipelineRunner} that is used to execute the
- * Dataflow job. Please refer to the documentation of corresponding
- * {@link PipelineRunner PipelineRunners} for more details.
- */
-public class PubsubIO {
-  private static final Logger LOG = LoggerFactory.getLogger(PubsubIO.class);
-
-  /** The default {@link Coder} used to translate to/from Cloud Pub/Sub messages. */
-  public static final Coder<String> DEFAULT_PUBSUB_CODER = StringUtf8Coder.of();
-
-  /**
-   * Project IDs must contain 6-63 lowercase letters, digits, or dashes.
-   * IDs must start with a letter and may not end with a dash.
-   * This regex isn't exact - this allows for patterns that would be rejected by
-   * the service, but this is sufficient for basic parsing of Pub/Sub resource paths.
-   */
-  private static final Pattern PROJECT_ID_REGEXP =
-      Pattern.compile("[a-z][-a-z0-9:.]{4,61}[a-z0-9]");
-
-  private static final Pattern SUBSCRIPTION_REGEXP =
-      Pattern.compile("projects/([^/]+)/subscriptions/(.+)");
-
-  private static final Pattern TOPIC_REGEXP = Pattern.compile("projects/([^/]+)/topics/(.+)");
-
-  private static final Pattern V1BETA1_SUBSCRIPTION_REGEXP =
-      Pattern.compile("/subscriptions/([^/]+)/(.+)");
-
-  private static final Pattern V1BETA1_TOPIC_REGEXP = Pattern.compile("/topics/([^/]+)/(.+)");
-
-  private static final Pattern PUBSUB_NAME_REGEXP = Pattern.compile("[a-zA-Z][-._~%+a-zA-Z0-9]+");
-
-  private static final int PUBSUB_NAME_MAX_LENGTH = 255;
-
-  private static final String SUBSCRIPTION_RANDOM_TEST_PREFIX = "_random/";
-  private static final String SUBSCRIPTION_STARTING_SIGNAL = "_starting_signal/";
-  private static final String TOPIC_DEV_NULL_TEST_NAME = "/topics/dev/null";
-
-  private static void validateProjectName(String project) {
-    Matcher match = PROJECT_ID_REGEXP.matcher(project);
-    if (!match.matches()) {
-      throw new IllegalArgumentException(
-          "Illegal project name specified in Pubsub subscription: " + project);
-    }
-  }
-
-  private static void validatePubsubName(String name) {
-    if (name.length() > PUBSUB_NAME_MAX_LENGTH) {
-      throw new IllegalArgumentException(
-          "Pubsub object name is longer than 255 characters: " + name);
-    }
-
-    if (name.startsWith("goog")) {
-      throw new IllegalArgumentException("Pubsub object name cannot start with goog: " + name);
-    }
-
-    Matcher match = PUBSUB_NAME_REGEXP.matcher(name);
-    if (!match.matches()) {
-      throw new IllegalArgumentException("Illegal Pubsub object name specified: " + name
-          + " Please see Javadoc for naming rules.");
-    }
-  }
-
-  /**
-   * Returns the {@link Instant} that corresponds to the timestamp in the supplied
-   * {@link PubsubMessage} under the specified {@code label}. See
-   * {@link PubsubIO.Read#timestampLabel(String)} for details about how these messages are
-   * parsed.
-   *
-   * <p>The {@link Clock} parameter is used to virtualize time for testing.
-   *
-   * @throws IllegalArgumentException if the timestamp label is provided, but there is no
-   *     corresponding attribute in the message or the value provided is not a valid timestamp
-   *     string.
-   * @see PubsubIO.Read#timestampLabel(String)
-   */
-  @VisibleForTesting
-  protected static Instant assignMessageTimestamp(
-      PubsubMessage message, @Nullable String label, Clock clock) {
-    if (label == null) {
-      return new Instant(clock.currentTimeMillis());
-    }
-
-    // Extract message attributes, defaulting to empty map if null.
-    Map<String, String> attributes = firstNonNull(
-        message.getAttributes(), ImmutableMap.<String, String>of());
-
-    String timestampStr = attributes.get(label);
-    checkArgument(timestampStr != null && !timestampStr.isEmpty(),
-        "PubSub message is missing a timestamp in label: %s", label);
-
-    long millisSinceEpoch;
-    try {
-      // Try parsing as milliseconds since epoch. Note there is no way to parse a string in
-      // RFC 3339 format here.
-      // Expected IllegalArgumentException if parsing fails; we use that to fall back to RFC 3339.
-      millisSinceEpoch = Long.parseLong(timestampStr);
-    } catch (IllegalArgumentException e) {
-      // Try parsing as RFC3339 string. DateTime.parseRfc3339 will throw an IllegalArgumentException
-      // if parsing fails, and the caller should handle.
-      millisSinceEpoch = DateTime.parseRfc3339(timestampStr).getValue();
-    }
-    return new Instant(millisSinceEpoch);
-  }
-
-  /**
-   * Class representing a Cloud Pub/Sub Subscription.
-   */
-  public static class PubsubSubscription implements Serializable {
-    private enum Type { NORMAL, FAKE }
-
-    private final Type type;
-    private final String project;
-    private final String subscription;
-
-    private PubsubSubscription(Type type, String project, String subscription) {
-      this.type = type;
-      this.project = project;
-      this.subscription = subscription;
-    }
-
-    /**
-     * Creates a class representing a Pub/Sub subscription from the specified subscription path.
-     *
-     * <p>Cloud Pub/Sub subscription names should be of the form
-     * {@code projects/<project>/subscriptions/<subscription>}, where {@code <project>} is the name
-     * of the project the subscription belongs to. The {@code <subscription>} component must comply
-     * with the following requirements:
-     *
-     * <ul>
-     * <li>Can only contain lowercase letters, numbers, dashes ('-'), underscores ('_') and periods
-     * ('.').</li>
-     * <li>Must be between 3 and 255 characters.</li>
-     * <li>Must begin with a letter.</li>
-     * <li>Must end with a letter or a number.</li>
-     * <li>Cannot begin with {@code 'goog'} prefix.</li>
-     * </ul>
-     */
-    public static PubsubSubscription fromPath(String path) {
-      if (path.startsWith(SUBSCRIPTION_RANDOM_TEST_PREFIX)
-          || path.startsWith(SUBSCRIPTION_STARTING_SIGNAL)) {
-        return new PubsubSubscription(Type.FAKE, "", path);
-      }
-
-      String projectName, subscriptionName;
-
-      Matcher v1beta1Match = V1BETA1_SUBSCRIPTION_REGEXP.matcher(path);
-      if (v1beta1Match.matches()) {
-        LOG.warn("Saw subscription in v1beta1 format. Subscriptions should be in the format "
-            + "projects/<project_id>/subscriptions/<subscription_name>");
-        projectName = v1beta1Match.group(1);
-        subscriptionName = v1beta1Match.group(2);
-      } else {
-        Matcher match = SUBSCRIPTION_REGEXP.matcher(path);
-        if (!match.matches()) {
-          throw new IllegalArgumentException("Pubsub subscription is not in "
-              + "projects/<project_id>/subscriptions/<subscription_name> format: " + path);
-        }
-        projectName = match.group(1);
-        subscriptionName = match.group(2);
-      }
-
-      validateProjectName(projectName);
-      validatePubsubName(subscriptionName);
-      return new PubsubSubscription(Type.NORMAL, projectName, subscriptionName);
-    }
-
-    /**
-     * Returns the string representation of this subscription as a path used in the Cloud Pub/Sub
-     * v1beta1 API.
-     *
-     * @deprecated the v1beta1 API for Cloud Pub/Sub is deprecated.
-     */
-    @Deprecated
-    public String asV1Beta1Path() {
-      if (type == Type.NORMAL) {
-        return "/subscriptions/" + project + "/" + subscription;
-      } else {
-        return subscription;
-      }
-    }
-
-    /**
-     * Returns the string representation of this subscription as a path used in the Cloud Pub/Sub
-     * v1beta2 API.
-     *
-     * @deprecated the v1beta2 API for Cloud Pub/Sub is deprecated.
-     */
-    @Deprecated
-    public String asV1Beta2Path() {
-      if (type == Type.NORMAL) {
-        return "projects/" + project + "/subscriptions/" + subscription;
-      } else {
-        return subscription;
-      }
-    }
-
-    /**
-     * Returns the string representation of this subscription as a path used in the Cloud Pub/Sub
-     * API.
-     */
-    public String asPath() {
-      if (type == Type.NORMAL) {
-        return "projects/" + project + "/subscriptions/" + subscription;
-      } else {
-        return subscription;
-      }
-    }
-  }
-
-  /**
-   * Class representing a Cloud Pub/Sub Topic.
-   */
-  public static class PubsubTopic implements Serializable {
-    /** {@code NORMAL} paths are parsed and validated; {@code FAKE} paths are kept verbatim. */
-    private enum Type { NORMAL, FAKE }
-
-    private final Type type;
-    private final String project;
-    private final String topic;
-
-    private PubsubTopic(Type type, String project, String topic) {
-      this.type = type;
-      this.project = project;
-      this.topic = topic;
-    }
-
-    /**
-     * Parses a topic path into a {@link PubsubTopic}.
-     *
-     * <p>The expected form is {@code projects/<project>/topics/<topic>}, where {@code <project>}
-     * is the name of the publishing project. A path matching the deprecated v1beta1 pattern is
-     * still accepted, but a warning is logged. A path equal to the special dev-null test name is
-     * stored as-is, without parsing or validation.
-     *
-     * <p>The {@code <topic>} component must satisfy all of the following:
-     *
-     * <ul>
-     * <li>It contains only lowercase letters, numbers, dashes ('-'), underscores ('_') and
-     * periods ('.').</li>
-     * <li>It is between 3 and 255 characters long.</li>
-     * <li>It begins with a letter.</li>
-     * <li>It ends with a letter or a number.</li>
-     * <li>It does not begin with the 'goog' prefix.</li>
-     * </ul>
-     *
-     * @param path the topic path to parse
-     * @return the parsed topic
-     * @throws IllegalArgumentException if {@code path} matches neither accepted pattern
-     */
-    public static PubsubTopic fromPath(String path) {
-      if (path.equals(TOPIC_DEV_NULL_TEST_NAME)) {
-        return new PubsubTopic(Type.FAKE, "", path);
-      }
-
-      // Try the deprecated pattern first, then fall back to the current one. Whichever pattern
-      // matches exposes the project as group 1 and the topic as group 2.
-      Matcher parsed = V1BETA1_TOPIC_REGEXP.matcher(path);
-      if (parsed.matches()) {
-        LOG.warn("Saw topic in v1beta1 format.  Topics should be in the format "
-            + "projects/<project_id>/topics/<topic_name>");
-      } else {
-        parsed = TOPIC_REGEXP.matcher(path);
-        if (!parsed.matches()) {
-          throw new IllegalArgumentException(
-              "Pubsub topic is not in projects/<project_id>/topics/<topic_name> format: " + path);
-        }
-      }
-
-      String projectId = parsed.group(1);
-      String topicId = parsed.group(2);
-      validateProjectName(projectId);
-      validatePubsubName(topicId);
-      return new PubsubTopic(Type.NORMAL, projectId, topicId);
-    }
-
-    /**
-     * Renders this topic as a Cloud Pub/Sub v1beta1 API path. A {@code FAKE} topic is rendered
-     * as the raw string it was created from.
-     *
-     * @deprecated the v1beta1 API for Cloud Pub/Sub is deprecated.
-     */
-    @Deprecated
-    public String asV1Beta1Path() {
-      if (type != Type.NORMAL) {
-        return topic;
-      }
-      return "/topics/" + project + "/" + topic;
-    }
-
-    /**
-     * Renders this topic as a Cloud Pub/Sub v1beta2 API path. A {@code FAKE} topic is rendered
-     * as the raw string it was created from.
-     *
-     * @deprecated the v1beta2 API for Cloud Pub/Sub is deprecated.
-     */
-    @Deprecated
-    public String asV1Beta2Path() {
-      return (type == Type.NORMAL)
-          ? "projects/" + project + "/topics/" + topic
-          : topic;
-    }
-
-    /**
-     * Renders this topic as a Cloud Pub/Sub API path. A {@code FAKE} topic is rendered as the
-     * raw string it was created from.
-     */
-    public String asPath() {
-      if (type != Type.NORMAL) {
-        return topic;
-      }
-      return "projects/" + project + "/topics/" + topic;
-    }
-  }
-
-  /**
-   * A {@link PTransform} that continuously reads from a Cloud Pub/Sub stream and
-   * returns a {@link PCollection} of {@link String Strings} containing the items from
-   * the stream.
-   *
-   * <p>When running with a {@link PipelineRunner} that only supports bounded
-   * {@link PCollection PCollections} (such as {@link DirectPipelineRunner} or
-   * {@link DataflowPipelineRunner} without {@code --streaming}), only a bounded portion of the
-   * input Pub/Sub stream can be processed. As such, either {@link Bound#maxNumRecords(int)} or
-   * {@link Bound#maxReadTime(Duration)} must be set.
-   */
-  public static class Read {
-    /**
-     * Creates and returns a transform for reading from Cloud Pub/Sub with the specified transform
-     * name.
-     */
-    public static Bound<String> named(String name) {
-      return new Bound<>(DEFAULT_PUBSUB_CODER).named(name);
-    }
-
-    /**
-     * Creates and returns a transform for reading from a Cloud Pub/Sub topic. Mutually exclusive
-     * with {@link #subscription(String)}.
-     *
-     * <p>See {@link PubsubIO.PubsubTopic#fromPath(String)} for more details on the format
-     * of the {@code topic} string.
-     *
-     * <p>Dataflow will start reading data published on this topic from the time the pipeline is
-     * started. Any data published on the topic before the pipeline is started will not be read by
-     * Dataflow.
-     */
-    public static Bound<String> topic(String topic) {
-      return new Bound<>(DEFAULT_PUBSUB_CODER).topic(topic);
-    }
-
-    /**
-     * Creates and returns a transform for reading from a specific Cloud Pub/Sub subscription.
-     * Mutually exclusive with {@link #topic(String)}.
-     *
-     * <p>See {@link PubsubIO.PubsubSubscription#fromPath(String)} for more details on the format
-     * of the {@code subscription} string.
-     */
-    public static Bound<String> subscription(String subscription) {
-      return new Bound<>(DEFAULT_PUBSUB_CODER).subscription(subscription);
-    }
-
-    /**
-     * Creates and returns a transform reading from Cloud Pub/Sub where record timestamps are
-     * expected to be provided as Pub/Sub message attributes. The {@code timestampLabel}
-     * parameter specifies the name of the attribute that contains the timestamp.
-     *
-     * <p>The timestamp value is expected to be represented in the attribute as either:
-     *
-     * <ul>
-     * <li>a numerical value representing the number of milliseconds since the Unix epoch. For
-     * example, if using the Joda time classes, {@link Instant#getMillis()} returns the correct
-     * value for this attribute.
-     * <li>a String in RFC 3339 format. For example, {@code 2015-10-29T23:41:41.123Z}. The
-     * sub-second component of the timestamp is optional, and digits beyond the first three
-     * (i.e., time units smaller than milliseconds) will be ignored.
-     * </ul>
-     *
-     * <p>If {@code timestampLabel} is not provided, the system will generate record timestamps
-     * the first time it sees each record. All windowing will be done relative to these timestamps.
-     *
-     * <p>By default, windows are emitted based on an estimate of when this source is likely
-     * done producing data for a given timestamp (referred to as the Watermark; see
-     * {@link AfterWatermark} for more details). Any late data will be handled by the trigger
-     * specified with the windowing strategy &ndash; by default it will be output immediately.
-     *
-     * <p>Note that the system can guarantee that no late data will ever be seen when it assigns
-     * timestamps by arrival time (i.e. {@code timestampLabel} is not provided).
-     *
-     * @see <a href="https://www.ietf.org/rfc/rfc3339.txt">RFC 3339</a>
-     */
-    public static Bound<String> timestampLabel(String timestampLabel) {
-      return new Bound<>(DEFAULT_PUBSUB_CODER).timestampLabel(timestampLabel);
-    }
-
-    /**
-     * Creates and returns a transform for reading from Cloud Pub/Sub where unique record
-     * identifiers are expected to be provided as Pub/Sub message attributes. The {@code idLabel}
-     * parameter specifies the attribute name. The value of the attribute can be any string
-     * that uniquely identifies this record.
-     *
-     * <p>If {@code idLabel} is not provided, Dataflow cannot guarantee that no duplicate data will
-     * be delivered on the Pub/Sub stream. In this case, deduplication of the stream will be
-     * strictly best effort.
-     */
-    public static Bound<String> idLabel(String idLabel) {
-      return new Bound<>(DEFAULT_PUBSUB_CODER).idLabel(idLabel);
-    }
-
-    /**
-     * Creates and returns a transform for reading from Cloud Pub/Sub that uses the given
-     * {@link Coder} to decode Pub/Sub messages into a value of type {@code T}.
-     *
-     * <p>By default, uses {@link StringUtf8Coder}, which just
-     * returns the text lines as Java strings.
-     *
-     * @param <T> the type of the decoded elements, and the elements
-     * of the resulting PCollection.
-     */
-    public static <T> Bound<T> withCoder(Coder<T> coder) {
-      return new Bound<>(coder);
-    }
-
-    /**
-     * Creates and returns a transform for reading from Cloud Pub/Sub with a maximum number of
-     * records that will be read. The transform produces a <i>bounded</i> {@link PCollection}.
-     *
-     * <p>Either this option or {@link #maxReadTime(Duration)} must be set in order to create a
-     * bounded source.
-     */
-    public static Bound<String> maxNumRecords(int maxNumRecords) {
-      return new Bound<>(DEFAULT_PUBSUB_CODER).maxNumRecords(maxNumRecords);
-    }
-
-    /**
-     * Creates and returns a transform for reading from Cloud Pub/Sub with a maximum
-     * duration during which records will be read.  The transform produces a <i>bounded</i>
-     * {@link PCollection}.
-     *
-     * <p>Either this option or {@link #maxNumRecords(int)} must be set in order to create a bounded
-     * source.
-     */
-    public static Bound<String> maxReadTime(Duration maxReadTime) {
-      return new Bound<>(DEFAULT_PUBSUB_CODER).maxReadTime(maxReadTime);
-    }
-
-    /**
-     * A {@link PTransform} that reads from a Cloud Pub/Sub source and returns a
-     * {@link PCollection} containing the items from the stream. The output is bounded when
-     * {@link #maxNumRecords(int)} or {@link #maxReadTime(Duration)} has been set, and unbounded
-     * otherwise.
-     */
-    public static class Bound<T> extends PTransform<PInput, PCollection<T>> {
-      /** The Cloud Pub/Sub topic to read from. */
-      @Nullable private final PubsubTopic topic;
-
-      /** The Cloud Pub/Sub subscription to read from. */
-      @Nullable private final PubsubSubscription subscription;
-
-      /** The name of the message attribute to read timestamps from. */
-      @Nullable private final String timestampLabel;
-
-      /** The name of the message attribute to read unique message IDs from. */
-      @Nullable private final String idLabel;
-
-      /** The coder used to decode each record. */
-      @Nullable private final Coder<T> coder;
-
-      /** Stop after reading this many records. 0 (the default) means no record limit. */
-      private final int maxNumRecords;
-
-      /** Stop after reading for this much time. {@code null} means no time limit. */
-      @Nullable private final Duration maxReadTime;
-
-      private Bound(Coder<T> coder) {
-        this(null, null, null, null, coder, null, 0, null);
-      }
-
-      private Bound(String name, PubsubSubscription subscription, PubsubTopic topic,
-          String timestampLabel, Coder<T> coder, String idLabel, int maxNumRecords,
-          Duration maxReadTime) {
-        super(name);
-        this.subscription = subscription;
-        this.topic = topic;
-        this.timestampLabel = timestampLabel;
-        this.coder = coder;
-        this.idLabel = idLabel;
-        this.maxNumRecords = maxNumRecords;
-        this.maxReadTime = maxReadTime;
-      }
-
-      /**
-       * Returns a transform that's like this one but with the given step name.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> named(String name) {
-        return new Bound<>(
-            name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime);
-      }
-
-      /**
-       * Returns a transform that's like this one but reading from the
-       * given subscription.
-       *
-       * <p>See {@link PubsubIO.PubsubSubscription#fromPath(String)} for more details on the format
-       * of the {@code subscription} string.
-       *
-       * <p>Multiple readers reading from the same subscription will each receive
-       * some arbitrary portion of the data.  Most likely, separate readers should
-       * use their own subscriptions.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> subscription(String subscription) {
-        return new Bound<>(name, PubsubSubscription.fromPath(subscription), topic, timestampLabel,
-            coder, idLabel, maxNumRecords, maxReadTime);
-      }
-
-      /**
-       * Returns a transform that's like this one but that reads from the specified topic.
-       *
-       * <p>See {@link PubsubIO.PubsubTopic#fromPath(String)} for more details on the
-       * format of the {@code topic} string.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> topic(String topic) {
-        return new Bound<>(name, subscription, PubsubTopic.fromPath(topic), timestampLabel, coder,
-            idLabel, maxNumRecords, maxReadTime);
-      }
-
-      /**
-       * Returns a transform that's like this one but that reads message timestamps
-       * from the given message attribute. See {@link PubsubIO.Read#timestampLabel(String)} for
-       * more details on the format of the timestamp attribute.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> timestampLabel(String timestampLabel) {
-        return new Bound<>(
-            name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime);
-      }
-
-      /**
-       * Returns a transform that's like this one but that reads unique message IDs
-       * from the given message attribute. See {@link PubsubIO.Read#idLabel(String)} for more
-       * details on the format of the ID attribute.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> idLabel(String idLabel) {
-        return new Bound<>(
-            name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime);
-      }
-
-      /**
-       * Returns a transform that's like this one but that uses the given
-       * {@link Coder} to decode each record into a value of type {@code X}.
-       *
-       * <p>Does not modify this object.
-       *
-       * @param <X> the type of the decoded elements, and the
-       * elements of the resulting PCollection.
-       */
-      public <X> Bound<X> withCoder(Coder<X> coder) {
-        return new Bound<>(
-            name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime);
-      }
-
-      /**
-       * Returns a transform that's like this one but will only read up to the specified
-       * maximum number of records from Cloud Pub/Sub. The transform produces a <i>bounded</i>
-       * {@link PCollection}. See {@link PubsubIO.Read#maxNumRecords(int)} for more details.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> maxNumRecords(int maxNumRecords) {
-        return new Bound<>(
-            name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime);
-      }
-
-      /**
-       * Returns a transform that's like this one but will only read during the specified
-       * duration from Cloud Pub/Sub. The transform produces a <i>bounded</i> {@link PCollection}.
-       * See {@link PubsubIO.Read#maxReadTime(Duration)} for more details.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> maxReadTime(Duration maxReadTime) {
-        return new Bound<>(
-            name, subscription, topic, timestampLabel, coder, idLabel, maxNumRecords, maxReadTime);
-      }
-
-      @Override
-      public PCollection<T> apply(PInput input) {
-        if (topic == null && subscription == null) {
-          throw new IllegalStateException("need to set either the topic or the subscription for "
-              + "a PubsubIO.Read transform");
-        }
-        if (topic != null && subscription != null) {
-          throw new IllegalStateException("Can't set both the topic and the subscription for a "
-              + "PubsubIO.Read transform");
-        }
-
-        boolean boundedOutput = getMaxNumRecords() > 0 || getMaxReadTime() != null;
-
-        if (boundedOutput) {
-          return input.getPipeline().begin()
-              .apply(Create.of((Void) null)).setCoder(VoidCoder.of())
-              .apply(ParDo.of(new PubsubReader())).setCoder(coder);
-        } else {
-          return PCollection.<T>createPrimitiveOutputInternal(
-                  input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.UNBOUNDED)
-              .setCoder(coder);
-        }
-      }
-
-      @Override
-      protected Coder<T> getDefaultOutputCoder() {
-        return coder;
-      }
-
-      public PubsubTopic getTopic() {
-        return topic;
-      }
-
-      public PubsubSubscription getSubscription() {
-        return subscription;
-      }
-
-      public String getTimestampLabel() {
-        return timestampLabel;
-      }
-
-      public Coder<T> getCoder() {
-        return coder;
-      }
-
-      public String getIdLabel() {
-        return idLabel;
-      }
-
-      public int getMaxNumRecords() {
-        return maxNumRecords;
-      }
-
-      public Duration getMaxReadTime() {
-        return maxReadTime;
-      }
-
-      private class PubsubReader extends DoFn<Void, T> {
-        private static final int DEFAULT_PULL_SIZE = 100; // Pull size when no record limit is set.
-
-        @Override
-        public void processElement(ProcessContext c) throws IOException {
-          Pubsub pubsubClient =
-              Transport.newPubsubClient(c.getPipelineOptions().as(DataflowPipelineOptions.class))
-                  .build();
-
-          String subscription;
-          if (getSubscription() == null) { // Topic given: create a temporary subscription.
-            String topic = getTopic().asPath();
-            String[] split = topic.split("/");
-            subscription =
-                "projects/" + split[1] + "/subscriptions/" + split[3] + "_dataflow_"
-                + new Random().nextLong();
-            Subscription subInfo = new Subscription().setAckDeadlineSeconds(60).setTopic(topic);
-            try {
-              pubsubClient.projects().subscriptions().create(subscription, subInfo).execute();
-            } catch (Exception e) {
-              throw new RuntimeException("Failed to create subscription: ", e);
-            }
-          } else {
-            subscription = getSubscription().asPath();
-          }
-
-          Instant endTime = (getMaxReadTime() == null)
-              ? new Instant(Long.MAX_VALUE) : Instant.now().plus(getMaxReadTime());
-
-          List<PubsubMessage> messages = new ArrayList<>();
-
-          Throwable finallyBlockException = null;
-          try {
-            while ((getMaxNumRecords() == 0 || messages.size() < getMaxNumRecords())
-                && Instant.now().isBefore(endTime)) {
-              PullRequest pullRequest = new PullRequest().setReturnImmediately(false);
-              if (getMaxNumRecords() > 0) {
-                pullRequest.setMaxMessages(getMaxNumRecords() - messages.size());
-              } else {
-                pullRequest.setMaxMessages(DEFAULT_PULL_SIZE);
-              }
-
-              PullResponse pullResponse =
-                  pubsubClient.projects().subscriptions().pull(subscription, pullRequest).execute();
-              List<String> ackIds = new ArrayList<>();
-              if (pullResponse.getReceivedMessages() != null) {
-                for (ReceivedMessage received : pullResponse.getReceivedMessages()) {
-                  messages.add(received.getMessage());
-                  ackIds.add(received.getAckId());
-                }
-              }
-
-              if (ackIds.size() != 0) { // Ack right away, before any message is output.
-                AcknowledgeRequest ackRequest = new AcknowledgeRequest().setAckIds(ackIds);
-                pubsubClient.projects()
-                    .subscriptions()
-                    .acknowledge(subscription, ackRequest)
-                    .execute();
-              }
-            }
-          } catch (IOException e) {
-            throw new RuntimeException("Unexpected exception while reading from Pubsub: ", e);
-          } finally {
-            if (getTopic() != null) { // Clean up the temporary subscription created above.
-              try {
-                pubsubClient.projects().subscriptions().delete(subscription).execute();
-              } catch (IOException e) {
-                finallyBlockException = new RuntimeException("Failed to delete subscription: ", e);
-                LOG.error("Failed to delete subscription: ", e);
-              }
-            }
-          }
-          if (finallyBlockException != null) {
-            Throwables.propagate(finallyBlockException);
-          }
-
-          for (PubsubMessage message : messages) {
-            c.outputWithTimestamp(
-                CoderUtils.decodeFromByteArray(getCoder(), message.decodeData()),
-                assignMessageTimestamp(message, getTimestampLabel(), Clock.SYSTEM));
-          }
-        }
-      }
-    }
-
-    /** Disallow construction of utility class. */
-    private Read() {}
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /** Disallow construction of utility class. */
-  private PubsubIO() {}
-
-  /**
-   * A {@link PTransform} that continuously writes a
-   * {@link PCollection} of {@link String Strings} to a Cloud Pub/Sub stream.
-   */
-  // TODO: Support non-String encodings.
-  public static class Write {
-    /**
-     * Creates a transform that writes to Pub/Sub with the given step name.
-     */
-    public static Bound<String> named(String name) {
-      return new Bound<>(DEFAULT_PUBSUB_CODER).named(name);
-    }
-
-    /**
-     * Creates a transform that publishes to the specified topic.
-     *
-     * <p>See {@link PubsubIO.PubsubTopic#fromPath(String)} for more details on the format of the
-     * {@code topic} string.
-     */
-    public static Bound<String> topic(String topic) {
-      return new Bound<>(DEFAULT_PUBSUB_CODER).topic(topic);
-    }
-
-    /**
-     * Creates a transform that writes to Pub/Sub, adds each record's timestamp to the published
-     * messages in an attribute with the specified name. The value of the attribute will be a number
-     * representing the number of milliseconds since the Unix epoch. For example, if using the Joda
-     * time classes, {@link Instant#Instant(long)} can be used to parse this value.
-     *
-     * <p>If the output from this sink is being read by another Dataflow source, then
-     * {@link PubsubIO.Read#timestampLabel(String)} can be used to ensure the other source reads
-     * these timestamps from the appropriate attribute.
-     */
-    public static Bound<String> timestampLabel(String timestampLabel) {
-      return new Bound<>(DEFAULT_PUBSUB_CODER).timestampLabel(timestampLabel);
-    }
-
-    /**
-     * Creates a transform that writes to Pub/Sub, adding each record's unique identifier to the
-     * published messages in an attribute with the specified name. The value of the attribute is an
-     * opaque string.
-     *
-     * <p>If the the output from this sink is being read by another Dataflow source, then
-     * {@link PubsubIO.Read#idLabel(String)} can be used to ensure that* the other source reads
-     * these unique identifiers from the appropriate attribute.
-     */
-    public static Bound<String> idLabel(String idLabel) {
-      return new Bound<>(DEFAULT_PUBSUB_CODER).idLabel(idLabel);
-    }
-
-    /**
-     * Creates a transform that  uses the given {@link Coder} to encode each of the
-     * elements of the input collection into an output message.
-     *
-     * <p>By default, uses {@link StringUtf8Coder}, which writes input Java strings directly as
-     * records.
-     *
-     * @param <T> the type of the elements of the input PCollection
-     */
-    public static <T> Bound<T> withCoder(Coder<T> coder) {
-      return new Bound<>(coder);
-    }
-
-    /**
-     * A {@link PTransform} that writes an unbounded {@link PCollection} of {@link String Strings}
-     * to a Cloud Pub/Sub stream.
-     */
-    public static class Bound<T> extends PTransform<PCollection<T>, PDone> {
-      /** The Cloud Pub/Sub topic to publish to. */
-      @Nullable private final PubsubTopic topic;
-      /** The name of the message attribute to publish message timestamps in. */
-      @Nullable private final String timestampLabel;
-      /** The name of the message attribute to publish unique message IDs in. */
-      @Nullable private final String idLabel;
-      private final Coder<T> coder;
-
-      private Bound(Coder<T> coder) {
-        this(null, null, null, null, coder);
-      }
-
-      private Bound(
-          String name, PubsubTopic topic, String timestampLabel, String idLabel, Coder<T> coder) {
-        super(name);
-        this.topic = topic;
-        this.timestampLabel = timestampLabel;
-        this.idLabel = idLabel;
-        this.coder = coder;
-      }
-
-      /**
-       * Returns a new transform that's like this one but with the specified step
-       * name.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> named(String name) {
-        return new Bound<>(name, topic, timestampLabel, idLabel, coder);
-      }
-
-      /**
-       * Returns a new transform that's like this one but that writes to the specified
-       * topic.
-       *
-       * <p>See {@link PubsubIO.PubsubTopic#fromPath(String)} for more details on the format of the
-       * {@code topic} string.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> topic(String topic) {
-        return new Bound<>(name, PubsubTopic.fromPath(topic), timestampLabel, idLabel, coder);
-      }
-
-      /**
-       * Returns a new transform that's like this one but that publishes record timestamps
-       * to a message attribute with the specified name. See
-       * {@link PubsubIO.Write#timestampLabel(String)} for more details.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> timestampLabel(String timestampLabel) {
-        return new Bound<>(name, topic, timestampLabel, idLabel, coder);
-      }
-
-      /**
-       * Returns a new transform that's like this one but that publishes unique record IDs
-       * to a message attribute with the specified name. See {@link PubsubIO.Write#idLabel(String)}
-       * for more details.
-       *
-       * <p>Does not modify this object.
-       */
-      public Bound<T> idLabel(String idLabel) {
-        return new Bound<>(name, topic, timestampLabel, idLabel, coder);
-      }
-
-      /**
-       * Returns a new transform that's like this one
-       * but that uses the given {@link Coder} to encode each of
-       * the elements of the input {@link PCollection} into an
-       * output record.
-       *
-       * <p>Does not modify this object.
-       *
-       * @param <X> the type of the elements of the input {@link PCollection}
-       */
-      public <X> Bound<X> withCoder(Coder<X> coder) {
-        return new Bound<>(name, topic, timestampLabel, idLabel, coder);
-      }
-
-      @Override
-      public PDone apply(PCollection<T> input) {
-        if (topic == null) {
-          throw new IllegalStateException("need to set the topic of a PubsubIO.Write transform");
-        }
-        input.apply(ParDo.of(new PubsubWriter()));
-        return PDone.in(input.getPipeline());
-      }
-
-      @Override
-      protected Coder<Void> getDefaultOutputCoder() {
-        return VoidCoder.of();
-      }
-
-      public PubsubTopic getTopic() {
-        return topic;
-      }
-
-      public String getTimestampLabel() {
-        return timestampLabel;
-      }
-
-      public String getIdLabel() {
-        return idLabel;
-      }
-
-      public Coder<T> getCoder() {
-        return coder;
-      }
-
-      private class PubsubWriter extends DoFn<T, Void> {
-        private static final int MAX_PUBLISH_BATCH_SIZE = 100;
-        private transient List<PubsubMessage> output;
-        private transient Pubsub pubsubClient;
-
-        @Override
-        public void startBundle(Context c) {
-          this.output = new ArrayList<>();
-          this.pubsubClient =
-              Transport.newPubsubClient(c.getPipelineOptions().as(DataflowPipelineOptions.class))
-                  .build();
-        }
-
-        @Override
-        public void processElement(ProcessContext c) throws IOException {
-          PubsubMessage message =
-              new PubsubMessage().encodeData(CoderUtils.encodeToByteArray(getCoder(), c.element()));
-          if (getTimestampLabel() != null) {
-            Map<String, String> attributes = message.getAttributes();
-            if (attributes == null) {
-              attributes = new HashMap<>();
-              message.setAttributes(attributes);
-            }
-            attributes.put(getTimestampLabel(), String.valueOf(c.timestamp().getMillis()));
-          }
-          output.add(message);
-
-          if (output.size() >= MAX_PUBLISH_BATCH_SIZE) {
-            publish();
-          }
-        }
-
-        @Override
-        public void finishBundle(Context c) throws IOException {
-          if (!output.isEmpty()) {
-            publish();
-          }
-        }
-
-        private void publish() throws IOException {
-          PublishRequest publishRequest = new PublishRequest().setMessages(output);
-          pubsubClient.projects().topics()
-              .publish(getTopic().asPath(), publishRequest)
-              .execute();
-          output.clear();
-        }
-      }
-    }
-
-    /** Disallow construction of utility class. */
-    private Write() {}
-  }
-}

[18/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java
deleted file mode 100644
index aac57bc..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResult.java
+++ /dev/null
@@ -1,463 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.join;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addObject;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.common.Reiterator;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TupleTagList;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Iterators;
-import com.google.common.collect.PeekingIterator;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Objects;
-
-/**
- * A row result of a {@link CoGroupByKey}.  This is a tuple of {@link Iterable}s produced for
- * a given key, and these can be accessed in different ways.
- */
-public class CoGbkResult {
-  /**
-   * A list, indexed by integer union tag, of the values for that tag.
-   * Note: although named a map, this is a list; the index of each entry is
-   * the union tag of the values that entry holds.
-   */
-  private final List<Iterable<?>> valueMap;
-
-  private final CoGbkResultSchema schema;
-
-  private static final int DEFAULT_IN_MEMORY_ELEMENT_COUNT = 10_000;
-
-  private static final Logger LOG = LoggerFactory.getLogger(CoGbkResult.class);
-
-  /**
-   * A row in the {@link PCollection} resulting from a {@link CoGroupByKey} transform.
-   * Currently, this row must fit into memory.
-   *
-   * @param schema the set of tuple tags used to refer to input tables and
-   *               result values
-   * @param taggedValues the raw results from a group-by-key
-   */
-  public CoGbkResult(
-      CoGbkResultSchema schema,
-      Iterable<RawUnionValue> taggedValues) {
-    this(schema, taggedValues, DEFAULT_IN_MEMORY_ELEMENT_COUNT);
-  }
-
-  @SuppressWarnings("unchecked")
-  public CoGbkResult(
-      CoGbkResultSchema schema,
-      Iterable<RawUnionValue> taggedValues,
-      int inMemoryElementCount) {
-    this.schema = schema;
-    valueMap = new ArrayList<>();
-    for (int unionTag = 0; unionTag < schema.size(); unionTag++) {
-      valueMap.add(new ArrayList<>());
-    }
-
-    // Demultiplex the first inMemoryElementCount tagged union values
-    // according to their tag.
-    final Iterator<RawUnionValue> taggedIter = taggedValues.iterator();
-    int elementCount = 0;
-    while (taggedIter.hasNext()) {
-      if (elementCount++ >= inMemoryElementCount && taggedIter instanceof Reiterator) {
-        // Let the tails be lazy.
-        break;
-      }
-      RawUnionValue value = taggedIter.next();
-      // Make sure the given union tag has a corresponding tuple tag in the
-      // schema.
-      int unionTag = value.getUnionTag();
-      if (schema.size() <= unionTag) {
-        throw new IllegalStateException("union tag " + unionTag +
-            " has no corresponding tuple tag in the result schema");
-      }
-      List<Object> valueList = (List<Object>) valueMap.get(unionTag);
-      valueList.add(value.getValue());
-    }
-
-    if (taggedIter.hasNext()) {
-      // If we get here, there were more elements than we can afford to
-      // keep in memory, so we copy the re-iterable of remaining items
-      // and append filtered views to each of the sorted lists computed earlier.
-      LOG.info("CoGbkResult has more than " + inMemoryElementCount + " elements,"
-               + " reiteration (which may be slow) is required.");
-      final Reiterator<RawUnionValue> tail = (Reiterator<RawUnionValue>) taggedIter;
-      // This is a trinary-state array recording whether a given tag is present in the tail. The
-      // initial value is null (unknown) for all tags, and the first iteration through the entire
-      // list will set these values to true or false to avoid needlessly iterating if filtering
-      // against a given tag would not match anything.
-      final Boolean[] containsTag = new Boolean[schema.size()];
-      for (int unionTag = 0; unionTag < schema.size(); unionTag++) {
-        final int unionTag0 = unionTag;
-        updateUnionTag(tail, containsTag, unionTag, unionTag0);
-      }
-    }
-  }
-
-  private <T> void updateUnionTag(
-      final Reiterator<RawUnionValue> tail, final Boolean[] containsTag,
-      int unionTag, final int unionTag0) {
-    @SuppressWarnings("unchecked")
-    final Iterable<T> head = (Iterable<T>) valueMap.get(unionTag);
-    valueMap.set(
-        unionTag,
-        new Iterable<T>() {
-          @Override
-          public Iterator<T> iterator() {
-            return Iterators.concat(
-                head.iterator(),
-                new UnionValueIterator<T>(unionTag0, tail.copy(), containsTag));
-          }
-        });
-  }
-
-  public boolean isEmpty() {
-    for (Iterable<?> tagValues : valueMap) {
-      if (tagValues.iterator().hasNext()) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  /**
-   * Returns the schema used by this {@link CoGbkResult}.
-   */
-  public CoGbkResultSchema getSchema() {
-    return schema;
-  }
-
-  @Override
-  public String toString() {
-    return valueMap.toString();
-  }
-
-  /**
-   * Returns the values from the table represented by the given
-   * {@code TupleTag<V>} as an {@code Iterable<V>} (which may be empty if there
-   * are no results).
-   *
-   * <p>If tag was not part of the original {@link CoGroupByKey},
-   * throws an IllegalArgumentException.
-   */
-  public <V> Iterable<V> getAll(TupleTag<V> tag) {
-    int index = schema.getIndex(tag);
-    if (index < 0) {
-      throw new IllegalArgumentException("TupleTag " + tag +
-          " is not in the schema");
-    }
-    @SuppressWarnings("unchecked")
-    Iterable<V> unions = (Iterable<V>) valueMap.get(index);
-    return unions;
-  }
-
-  /**
-   * If there is a singleton value for the given tag, returns it.
-   * Otherwise, throws an IllegalArgumentException.
-   *
-   * <p>If tag was not part of the original {@link CoGroupByKey},
-   * throws an IllegalArgumentException.
-   */
-  public <V> V getOnly(TupleTag<V> tag) {
-    return innerGetOnly(tag, null, false);
-  }
-
-  /**
-   * If there is a singleton value for the given tag, returns it.  If there is
-   * no value for the given tag, returns the defaultValue.
-   *
-   * <p>If tag was not part of the original {@link CoGroupByKey},
-   * throws an IllegalArgumentException.
-   */
-  public <V> V getOnly(TupleTag<V> tag, V defaultValue) {
-    return innerGetOnly(tag, defaultValue, true);
-  }
-
-  /**
-   * A {@link Coder} for {@link CoGbkResult}s.
-   */
-  public static class CoGbkResultCoder extends StandardCoder<CoGbkResult> {
-
-    private final CoGbkResultSchema schema;
-    private final UnionCoder unionCoder;
-
-    /**
-     * Returns a {@link CoGbkResultCoder} for the given schema and {@link UnionCoder}.
-     */
-    public static CoGbkResultCoder of(
-        CoGbkResultSchema schema,
-        UnionCoder unionCoder) {
-      return new CoGbkResultCoder(schema, unionCoder);
-    }
-
-    @JsonCreator
-    public static CoGbkResultCoder of(
-        @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-        List<Coder<?>> components,
-        @JsonProperty(PropertyNames.CO_GBK_RESULT_SCHEMA) CoGbkResultSchema schema) {
-      Preconditions.checkArgument(components.size() == 1,
-          "Expecting 1 component, got " + components.size());
-      return new CoGbkResultCoder(schema, (UnionCoder) components.get(0));
-    }
-
-    private CoGbkResultCoder(
-        CoGbkResultSchema tupleTags,
-        UnionCoder unionCoder) {
-      this.schema = tupleTags;
-      this.unionCoder = unionCoder;
-    }
-
-
-    @Override
-    public List<? extends Coder<?>> getCoderArguments() {
-      return null;
-    }
-
-    @Override
-    public List<? extends Coder<?>> getComponents() {
-      return Arrays.<Coder<?>>asList(unionCoder);
-    }
-
-    @Override
-    public CloudObject asCloudObject() {
-      CloudObject result = super.asCloudObject();
-      addObject(result, PropertyNames.CO_GBK_RESULT_SCHEMA, schema.asCloudObject());
-      return result;
-    }
-
-    @Override
-    @SuppressWarnings("unchecked")
-    public void encode(
-        CoGbkResult value,
-        OutputStream outStream,
-        Context context) throws CoderException,
-        IOException {
-      if (!schema.equals(value.getSchema())) {
-        throw new CoderException("input schema does not match coder schema");
-      }
-      for (int unionTag = 0; unionTag < schema.size(); unionTag++) {
-        tagListCoder(unionTag).encode(value.valueMap.get(unionTag), outStream, Context.NESTED);
-      }
-    }
-
-    @Override
-    public CoGbkResult decode(
-        InputStream inStream,
-        Context context)
-        throws CoderException, IOException {
-      List<Iterable<?>> valueMap = new ArrayList<>();
-      for (int unionTag = 0; unionTag < schema.size(); unionTag++) {
-        valueMap.add(tagListCoder(unionTag).decode(inStream, Context.NESTED));
-      }
-      return new CoGbkResult(schema, valueMap);
-    }
-
-    @SuppressWarnings("rawtypes")
-    private IterableCoder tagListCoder(int unionTag) {
-      return IterableCoder.of(unionCoder.getComponents().get(unionTag));
-    }
-
-    @Override
-    public boolean equals(Object object) {
-      if (this == object) {
-        return true;
-      }
-      if (!(object instanceof CoGbkResultCoder)) {
-        return false;
-      }
-      CoGbkResultCoder other = (CoGbkResultCoder) object;
-      return schema.equals(other.schema) && unionCoder.equals(other.unionCoder);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hashCode(schema);
-    }
-
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      verifyDeterministic(
-          "CoGbkResult requires the union coder to be deterministic", unionCoder);
-    }
-  }
-
-
-  //////////////////////////////////////////////////////////////////////////////
-  // Methods for directly constructing a CoGbkResult
-  //
-  // (for example, creating test data for a transform that consumes a
-  // CoGbkResult)
-
-  /**
-   * Returns a new CoGbkResult that contains just the given tag and given data.
-   */
-  public static <V> CoGbkResult of(TupleTag<V> tag, List<V> data) {
-    return CoGbkResult.empty().and(tag, data);
-  }
-
-  /**
-   * Returns a new {@link CoGbkResult} based on this, with the given tag and given data
-   * added to it.
-   */
-  public <V> CoGbkResult and(TupleTag<V> tag, List<V> data) {
-    if (nextTestUnionId != schema.size()) {
-      throw new IllegalArgumentException(
-          "Attempting to call and() on a CoGbkResult apparently not created by"
-          + " of().");
-    }
-    List<Iterable<?>> valueMap = new ArrayList<>(this.valueMap);
-    valueMap.add(data);
-    return new CoGbkResult(
-        new CoGbkResultSchema(schema.getTupleTagList().and(tag)), valueMap,
-        nextTestUnionId + 1);
-  }
-
-  /**
-   * Returns an empty {@link CoGbkResult}.
-   */
-  public static <V> CoGbkResult empty() {
-    return new CoGbkResult(new CoGbkResultSchema(TupleTagList.empty()),
-        new ArrayList<Iterable<?>>());
-  }
-
-  //////////////////////////////////////////////////////////////////////////////
-
-  private int nextTestUnionId = 0;
-
-  private CoGbkResult(
-      CoGbkResultSchema schema,
-      List<Iterable<?>> valueMap,
-      int nextTestUnionId) {
-    this(schema, valueMap);
-    this.nextTestUnionId = nextTestUnionId;
-  }
-
-  private CoGbkResult(
-      CoGbkResultSchema schema,
-      List<Iterable<?>> valueMap) {
-    this.schema = schema;
-    this.valueMap = valueMap;
-  }
-
-  private <V> V innerGetOnly(
-      TupleTag<V> tag,
-      V defaultValue,
-      boolean useDefault) {
-    int index = schema.getIndex(tag);
-    if (index < 0) {
-      throw new IllegalArgumentException("TupleTag " + tag
-          + " is not in the schema");
-    }
-    @SuppressWarnings("unchecked")
-    Iterator<V> unions = (Iterator<V>) valueMap.get(index).iterator();
-    if (!unions.hasNext()) {
-      if (useDefault) {
-        return defaultValue;
-      } else {
-        throw new IllegalArgumentException("TupleTag " + tag
-            + " corresponds to an empty result, and no default was provided");
-      }
-    }
-    V value = unions.next();
-    if (unions.hasNext()) {
-      throw new IllegalArgumentException("TupleTag " + tag
-          + " corresponds to a non-singleton result");
-    }
-    return value;
-  }
-
-  /**
-   * Lazily filters and recasts an {@code Iterator<RawUnionValue>} into an
-   * {@code Iterator<V>}, where V is the type of the raw union value's contents.
-   */
-  private static class UnionValueIterator<V> implements Iterator<V> {
-
-    private final int tag;
-    private final PeekingIterator<RawUnionValue> unions;
-    private final Boolean[] containsTag;
-
-    private UnionValueIterator(int tag, Iterator<RawUnionValue> unions, Boolean[] containsTag) {
-      this.tag = tag;
-      this.unions = Iterators.peekingIterator(unions);
-      this.containsTag = containsTag;
-    }
-
-    @Override
-    public boolean hasNext() {
-      if (containsTag[tag] == Boolean.FALSE) {
-        return false;
-      }
-      advance();
-      if (unions.hasNext()) {
-        return true;
-      } else {
-        // Now that we've iterated over all the values, we can resolve all the "unknown" null
-        // values to false.
-        for (int i = 0; i < containsTag.length; i++) {
-          if (containsTag[i] == null) {
-            containsTag[i] = false;
-          }
-        }
-        return false;
-      }
-    }
-
-    @Override
-    @SuppressWarnings("unchecked")
-    public V next() {
-      advance();
-      return (V) unions.next().getValue();
-    }
-
-    private void advance() {
-      while (unions.hasNext()) {
-        int curTag = unions.peek().getUnionTag();
-        containsTag[curTag] = true;
-        if (curTag == tag) {
-          break;
-        }
-        unions.next();
-      }
-    }
-
-    @Override
-    public void remove() {
-      throw new UnsupportedOperationException();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java
deleted file mode 100644
index 2860ba7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGbkResultSchema.java
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.join;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addList;
-
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TupleTagList;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-/**
- * A schema for the results of a {@link CoGroupByKey}.  This maintains the full
- * set of {@link TupleTag}s for the results of a {@link CoGroupByKey} and
- * facilitates mapping between {@link TupleTag}s and
- * {@link RawUnionValue} tags (which are used as secondary keys in the
- * {@link CoGroupByKey}).
- */
-public class CoGbkResultSchema implements Serializable {
-
-  private final TupleTagList tupleTagList;
-
-  @JsonCreator
-  public static CoGbkResultSchema of(
-      @JsonProperty(PropertyNames.TUPLE_TAGS) List<TupleTag<?>> tags) {
-    TupleTagList tupleTags = TupleTagList.empty();
-    for (TupleTag<?> tag : tags) {
-      tupleTags = tupleTags.and(tag);
-    }
-    return new CoGbkResultSchema(tupleTags);
-  }
-
-  /**
-   * Maps TupleTags to union tags.  This avoids needing to encode the tags
-   * themselves.
-   */
-  private final HashMap<TupleTag<?>, Integer> tagMap = new HashMap<>();
-
-  /**
-   * Builds a schema from a tuple of {@code TupleTag<?>}s.
-   */
-  public CoGbkResultSchema(TupleTagList tupleTagList) {
-    this.tupleTagList = tupleTagList;
-    int index = -1;
-    for (TupleTag<?> tag : tupleTagList.getAll()) {
-      index++;
-      tagMap.put(tag, index);
-    }
-  }
-
-  /**
-   * Returns the index for the given tuple tag, if the tag is present in this
-   * schema, -1 if it isn't.
-   */
-  public int getIndex(TupleTag<?> tag) {
-    Integer index = tagMap.get(tag);
-    return index == null ? -1 : index;
-  }
-
-  /**
-   * Returns the tuple tag at the given index.
-   */
-  public TupleTag<?> getTag(int index) {
-    return tupleTagList.get(index);
-  }
-
-  /**
-   * Returns the number of columns for this schema.
-   */
-  public int size() {
-    return tupleTagList.getAll().size();
-  }
-
-  /**
-   * Returns the TupleTagList tuple associated with this schema.
-   */
-  public TupleTagList getTupleTagList() {
-    return tupleTagList;
-  }
-
-  public CloudObject asCloudObject() {
-    CloudObject result = CloudObject.forClass(getClass());
-    List<CloudObject> serializedTags = new ArrayList<>(tupleTagList.size());
-    for (TupleTag<?> tag : tupleTagList.getAll()) {
-      serializedTags.add(tag.asCloudObject());
-    }
-    addList(result, PropertyNames.TUPLE_TAGS, serializedTags);
-    return result;
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (obj == this) {
-      return true;
-    }
-    if (!(obj instanceof CoGbkResultSchema)) {
-      return false;
-    }
-    CoGbkResultSchema other = (CoGbkResultSchema) obj;
-    return tupleTagList.getAll().equals(other.tupleTagList.getAll());
-  }
-
-  @Override
-  public int hashCode() {
-    return tupleTagList.getAll().hashCode();
-  }
-
-  @Override
-  public String toString() {
-    return "CoGbkResultSchema: " + tupleTagList.getAll();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java
deleted file mode 100644
index b840682..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/CoGroupByKey.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.join;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.Flatten;
-import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult.CoGbkResultCoder;
-import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple.TaggedKeyedPCollection;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * A {@link PTransform} that performs a {@link CoGroupByKey} on a tuple
- * of tables.  A {@link CoGroupByKey} groups results from all
- * tables by like keys into {@link CoGbkResult}s,
- * from which the results for any specific table can be accessed by the
- * {@link com.google.cloud.dataflow.sdk.values.TupleTag}
- * supplied with the initial table.
- *
- * <p>Example of performing a {@link CoGroupByKey} followed by a
- * {@link ParDo} that consumes
- * the results:
- * <pre> {@code
- * PCollection<KV<K, V1>> pt1 = ...;
- * PCollection<KV<K, V2>> pt2 = ...;
- *
- * final TupleTag<V1> t1 = new TupleTag<>();
- * final TupleTag<V2> t2 = new TupleTag<>();
- * PCollection<KV<K, CoGbkResult>> coGbkResultCollection =
- *   KeyedPCollectionTuple.of(t1, pt1)
- *                        .and(t2, pt2)
- *                        .apply(CoGroupByKey.<K>create());
- *
- * PCollection<T> finalResultCollection =
- *   coGbkResultCollection.apply(ParDo.of(
- *     new DoFn<KV<K, CoGbkResult>, T>() {
- *       @Override
- *       public void processElement(ProcessContext c) {
- *         KV<K, CoGbkResult> e = c.element();
- *         Iterable<V1> pt1Vals = e.getValue().getAll(t1);
- *         V2 pt2Val = e.getValue().getOnly(t2);
- *          ... Do Something ....
- *         c.output(...some T...);
- *       }
- *     }));
- * } </pre>
- *
- * @param <K> the type of the keys in the input and output
- * {@code PCollection}s
- */
-public class CoGroupByKey<K> extends
-    PTransform<KeyedPCollectionTuple<K>,
-               PCollection<KV<K, CoGbkResult>>> {
-  /**
-   * Returns a {@code CoGroupByKey<K>} {@code PTransform}.
-   *
-   * @param <K> the type of the keys in the input and output
-   * {@code PCollection}s
-   */
-  public static <K> CoGroupByKey<K> create() {
-    return new CoGroupByKey<>();
-  }
-
-  private CoGroupByKey() { }
-
-  @Override
-  public PCollection<KV<K, CoGbkResult>> apply(
-      KeyedPCollectionTuple<K> input) {
-    if (input.isEmpty()) {
-      throw new IllegalArgumentException(
-          "must have at least one input to a KeyedPCollections");
-    }
-
-    // First build the union coder.
-    // TODO: Look at better integration of union types with the
-    // schema specified in the input.
-    List<Coder<?>> codersList = new ArrayList<>();
-    for (TaggedKeyedPCollection<K, ?> entry : input.getKeyedCollections()) {
-      codersList.add(getValueCoder(entry.pCollection));
-    }
-    UnionCoder unionCoder = UnionCoder.of(codersList);
-    Coder<K> keyCoder = input.getKeyCoder();
-    KvCoder<K, RawUnionValue> kVCoder =
-        KvCoder.of(keyCoder, unionCoder);
-
-    PCollectionList<KV<K, RawUnionValue>> unionTables =
-        PCollectionList.empty(input.getPipeline());
-
-    // TODO: Use the schema to order the indices rather than depending
-    // on the fact that the schema ordering is identical to the ordering from
-    // input.getKeyedCollections().
-    int index = -1;
-    for (TaggedKeyedPCollection<K, ?> entry : input.getKeyedCollections()) {
-      index++;
-      PCollection<KV<K, RawUnionValue>> unionTable =
-          makeUnionTable(index, entry.pCollection, kVCoder);
-      unionTables = unionTables.and(unionTable);
-    }
-
-    PCollection<KV<K, RawUnionValue>> flattenedTable =
-        unionTables.apply(Flatten.<KV<K, RawUnionValue>>pCollections());
-
-    PCollection<KV<K, Iterable<RawUnionValue>>> groupedTable =
-        flattenedTable.apply(GroupByKey.<K, RawUnionValue>create());
-
-    CoGbkResultSchema tupleTags = input.getCoGbkResultSchema();
-    PCollection<KV<K, CoGbkResult>> result = groupedTable.apply(
-        ParDo.of(new ConstructCoGbkResultFn<K>(tupleTags))
-          .named("ConstructCoGbkResultFn"));
-    result.setCoder(KvCoder.of(keyCoder,
-        CoGbkResultCoder.of(tupleTags, unionCoder)));
-
-    return result;
-  }
-
-  //////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Returns the value coder for the given PCollection.  Requires that the
-   * PCollection's coder is an instance of {@code KvCoder<K, V>}.
-   */
-  private <V> Coder<V> getValueCoder(PCollection<KV<K, V>> pCollection) {
-    // Assumes that the PCollection uses a KvCoder.
-    Coder<?> entryCoder = pCollection.getCoder();
-    if (!(entryCoder instanceof KvCoder<?, ?>)) {
-      throw new IllegalArgumentException("PCollection does not use a KvCoder");
-    }
-    @SuppressWarnings("unchecked")
-    KvCoder<K, V> coder = (KvCoder<K, V>) entryCoder;
-    return coder.getValueCoder();
-  }
-
-  /**
-   * Returns a UnionTable for the given input PCollection, using the given
-   * union index and the given unionTableEncoder.
-   */
-  private <V> PCollection<KV<K, RawUnionValue>> makeUnionTable(
-      final int index,
-      PCollection<KV<K, V>> pCollection,
-      KvCoder<K, RawUnionValue> unionTableEncoder) {
-
-    return pCollection.apply(ParDo.of(
-        new ConstructUnionTableFn<K, V>(index)).named("MakeUnionTable" + index))
-                                               .setCoder(unionTableEncoder);
-  }
-
-  /**
-   * A DoFn to construct a UnionTable (i.e., a
-   * {@code PCollection<KV<K, RawUnionValue>>}) from a
-   * {@code PCollection<KV<K, V>>}.
-   */
-  private static class ConstructUnionTableFn<K, V> extends
-      DoFn<KV<K, V>, KV<K, RawUnionValue>> {
-
-    private final int index;
-
-    public ConstructUnionTableFn(int index) {
-      this.index = index;
-    }
-
-    @Override
-    public void processElement(ProcessContext c) {
-      KV<K, ?> e = c.element();
-      c.output(KV.of(e.getKey(), new RawUnionValue(index, e.getValue())));
-    }
-  }
-
-  /**
-   * A DoFn to construct a CoGbkResult from an input grouped union
-   * table.
-    */
-  private static class ConstructCoGbkResultFn<K>
-    extends DoFn<KV<K, Iterable<RawUnionValue>>,
-                 KV<K, CoGbkResult>> {
-
-    private final CoGbkResultSchema schema;
-
-    public ConstructCoGbkResultFn(CoGbkResultSchema schema) {
-      this.schema = schema;
-    }
-
-    @Override
-    public void processElement(ProcessContext c) {
-      KV<K, Iterable<RawUnionValue>> e = c.element();
-      c.output(KV.of(e.getKey(), new CoGbkResult(schema, e.getValue())));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java
deleted file mode 100644
index abfbe08..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/KeyedPCollectionTuple.java
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.join;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.PValue;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TupleTagList;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
-/**
- * An immutable tuple of keyed {@link PCollection PCollections}
- * with key type K.
- * ({@link PCollection PCollections} containing values of type
- * {@code KV<K, ?>})
- *
- * @param <K> the type of key shared by all constituent PCollections
- */
-public class KeyedPCollectionTuple<K> implements PInput {
-  /**
-   * Returns an empty {@code KeyedPCollectionTuple<K>} on the given pipeline.
-   */
-  public static <K> KeyedPCollectionTuple<K> empty(Pipeline pipeline) {
-    return new KeyedPCollectionTuple<>(pipeline);
-  }
-
-  /**
-   * Returns a new {@code KeyedPCollectionTuple<K>} with the given tag and initial
-   * PCollection.
-   */
-  public static <K, InputT> KeyedPCollectionTuple<K> of(
-      TupleTag<InputT> tag,
-      PCollection<KV<K, InputT>> pc) {
-    return new KeyedPCollectionTuple<K>(pc.getPipeline()).and(tag, pc);
-  }
-
-  /**
-   * Returns a new {@code KeyedPCollectionTuple<K>} that is the same as this,
-   * appended with the given PCollection.
-   */
-  public <V> KeyedPCollectionTuple<K> and(
-      TupleTag< V> tag,
-      PCollection<KV<K, V>> pc) {
-    if (pc.getPipeline() != getPipeline()) {
-      throw new IllegalArgumentException(
-          "PCollections come from different Pipelines");
-    }
-    TaggedKeyedPCollection<K, ?> wrapper =
-        new TaggedKeyedPCollection<>(tag, pc);
-    Coder<K> myKeyCoder = keyCoder == null ? getKeyCoder(pc) : keyCoder;
-    List<TaggedKeyedPCollection<K, ?>>
-      newKeyedCollections =
-        copyAddLast(
-            keyedCollections,
-            wrapper);
-    return new KeyedPCollectionTuple<>(
-        getPipeline(),
-        newKeyedCollections,
-        schema.getTupleTagList().and(tag),
-        myKeyCoder);
-  }
-
-  public boolean isEmpty() {
-    return keyedCollections.isEmpty();
-  }
-
-  /**
-   * Returns a list of {@link TaggedKeyedPCollection TaggedKeyedPCollections} for the
-   * {@link PCollection PCollections} contained in this {@link KeyedPCollectionTuple}.
-   */
-  public List<TaggedKeyedPCollection<K, ?>> getKeyedCollections() {
-    return keyedCollections;
-  }
-
-  /**
-   * Like {@link #apply(String, PTransform)} but defaulting to the name
-   * provided by the {@link PTransform}.
-   */
-  public <OutputT extends POutput> OutputT apply(
-      PTransform<KeyedPCollectionTuple<K>, OutputT> transform) {
-    return Pipeline.applyTransform(this, transform);
-  }
-
-  /**
-   * Applies the given {@link PTransform} to this input {@code KeyedPCollectionTuple} and returns
-   * its {@code OutputT}. This uses {@code name} to identify the specific application of
-   * the transform. This name is used in various places, including the monitoring UI,
-   * logging, and to stably identify this application node in the job graph.
-   */
-  public <OutputT extends POutput> OutputT apply(
-      String name, PTransform<KeyedPCollectionTuple<K>, OutputT> transform) {
-    return Pipeline.applyTransform(name, this, transform);
-  }
-
-  /**
-   * Expands the component {@link PCollection PCollections}, stripping off
-   * any tag-specific information.
-   */
-  @Override
-  public Collection<? extends PValue> expand() {
-    List<PCollection<?>> retval = new ArrayList<>();
-    for (TaggedKeyedPCollection<K, ?> taggedPCollection : keyedCollections) {
-      retval.add(taggedPCollection.pCollection);
-    }
-    return retval;
-  }
-
-  /**
-   * Returns the key {@link Coder} for all {@link PCollection PCollections}
-   * in this {@link KeyedPCollectionTuple}.
-   */
-  public Coder<K> getKeyCoder() {
-    if (keyCoder == null) {
-      throw new IllegalStateException("cannot return null keyCoder");
-    }
-    return keyCoder;
-  }
-
-  /**
-   * Returns the {@link CoGbkResultSchema} associated with this
-   * {@link KeyedPCollectionTuple}.
-   */
-  public CoGbkResultSchema getCoGbkResultSchema() {
-    return schema;
-  }
-
-  @Override
-  public Pipeline getPipeline() {
-    return pipeline;
-  }
-
-  @Override
-  public void finishSpecifying() {
-    for (TaggedKeyedPCollection<K, ?> taggedPCollection : keyedCollections) {
-      taggedPCollection.pCollection.finishSpecifying();
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A utility class to help ensure coherence of tag and input PCollection
-   * types.
-   */
-  public static class TaggedKeyedPCollection<K, V> {
-
-    final TupleTag<V> tupleTag;
-    final PCollection<KV<K, V>> pCollection;
-
-    public TaggedKeyedPCollection(
-        TupleTag<V> tupleTag,
-        PCollection<KV<K, V>> pCollection) {
-      this.tupleTag = tupleTag;
-      this.pCollection = pCollection;
-    }
-
-    /**
-     * Returns the underlying PCollection of this TaggedKeyedPCollection.
-     */
-    public PCollection<KV<K, V>> getCollection() {
-      return pCollection;
-    }
-
-    /**
-     * Returns the TupleTag of this TaggedKeyedPCollection.
-     */
-    public TupleTag<V> getTupleTag() {
-      return tupleTag;
-    }
-  }
-
-  /**
-   * We use a List to properly track the order in which collections are added.
-   */
-  private final List<TaggedKeyedPCollection<K, ?>> keyedCollections;
-
-  private final Coder<K> keyCoder;
-
-  private final CoGbkResultSchema schema;
-
-  private final Pipeline pipeline;
-
-  KeyedPCollectionTuple(Pipeline pipeline) {
-    this(pipeline,
-         new ArrayList<TaggedKeyedPCollection<K, ?>>(),
-         TupleTagList.empty(),
-         null);
-  }
-
-  KeyedPCollectionTuple(
-      Pipeline pipeline,
-      List<TaggedKeyedPCollection<K, ?>> keyedCollections,
-      TupleTagList tupleTagList,
-      Coder<K> keyCoder) {
-    this.pipeline = pipeline;
-    this.keyedCollections = keyedCollections;
-    this.schema = new CoGbkResultSchema(tupleTagList);
-    this.keyCoder = keyCoder;
-  }
-
-  private static <K, V> Coder<K> getKeyCoder(PCollection<KV<K, V>> pc) {
-    // Need to run coder inference on this PCollection before inspecting it.
-    pc.finishSpecifying();
-
-    // Assumes that the PCollection uses a KvCoder.
-    Coder<?> entryCoder = pc.getCoder();
-    if (!(entryCoder instanceof KvCoder<?, ?>)) {
-      throw new IllegalArgumentException("PCollection does not use a KvCoder");
-    }
-    @SuppressWarnings("unchecked")
-    KvCoder<K, V> coder = (KvCoder<K, V>) entryCoder;
-    return coder.getKeyCoder();
-  }
-
-  private static <K> List<TaggedKeyedPCollection<K, ?>> copyAddLast(
-        List<TaggedKeyedPCollection<K, ?>> keyedCollections,
-        TaggedKeyedPCollection<K, ?> taggedCollection) {
-    List<TaggedKeyedPCollection<K, ?>> retval =
-        new ArrayList<>(keyedCollections);
-    retval.add(taggedCollection);
-    return retval;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java
deleted file mode 100644
index 514853e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/RawUnionValue.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.join;
-
-// TODO: Think about making this a complete dynamic union by adding
-// a schema.  Type would then be defined by the corresponding schema entry.
-
-/**
- * This corresponds to an integer union tag and value.  The mapping of
- * union tag to type must come from elsewhere.
- */
-public class RawUnionValue {
-  private final int unionTag;
-  private final Object value;
-
-  /**
-   * Constructs a partial union from the given union tag and value.
-   */
-  public RawUnionValue(int unionTag, Object value) {
-    this.unionTag = unionTag;
-    this.value = value;
-  }
-
-  public int getUnionTag() {
-    return unionTag;
-  }
-
-  public Object getValue() {
-    return value;
-  }
-
-  @Override
-  public String toString() {
-    return unionTag + ":" + value;
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java
deleted file mode 100644
index 2f1c2be..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/UnionCoder.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.join;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.VarInt;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.List;
-
-/**
- * A UnionCoder encodes RawUnionValues.
- */
-class UnionCoder extends StandardCoder<RawUnionValue> {
-  // TODO: Think about how to integrate this with a schema object (i.e.
-  // a tuple of tuple tags).
-  /**
-   * Builds a union coder with the given list of element coders.  This list
-   * corresponds to a mapping of union tag to Coder.  Union tags start at 0.
-   */
-  public static UnionCoder of(List<Coder<?>> elementCoders) {
-    return new UnionCoder(elementCoders);
-  }
-
-  @JsonCreator
-  public static UnionCoder jsonOf(
-      @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-      List<Coder<?>> elements) {
-    return UnionCoder.of(elements);
-  }
-
-  private int getIndexForEncoding(RawUnionValue union) {
-    if (union == null) {
-      throw new IllegalArgumentException("cannot encode a null tagged union");
-    }
-    int index = union.getUnionTag();
-    if (index < 0 || index >= elementCoders.size()) {
-      throw new IllegalArgumentException(
-          "union value index " + index + " not in range [0.." +
-          (elementCoders.size() - 1) + "]");
-    }
-    return index;
-  }
-
-  @SuppressWarnings("unchecked")
-  @Override
-  public void encode(
-      RawUnionValue union,
-      OutputStream outStream,
-      Context context)
-      throws IOException, CoderException  {
-    int index = getIndexForEncoding(union);
-    // Write out the union tag.
-    VarInt.encode(index, outStream);
-
-    // Write out the actual value.
-    Coder<Object> coder = (Coder<Object>) elementCoders.get(index);
-    coder.encode(
-        union.getValue(),
-        outStream,
-        context);
-  }
-
-  @Override
-  public RawUnionValue decode(InputStream inStream, Context context)
-      throws IOException, CoderException {
-    int index = VarInt.decodeInt(inStream);
-    Object value = elementCoders.get(index).decode(inStream, context);
-    return new RawUnionValue(index, value);
-  }
-
-  @Override
-  public List<? extends Coder<?>> getCoderArguments() {
-    return null;
-  }
-
-  @Override
-  public List<? extends Coder<?>> getComponents() {
-    return elementCoders;
-  }
-
-  /**
-   * Since this coder uses elementCoders.get(index) and coders that are known to run in constant
-   * time, we defer the return value to that coder.
-   */
-  @Override
-  public boolean isRegisterByteSizeObserverCheap(RawUnionValue union, Context context) {
-    int index = getIndexForEncoding(union);
-    @SuppressWarnings("unchecked")
-    Coder<Object> coder = (Coder<Object>) elementCoders.get(index);
-    return coder.isRegisterByteSizeObserverCheap(union.getValue(), context);
-  }
-
-  /**
-   * Notifies ElementByteSizeObserver about the byte size of the encoded value using this coder.
-   */
-  @Override
-  public void registerByteSizeObserver(
-      RawUnionValue union, ElementByteSizeObserver observer, Context context)
-      throws Exception {
-    int index = getIndexForEncoding(union);
-    // Write out the union tag.
-    observer.update(VarInt.getLength(index));
-    // Write out the actual value.
-    @SuppressWarnings("unchecked")
-    Coder<Object> coder = (Coder<Object>) elementCoders.get(index);
-    coder.registerByteSizeObserver(union.getValue(), observer, context);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private final List<Coder<?>> elementCoders;
-
-  private UnionCoder(List<Coder<?>> elementCoders) {
-    this.elementCoders = elementCoders;
-  }
-
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    verifyDeterministic(
-        "UnionCoder is only deterministic if all element coders are",
-        elementCoders);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java
deleted file mode 100644
index be8bffa..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/join/package-info.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines the {@link com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey} transform
- * for joining multiple PCollections.
- */
-package com.google.cloud.dataflow.sdk.transforms.join;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java
deleted file mode 100644
index 3c041f6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/package-info.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines {@link com.google.cloud.dataflow.sdk.transforms.PTransform}s for transforming
- * data in a pipeline.
- *
- * <p>A {@link com.google.cloud.dataflow.sdk.transforms.PTransform} is an operation that takes an
- * {@code InputT} (some subtype of {@link com.google.cloud.dataflow.sdk.values.PInput})
- * and produces an
- * {@code OutputT} (some subtype of {@link com.google.cloud.dataflow.sdk.values.POutput}).
- *
- * <p>Common PTransforms include root PTransforms like
- * {@link com.google.cloud.dataflow.sdk.io.TextIO.Read} and
- * {@link com.google.cloud.dataflow.sdk.transforms.Create}, processing and
- * conversion operations like {@link com.google.cloud.dataflow.sdk.transforms.ParDo},
- * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey},
- * {@link com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey},
- * {@link com.google.cloud.dataflow.sdk.transforms.Combine}, and
- * {@link com.google.cloud.dataflow.sdk.transforms.Count}, and outputting
- * PTransforms like
- * {@link com.google.cloud.dataflow.sdk.io.TextIO.Write}.
- *
- * <p>New PTransforms can be created by composing existing PTransforms.
- * Most PTransforms in this package are composites, and users can also create composite PTransforms
- * for their own application-specific logic.
- *
- */
-package com.google.cloud.dataflow.sdk.transforms;
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java
deleted file mode 100644
index bb43010..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterAll.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger;
-import com.google.cloud.dataflow.sdk.util.ExecutableTrigger;
-import com.google.common.base.Preconditions;
-
-import org.joda.time.Instant;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Create a {@link Trigger} that fires and finishes once after all of its sub-triggers have fired.
- *
- * @param <W> {@link BoundedWindow} subclass used to represent the windows used by this
- *            {@code Trigger}
- */
-@Experimental(Experimental.Kind.TRIGGER)
-public class AfterAll<W extends BoundedWindow> extends OnceTrigger<W> {
-
-  private AfterAll(List<Trigger<W>> subTriggers) {
-    super(subTriggers);
-    Preconditions.checkArgument(subTriggers.size() > 1);
-  }
-
-  /**
-   * Returns an {@code AfterAll} {@code Trigger} with the given subtriggers.
-   */
-  @SafeVarargs
-  public static <W extends BoundedWindow> OnceTrigger<W> of(
-      OnceTrigger<W>... triggers) {
-    return new AfterAll<W>(Arrays.<Trigger<W>>asList(triggers));
-  }
-
-  @Override
-  public void onElement(OnElementContext c) throws Exception {
-    for (ExecutableTrigger<W> subTrigger : c.trigger().unfinishedSubTriggers()) {
-      // Since subTriggers are all OnceTriggers, they must either CONTINUE or FIRE_AND_FINISH.
-      // invokeElement will automatically mark the finish bit if they return FIRE_AND_FINISH.
-      subTrigger.invokeOnElement(c);
-    }
-  }
-
-  @Override
-  public void onMerge(OnMergeContext c) throws Exception {
-    for (ExecutableTrigger<W> subTrigger : c.trigger().subTriggers()) {
-      subTrigger.invokeOnMerge(c);
-    }
-    boolean allFinished = true;
-    for (ExecutableTrigger<W> subTrigger1 : c.trigger().subTriggers()) {
-      allFinished &= c.forTrigger(subTrigger1).trigger().isFinished();
-    }
-    c.trigger().setFinished(allFinished);
-  }
-
-  @Override
-  public Instant getWatermarkThatGuaranteesFiring(W window) {
-    // This trigger will fire after the latest of its sub-triggers.
-    Instant deadline = BoundedWindow.TIMESTAMP_MIN_VALUE;
-    for (Trigger<W> subTrigger : subTriggers) {
-      Instant subDeadline = subTrigger.getWatermarkThatGuaranteesFiring(window);
-      if (deadline.isBefore(subDeadline)) {
-        deadline = subDeadline;
-      }
-    }
-    return deadline;
-  }
-
-  @Override
-  public OnceTrigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-    return new AfterAll<W>(continuationTriggers);
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * @return {@code true} if all subtriggers return {@code true}.
-   */
-  @Override
-  public boolean shouldFire(TriggerContext context) throws Exception {
-    for (ExecutableTrigger<W> subtrigger : context.trigger().subTriggers()) {
-      if (!context.forTrigger(subtrigger).trigger().isFinished()
-          && !subtrigger.invokeShouldFire(context)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  /**
-   * Invokes {@link #onFire} for all subtriggers, eliding redundant calls to {@link #shouldFire}
-   * because they all must be ready to fire.
-   */
-  @Override
-  public void onOnlyFiring(TriggerContext context) throws Exception {
-    for (ExecutableTrigger<W> subtrigger : context.trigger().subTriggers()) {
-      subtrigger.invokeOnFire(context);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterDelayFromFirstElement.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterDelayFromFirstElement.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterDelayFromFirstElement.java
deleted file mode 100644
index 71968e9..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterDelayFromFirstElement.java
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.InstantCoder;
-import com.google.cloud.dataflow.sdk.transforms.Combine;
-import com.google.cloud.dataflow.sdk.transforms.Min;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger;
-import com.google.cloud.dataflow.sdk.util.TimeDomain;
-import com.google.cloud.dataflow.sdk.util.state.AccumulatorCombiningState;
-import com.google.cloud.dataflow.sdk.util.state.CombiningState;
-import com.google.cloud.dataflow.sdk.util.state.MergingStateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateMerging;
-import com.google.cloud.dataflow.sdk.util.state.StateTag;
-import com.google.cloud.dataflow.sdk.util.state.StateTags;
-import com.google.common.collect.ImmutableList;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.util.List;
-import java.util.Objects;
-
-import javax.annotation.Nullable;
-
-/**
- * A base class for triggers that happen after a processing time delay from the arrival
- * of the first element in a pane.
- *
- * <p>This class is for internal use only and may change at any time.
- */
-@Experimental(Experimental.Kind.TRIGGER)
-public abstract class AfterDelayFromFirstElement<W extends BoundedWindow> extends OnceTrigger<W> {
-
-  protected static final List<SerializableFunction<Instant, Instant>> IDENTITY =
-      ImmutableList.<SerializableFunction<Instant, Instant>>of();
-
-  protected static final StateTag<Object, AccumulatorCombiningState<Instant,
-                                              Combine.Holder<Instant>, Instant>> DELAYED_UNTIL_TAG =
-      StateTags.makeSystemTagInternal(StateTags.combiningValueFromInputInternal(
-          "delayed", InstantCoder.of(), Min.MinFn.<Instant>naturalOrder()));
-
-  /**
-   * To complete an implementation, return the desired time from the TriggerContext.
-   */
-  @Nullable
-  public abstract Instant getCurrentTime(Trigger<W>.TriggerContext context);
-
-  /**
-   * To complete an implementation, return a new instance like this one, but incorporating
-   * the provided timestamp mapping functions. Generally should be used by calling the
-   * constructor of this class from the constructor of the subclass.
-   */
-  protected abstract AfterDelayFromFirstElement<W> newWith(
-      List<SerializableFunction<Instant, Instant>> transform);
-
-  /**
-   * A list of timestampMappers m1, m2, m3, ... m_n considered to be composed in sequence. The
-   * overall mapping for an instance `instance` is `m_n(... m3(m2(m1(instant))`,
-   * implemented via #computeTargetTimestamp
-   */
-  protected final List<SerializableFunction<Instant, Instant>> timestampMappers;
-
-  private final TimeDomain timeDomain;
-
-  public AfterDelayFromFirstElement(
-      TimeDomain timeDomain,
-      List<SerializableFunction<Instant, Instant>> timestampMappers) {
-    super(null);
-    this.timestampMappers = timestampMappers;
-    this.timeDomain = timeDomain;
-  }
-
-  private Instant getTargetTimestamp(OnElementContext c) {
-    return computeTargetTimestamp(c.currentProcessingTime());
-  }
-
-  /**
-   * Aligns timestamps to the smallest multiple of {@code size} since the {@code offset} greater
-   * than the timestamp.
-   *
-   * <p>TODO: Consider sharing this with FixedWindows, and bring over the equivalent of
-   * CalendarWindows.
-   */
-  public AfterDelayFromFirstElement<W> alignedTo(final Duration size, final Instant offset) {
-    return newWith(new AlignFn(size, offset));
-  }
-
-  /**
-   * Aligns the time to be the smallest multiple of {@code size} greater than the timestamp
-   * since the epoch.
-   */
-  public AfterDelayFromFirstElement<W> alignedTo(final Duration size) {
-    return alignedTo(size, new Instant(0));
-  }
-
-  /**
-   * Adds some delay to the original target time.
-   *
-   * @param delay the delay to add
-   * @return An updated time trigger that will wait the additional time before firing.
-   */
-  public AfterDelayFromFirstElement<W> plusDelayOf(final Duration delay) {
-    return newWith(new DelayFn(delay));
-  }
-
-  /**
-   * @deprecated This will be removed in the next major version. Please use only
-   *             {@link #plusDelayOf} and {@link #alignedTo}.
-   */
-  @Deprecated
-  public OnceTrigger<W> mappedTo(SerializableFunction<Instant, Instant> timestampMapper) {
-    return newWith(timestampMapper);
-  }
-
-  @Override
-  public boolean isCompatible(Trigger<?> other) {
-    if (!getClass().equals(other.getClass())) {
-      return false;
-    }
-
-    AfterDelayFromFirstElement<?> that = (AfterDelayFromFirstElement<?>) other;
-    return this.timestampMappers.equals(that.timestampMappers);
-  }
-
-
-  private AfterDelayFromFirstElement<W> newWith(
-      SerializableFunction<Instant, Instant> timestampMapper) {
-    return newWith(
-        ImmutableList.<SerializableFunction<Instant, Instant>>builder()
-            .addAll(timestampMappers)
-            .add(timestampMapper)
-            .build());
-  }
-
-  @Override
-  public void prefetchOnElement(StateAccessor<?> state) {
-    state.access(DELAYED_UNTIL_TAG).readLater();
-  }
-
-  @Override
-  public void onElement(OnElementContext c) throws Exception {
-    CombiningState<Instant, Instant> delayUntilState = c.state().access(DELAYED_UNTIL_TAG);
-    Instant oldDelayUntil = delayUntilState.read();
-
-    // Since processing time can only advance, resulting in target wake-up times we would
-    // ignore anyhow, we don't bother with it if it is already set.
-    if (oldDelayUntil != null) {
-      return;
-    }
-
-    Instant targetTimestamp = getTargetTimestamp(c);
-    delayUntilState.add(targetTimestamp);
-    c.setTimer(targetTimestamp, timeDomain);
-  }
-
-  @Override
-  public void prefetchOnMerge(MergingStateAccessor<?, W> state) {
-    super.prefetchOnMerge(state);
-    StateMerging.prefetchCombiningValues(state, DELAYED_UNTIL_TAG);
-  }
-
-  @Override
-  public void onMerge(OnMergeContext c) throws Exception {
-    // NOTE: We could try to delete all timers which are still active, but we would
-    // need access to a timer context for each merging window.
-    // for (CombiningValueStateInternal<Instant, Combine.Holder<Instant>, Instant> state :
-    //    c.state().accessInEachMergingWindow(DELAYED_UNTIL_TAG).values()) {
-    //   Instant timestamp = state.get().read();
-    //   if (timestamp != null) {
-    //     <context for merging window>.deleteTimer(timestamp, timeDomain);
-    //   }
-    // }
-    // Instead let them fire and be ignored.
-
-    // If the trigger is already finished, there is no way it will become re-activated
-    if (c.trigger().isFinished()) {
-      StateMerging.clear(c.state(), DELAYED_UNTIL_TAG);
-      // NOTE: We do not attempt to delete  the timers.
-      return;
-    }
-
-    // Determine the earliest point across all the windows, and delay to that.
-    StateMerging.mergeCombiningValues(c.state(), DELAYED_UNTIL_TAG);
-
-    Instant earliestTargetTime = c.state().access(DELAYED_UNTIL_TAG).read();
-    if (earliestTargetTime != null) {
-      c.setTimer(earliestTargetTime, timeDomain);
-    }
-  }
-
-  @Override
-  public void prefetchShouldFire(StateAccessor<?> state) {
-    state.access(DELAYED_UNTIL_TAG).readLater();
-  }
-
-  @Override
-  public void clear(TriggerContext c) throws Exception {
-    c.state().access(DELAYED_UNTIL_TAG).clear();
-  }
-
-  @Override
-  public Instant getWatermarkThatGuaranteesFiring(W window) {
-    return BoundedWindow.TIMESTAMP_MAX_VALUE;
-  }
-
-  @Override
-  public boolean shouldFire(Trigger<W>.TriggerContext context) throws Exception {
-    Instant delayedUntil = context.state().access(DELAYED_UNTIL_TAG).read();
-    return delayedUntil != null
-        && getCurrentTime(context) != null
-        && getCurrentTime(context).isAfter(delayedUntil);
-  }
-
-  @Override
-  protected void onOnlyFiring(Trigger<W>.TriggerContext context) throws Exception {
-    clear(context);
-  }
-
-  protected Instant computeTargetTimestamp(Instant time) {
-    Instant result = time;
-    for (SerializableFunction<Instant, Instant> timestampMapper : timestampMappers) {
-      result = timestampMapper.apply(result);
-    }
-    return result;
-  }
-
-  /**
-   * A {@link SerializableFunction} to delay the timestamp at which this triggers fires.
-   */
-  private static final class DelayFn implements SerializableFunction<Instant, Instant> {
-    private final Duration delay;
-
-    public DelayFn(Duration delay) {
-      this.delay = delay;
-    }
-
-    @Override
-    public Instant apply(Instant input) {
-      return input.plus(delay);
-    }
-
-    @Override
-    public boolean equals(Object object) {
-      if (object == this) {
-        return true;
-      }
-
-      if (!(object instanceof DelayFn)) {
-        return false;
-      }
-
-      return this.delay.equals(((DelayFn) object).delay);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(delay);
-    }
-  }
-
-  /**
-   * A {@link SerializableFunction} to align an instant to the nearest interval boundary.
-   */
-  static final class AlignFn implements SerializableFunction<Instant, Instant> {
-    private final Duration size;
-    private final Instant offset;
-
-
-    /**
-     * Aligns timestamps to the smallest multiple of {@code size} since the {@code offset} greater
-     * than the timestamp.
-     */
-    public AlignFn(Duration size, Instant offset) {
-      this.size = size;
-      this.offset = offset;
-    }
-
-    @Override
-    public Instant apply(Instant point) {
-      long millisSinceStart = new Duration(offset, point).getMillis() % size.getMillis();
-      return millisSinceStart == 0 ? point : point.plus(size).minus(millisSinceStart);
-    }
-
-    @Override
-    public boolean equals(Object object) {
-      if (object == this) {
-        return true;
-      }
-
-      if (!(object instanceof AlignFn)) {
-        return false;
-      }
-
-      AlignFn other = (AlignFn) object;
-      return other.size.equals(this.size)
-          && other.offset.equals(this.offset);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(size, offset);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEach.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEach.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEach.java
deleted file mode 100644
index 4b052fa..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterEach.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.util.ExecutableTrigger;
-
-import org.joda.time.Instant;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * A composite {@link Trigger} that executes its sub-triggers in order.
- * Only one sub-trigger is executing at a time,
- * and any time it fires the {@code AfterEach} fires. When the currently executing
- * sub-trigger finishes, the {@code AfterEach} starts executing the next sub-trigger.
- *
- * <p>{@code AfterEach.inOrder(t1, t2, ...)} finishes when all of the sub-triggers have finished.
- *
- * <p>The following properties hold:
- * <ul>
- *   <li> {@code AfterEach.inOrder(AfterEach.inOrder(a, b), c)} behaves the same as
- *   {@code AfterEach.inOrder(a, b, c)} and {@code AfterEach.inOrder(a, AfterEach.inOrder(b, c)}.
- *   <li> {@code AfterEach.inOrder(Repeatedly.forever(a), b)} behaves the same as
- *   {@code Repeatedly.forever(a)}, since the repeated trigger never finishes.
- * </ul>
- *
- * @param <W> {@link BoundedWindow} subclass used to represent the windows used by this
- *            {@code Trigger}
- */
-@Experimental(Experimental.Kind.TRIGGER)
-public class AfterEach<W extends BoundedWindow> extends Trigger<W> {
-
-  private AfterEach(List<Trigger<W>> subTriggers) {
-    super(subTriggers);
-    checkArgument(subTriggers.size() > 1);
-  }
-
-  /**
-   * Returns an {@code AfterEach} {@code Trigger} with the given subtriggers.
-   */
-  @SafeVarargs
-  public static <W extends BoundedWindow> Trigger<W> inOrder(Trigger<W>... triggers) {
-    return new AfterEach<W>(Arrays.<Trigger<W>>asList(triggers));
-  }
-
-  @Override
-  public void onElement(OnElementContext c) throws Exception {
-    if (!c.trigger().isMerging()) {
-      // If merges are not possible, we need only run the first unfinished subtrigger
-      c.trigger().firstUnfinishedSubTrigger().invokeOnElement(c);
-    } else {
-      // If merges are possible, we need to run all subtriggers in parallel
-      for (ExecutableTrigger<W> subTrigger :  c.trigger().subTriggers()) {
-        // Even if the subTrigger is done, it may be revived via merging and must have
-        // adequate state.
-        subTrigger.invokeOnElement(c);
-      }
-    }
-  }
-
-  @Override
-  public void onMerge(OnMergeContext context) throws Exception {
-    // If merging makes a subtrigger no-longer-finished, it will automatically
-    // begin participating in shouldFire and onFire appropriately.
-
-    // All the following triggers are retroactively "not started" but that is
-    // also automatic because they are cleared whenever this trigger
-    // fires.
-    boolean priorTriggersAllFinished = true;
-    for (ExecutableTrigger<W> subTrigger : context.trigger().subTriggers()) {
-      if (priorTriggersAllFinished) {
-        subTrigger.invokeOnMerge(context);
-        priorTriggersAllFinished &= context.forTrigger(subTrigger).trigger().isFinished();
-      } else {
-        subTrigger.invokeClear(context);
-      }
-    }
-    updateFinishedState(context);
-  }
-
-  @Override
-  public Instant getWatermarkThatGuaranteesFiring(W window) {
-    // This trigger will fire at least once when the first trigger in the sequence
-    // fires at least once.
-    return subTriggers.get(0).getWatermarkThatGuaranteesFiring(window);
-  }
-
-  @Override
-  public Trigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-    return Repeatedly.forever(new AfterFirst<W>(continuationTriggers));
-  }
-
-  @Override
-  public boolean shouldFire(Trigger<W>.TriggerContext context) throws Exception {
-    ExecutableTrigger<W> firstUnfinished = context.trigger().firstUnfinishedSubTrigger();
-    return firstUnfinished.invokeShouldFire(context);
-  }
-
-  @Override
-  public void onFire(Trigger<W>.TriggerContext context) throws Exception {
-    context.trigger().firstUnfinishedSubTrigger().invokeOnFire(context);
-
-    // Reset all subtriggers if in a merging context; any may be revived by merging so they are
-    // all run in parallel for each pending pane.
-    if (context.trigger().isMerging()) {
-      for (ExecutableTrigger<W> subTrigger : context.trigger().subTriggers()) {
-        subTrigger.invokeClear(context);
-      }
-    }
-
-    updateFinishedState(context);
-  }
-
-  private void updateFinishedState(TriggerContext context) {
-    context.trigger().setFinished(context.trigger().firstUnfinishedSubTrigger() == null);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirst.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirst.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirst.java
deleted file mode 100644
index 29b19bf..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterFirst.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger;
-import com.google.cloud.dataflow.sdk.util.ExecutableTrigger;
-import com.google.common.base.Preconditions;
-
-import org.joda.time.Instant;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Create a composite {@link Trigger} that fires once after at least one of its sub-triggers have
- * fired.
- *
- * @param <W> {@link BoundedWindow} subclass used to represent the windows used by this
- *            {@code Trigger}
- */
-@Experimental(Experimental.Kind.TRIGGER)
-public class AfterFirst<W extends BoundedWindow> extends OnceTrigger<W> {
-
-  AfterFirst(List<Trigger<W>> subTriggers) {
-    super(subTriggers);
-    Preconditions.checkArgument(subTriggers.size() > 1);
-  }
-
-  /**
-   * Returns an {@code AfterFirst} {@code Trigger} with the given subtriggers.
-   */
-  @SafeVarargs
-  public static <W extends BoundedWindow> OnceTrigger<W> of(
-      OnceTrigger<W>... triggers) {
-    return new AfterFirst<W>(Arrays.<Trigger<W>>asList(triggers));
-  }
-
-  @Override
-  public void onElement(OnElementContext c) throws Exception {
-    for (ExecutableTrigger<W> subTrigger : c.trigger().subTriggers()) {
-      subTrigger.invokeOnElement(c);
-    }
-  }
-
-  @Override
-  public void onMerge(OnMergeContext c) throws Exception {
-    for (ExecutableTrigger<W> subTrigger : c.trigger().subTriggers()) {
-      subTrigger.invokeOnMerge(c);
-    }
-    updateFinishedStatus(c);
-  }
-
-  @Override
-  public Instant getWatermarkThatGuaranteesFiring(W window) {
-    // This trigger will fire after the earliest of its sub-triggers.
-    Instant deadline = BoundedWindow.TIMESTAMP_MAX_VALUE;
-    for (Trigger<W> subTrigger : subTriggers) {
-      Instant subDeadline = subTrigger.getWatermarkThatGuaranteesFiring(window);
-      if (deadline.isAfter(subDeadline)) {
-        deadline = subDeadline;
-      }
-    }
-    return deadline;
-  }
-
-  @Override
-  public OnceTrigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-    return new AfterFirst<W>(continuationTriggers);
-  }
-
-  @Override
-  public boolean shouldFire(Trigger<W>.TriggerContext context) throws Exception {
-    for (ExecutableTrigger<W> subtrigger : context.trigger().subTriggers()) {
-      if (context.forTrigger(subtrigger).trigger().isFinished()
-          || subtrigger.invokeShouldFire(context)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  @Override
-  protected void onOnlyFiring(TriggerContext context) throws Exception {
-    for (ExecutableTrigger<W> subtrigger : context.trigger().subTriggers()) {
-      TriggerContext subContext = context.forTrigger(subtrigger);
-      if (subtrigger.invokeShouldFire(subContext)) {
-        // If the trigger is ready to fire, then do whatever it needs to do.
-        subtrigger.invokeOnFire(subContext);
-      } else {
-        // If the trigger is not ready to fire, it is nonetheless true that whatever
-        // pending pane it was tracking is now gone.
-        subtrigger.invokeClear(subContext);
-      }
-    }
-  }
-
-  private void updateFinishedStatus(TriggerContext c) {
-    boolean anyFinished = false;
-    for (ExecutableTrigger<W> subTrigger : c.trigger().subTriggers()) {
-      anyFinished |= c.forTrigger(subTrigger).trigger().isFinished();
-    }
-    c.trigger().setFinished(anyFinished);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPane.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPane.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPane.java
deleted file mode 100644
index 28c8560..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterPane.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.VarLongCoder;
-import com.google.cloud.dataflow.sdk.transforms.Sum;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger.OnceTrigger;
-import com.google.cloud.dataflow.sdk.util.state.AccumulatorCombiningState;
-import com.google.cloud.dataflow.sdk.util.state.MergingStateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.StateMerging;
-import com.google.cloud.dataflow.sdk.util.state.StateTag;
-import com.google.cloud.dataflow.sdk.util.state.StateTags;
-
-import org.joda.time.Instant;
-
-import java.util.List;
-import java.util.Objects;
-
-/**
- * {@link Trigger}s that fire based on properties of the elements in the current pane.
- *
- * @param <W> {@link BoundedWindow} subclass used to represent the windows used by this
- *            {@link Trigger}
- */
-@Experimental(Experimental.Kind.TRIGGER)
-public class AfterPane<W extends BoundedWindow> extends OnceTrigger<W>{
-
-private static final StateTag<Object, AccumulatorCombiningState<Long, long[], Long>>
-      ELEMENTS_IN_PANE_TAG =
-      StateTags.makeSystemTagInternal(StateTags.combiningValueFromInputInternal(
-          "count", VarLongCoder.of(), new Sum.SumLongFn()));
-
-  private final int countElems;
-
-  private AfterPane(int countElems) {
-    super(null);
-    this.countElems = countElems;
-  }
-
-  /**
-   * Creates a trigger that fires when the pane contains at least {@code countElems} elements.
-   */
-  public static <W extends BoundedWindow> AfterPane<W> elementCountAtLeast(int countElems) {
-    return new AfterPane<>(countElems);
-  }
-
-  @Override
-  public void onElement(OnElementContext c) throws Exception {
-    c.state().access(ELEMENTS_IN_PANE_TAG).add(1L);
-  }
-
-  @Override
-  public void prefetchOnMerge(MergingStateAccessor<?, W> state) {
-    super.prefetchOnMerge(state);
-    StateMerging.prefetchCombiningValues(state, ELEMENTS_IN_PANE_TAG);
-  }
-
-  @Override
-  public void onMerge(OnMergeContext context) throws Exception {
-    // If we've already received enough elements and finished in some window,
-    // then this trigger is just finished.
-    if (context.trigger().finishedInAnyMergingWindow()) {
-      context.trigger().setFinished(true);
-      StateMerging.clear(context.state(), ELEMENTS_IN_PANE_TAG);
-      return;
-    }
-
-    // Otherwise, compute the sum of elements in all the active panes.
-    StateMerging.mergeCombiningValues(context.state(), ELEMENTS_IN_PANE_TAG);
-  }
-
-  @Override
-  public void prefetchShouldFire(StateAccessor<?> state) {
-    state.access(ELEMENTS_IN_PANE_TAG).readLater();
-  }
-
-  @Override
-  public boolean shouldFire(Trigger<W>.TriggerContext context) throws Exception {
-    long count = context.state().access(ELEMENTS_IN_PANE_TAG).read();
-    return count >= countElems;
-  }
-
-  @Override
-  public void clear(TriggerContext c) throws Exception {
-    c.state().access(ELEMENTS_IN_PANE_TAG).clear();
-  }
-
-  @Override
-  public boolean isCompatible(Trigger<?> other) {
-    return this.equals(other);
-  }
-
-  @Override
-  public Instant getWatermarkThatGuaranteesFiring(W window) {
-    return BoundedWindow.TIMESTAMP_MAX_VALUE;
-  }
-
-  @Override
-  public OnceTrigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-    return AfterPane.elementCountAtLeast(1);
-  }
-
-  @Override
-  public String toString() {
-    return "AfterPane.elementCountAtLeast(" + countElems + ")";
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj) {
-      return true;
-    }
-    if (!(obj instanceof AfterPane)) {
-      return false;
-    }
-    AfterPane<?> that = (AfterPane<?>) obj;
-    return this.countElems == that.countElems;
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(countElems);
-  }
-
-  @Override
-  protected void onOnlyFiring(Trigger<W>.TriggerContext context) throws Exception {
-    clear(context);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTime.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTime.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTime.java
deleted file mode 100644
index 7e89902..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterProcessingTime.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.util.TimeDomain;
-
-import org.joda.time.Instant;
-
-import java.util.List;
-import java.util.Objects;
-
-import javax.annotation.Nullable;
-
-/**
- * {@code AfterProcessingTime} triggers fire based on the current processing time. They operate in
- * the real-time domain.
- *
- * <p>The time at which to fire the timer can be adjusted via the methods in {@link TimeTrigger},
- * such as {@link TimeTrigger#plusDelayOf} or {@link TimeTrigger#alignedTo}.
- *
- * @param <W> {@link BoundedWindow} subclass used to represent the windows used
- */
-@Experimental(Experimental.Kind.TRIGGER)
-public class AfterProcessingTime<W extends BoundedWindow> extends AfterDelayFromFirstElement<W> {
-
-  @Override
-  @Nullable
-  public Instant getCurrentTime(Trigger<W>.TriggerContext context) {
-    return context.currentProcessingTime();
-  }
-
-  private AfterProcessingTime(List<SerializableFunction<Instant, Instant>> transforms) {
-    super(TimeDomain.PROCESSING_TIME, transforms);
-  }
-
-  /**
-   * Creates a trigger that fires when the current processing time passes the processing time
-   * at which this trigger saw the first element in a pane.
-   */
-  public static <W extends BoundedWindow> AfterProcessingTime<W> pastFirstElementInPane() {
-    return new AfterProcessingTime<W>(IDENTITY);
-  }
-
-  @Override
-  protected AfterProcessingTime<W> newWith(
-      List<SerializableFunction<Instant, Instant>> transforms) {
-    return new AfterProcessingTime<W>(transforms);
-  }
-
-  @Override
-  public Instant getWatermarkThatGuaranteesFiring(W window) {
-    return BoundedWindow.TIMESTAMP_MAX_VALUE;
-  }
-
-  @Override
-  protected Trigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-    return new AfterSynchronizedProcessingTime<W>();
-  }
-
-  @Override
-  public String toString() {
-    return "AfterProcessingTime.pastFirstElementInPane(" + timestampMappers + ")";
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj) {
-      return true;
-    }
-    if (!(obj instanceof AfterProcessingTime)) {
-      return false;
-    }
-    AfterProcessingTime<?> that = (AfterProcessingTime<?>) obj;
-    return Objects.equals(this.timestampMappers, that.timestampMappers);
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(getClass(), this.timestampMappers);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTime.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTime.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTime.java
deleted file mode 100644
index 0a274c9..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/AfterSynchronizedProcessingTime.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
-import com.google.cloud.dataflow.sdk.util.TimeDomain;
-import com.google.common.base.Objects;
-
-import org.joda.time.Instant;
-
-import java.util.Collections;
-import java.util.List;
-
-import javax.annotation.Nullable;
-
-class AfterSynchronizedProcessingTime<W extends BoundedWindow>
-    extends AfterDelayFromFirstElement<W> {
-
-  @Override
-  @Nullable
-  public Instant getCurrentTime(Trigger<W>.TriggerContext context) {
-    return context.currentSynchronizedProcessingTime();
-  }
-
-  public AfterSynchronizedProcessingTime() {
-    super(TimeDomain.SYNCHRONIZED_PROCESSING_TIME,
-        Collections.<SerializableFunction<Instant, Instant>>emptyList());
-  }
-
-  @Override
-  public Instant getWatermarkThatGuaranteesFiring(W window) {
-    return BoundedWindow.TIMESTAMP_MAX_VALUE;
-  }
-
-  @Override
-  protected Trigger<W> getContinuationTrigger(List<Trigger<W>> continuationTriggers) {
-    return this;
-  }
-
-  @Override
-  public String toString() {
-    return "AfterSynchronizedProcessingTime.pastFirstElementInPane()";
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    return this == obj || obj instanceof AfterSynchronizedProcessingTime;
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hashCode(AfterSynchronizedProcessingTime.class);
-  }
-
-  @Override
-  protected AfterSynchronizedProcessingTime<W>
-      newWith(List<SerializableFunction<Instant, Instant>> transforms) {
-    // ignore transforms
-    return this;
-  }
-
-}

[05/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java
deleted file mode 100644
index 00d3b3b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/StateSampler.java
+++ /dev/null
@@ -1,365 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util.common.worker;
-
-import com.google.cloud.dataflow.sdk.util.common.Counter;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledFuture;
-import java.util.concurrent.TimeUnit;
-
-import javax.annotation.concurrent.ThreadSafe;
-
-/**
- * A StateSampler object may be used to obtain an approximate
- * breakdown of the time spent by an execution context in various
- * states, as a fraction of the total time.  The sampling is taken at
- * regular intervals, with adjustment for scheduling delay.
- */
-@ThreadSafe
-public class StateSampler implements AutoCloseable {
-
-  /** Different kinds of states. */
-  public enum StateKind {
-    /** IO, user code, etc. */
-    USER,
-    /** Reading/writing from/to shuffle service, etc. */
-    FRAMEWORK
-  }
-
-  public static final long DEFAULT_SAMPLING_PERIOD_MS = 200;
-
-  private final String prefix;
-  private final CounterSet.AddCounterMutator counterSetMutator;
-
-  /** Array of counters indexed by their state. */
-  private ArrayList<Counter<Long>> countersByState = new ArrayList<>();
-
-  /** Map of state name to state. */
-  private Map<String, Integer> statesByName = new HashMap<>();
-
-  /** Map of state id to kind. */
-  private Map<Integer, StateKind> kindsByState = new HashMap<>();
-
-  /** The current state. */
-  private volatile int currentState;
-
-  /** Special value of {@code currentState} that means we do not sample. */
-  public static final int DO_NOT_SAMPLE = -1;
-
-  /**
-   * A counter that increments with each state transition. May be used
-   * to detect a context being stuck in a state for some amount of
-   * time.
-   */
-  private volatile long stateTransitionCount;
-
-  /**
-   * The timestamp (in nanoseconds) corresponding to the last time the
-   * state was sampled (and recorded).
-   */
-  private long stateTimestampNs = 0;
-
-  /** Using a fixed number of timers for all StateSampler objects. */
-  private static final int NUM_EXECUTOR_THREADS = 16;
-
-  private static final ScheduledExecutorService executorService =
-      Executors.newScheduledThreadPool(NUM_EXECUTOR_THREADS,
-          new ThreadFactoryBuilder().setDaemon(true).build());
-
-  private Random rand = new Random();
-
-  private List<SamplingCallback> callbacks = new ArrayList<>();
-
-  private ScheduledFuture<?> invocationTriggerFuture = null;
-
-  private ScheduledFuture<?> invocationFuture = null;
-
-  /**
-   * Constructs a new {@link StateSampler} that can be used to obtain
-   * an approximate breakdown of the time spent by an execution
-   * context in various states, as a fraction of the total time.
-   *
-   * @param prefix the prefix of the counter names for the states
-   * @param counterSetMutator the {@link CounterSet.AddCounterMutator}
-   * used to create a counter for each distinct state
-   * @param samplingPeriodMs the sampling period in milliseconds
-   */
-  public StateSampler(String prefix,
-                      CounterSet.AddCounterMutator counterSetMutator,
-                      final long samplingPeriodMs) {
-    this.prefix = prefix;
-    this.counterSetMutator = counterSetMutator;
-    currentState = DO_NOT_SAMPLE;
-    scheduleSampling(samplingPeriodMs);
-  }
-
-  /**
-   * Constructs a new {@link StateSampler} that can be used to obtain
-   * an approximate breakdown of the time spent by an execution
-   * context in various states, as a fraction of the total time.
-   *
-   * @param prefix the prefix of the counter names for the states
-   * @param counterSetMutator the {@link CounterSet.AddCounterMutator}
-   * used to create a counter for each distinct state
-   */
-  public StateSampler(String prefix,
-                      CounterSet.AddCounterMutator counterSetMutator) {
-    this(prefix, counterSetMutator, DEFAULT_SAMPLING_PERIOD_MS);
-  }
-
-  /**
-   * Called by the constructor to schedule sampling at the given period.
-   *
-   * <p>Should not be overridden by sub-classes unless they want to change
-   * or disable the automatic sampling of state.
-   */
-  protected void scheduleSampling(final long samplingPeriodMs) {
-    // Here "stratified sampling" is used, which makes sure that there's 1 uniformly chosen sampled
-    // point in every bucket of samplingPeriodMs, to prevent pathological behavior in case some
-    // states happen to occur at a similar period.
-    // The current implementation uses a fixed-rate timer with a period samplingPeriodMs as a
-    // trampoline to a one-shot random timer which fires with a random delay within
-    // samplingPeriodMs.
-    stateTimestampNs = System.nanoTime();
-    invocationTriggerFuture =
-        executorService.scheduleAtFixedRate(
-            new Runnable() {
-              @Override
-              public void run() {
-                long delay = rand.nextInt((int) samplingPeriodMs);
-                synchronized (StateSampler.this) {
-                  if (invocationFuture != null) {
-                    invocationFuture.cancel(false);
-                  }
-                  invocationFuture =
-                      executorService.schedule(
-                          new Runnable() {
-                            @Override
-                            public void run() {
-                              StateSampler.this.run();
-                            }
-                          },
-                          delay,
-                          TimeUnit.MILLISECONDS);
-                }
-              }
-            },
-            0,
-            samplingPeriodMs,
-            TimeUnit.MILLISECONDS);
-  }
-
-  public synchronized void run() {
-    long startTimestampNs = System.nanoTime();
-    int state = currentState;
-    if (state != DO_NOT_SAMPLE) {
-      StateKind kind = null;
-      long elapsedMs = TimeUnit.NANOSECONDS.toMillis(startTimestampNs - stateTimestampNs);
-      kind = kindsByState.get(state);
-      countersByState.get(state).addValue(elapsedMs);
-      // Invoke all callbacks.
-      for (SamplingCallback c : callbacks) {
-        c.run(state, kind, elapsedMs);
-      }
-    }
-    stateTimestampNs = startTimestampNs;
-  }
-
-  @Override
-  public synchronized void close() {
-    currentState = DO_NOT_SAMPLE;
-    if (invocationTriggerFuture != null) {
-      invocationTriggerFuture.cancel(false);
-    }
-    if (invocationFuture != null) {
-      invocationFuture.cancel(false);
-    }
-  }
-
-  /**
-   * Returns the state associated with a name; creating a new state if
-   * necessary. Using states instead of state names during state
-   * transitions is done for efficiency.
-   *
-   * @param name the name for the state
-   * @param kind kind of the state, see {@link StateKind}
-   * @return the state associated with the state name
-   */
-  public int stateForName(String name, StateKind kind) {
-    if (name.isEmpty()) {
-      return DO_NOT_SAMPLE;
-    }
-
-    synchronized (this) {
-      Integer state = statesByName.get(name);
-      if (state == null) {
-        String counterName = prefix + name + "-msecs";
-        Counter<Long> counter = counterSetMutator.addCounter(
-            Counter.longs(counterName, Counter.AggregationKind.SUM));
-        state = countersByState.size();
-        statesByName.put(name, state);
-        countersByState.add(counter);
-        kindsByState.put(state, kind);
-      }
-      StateKind originalKind = kindsByState.get(state);
-      if (originalKind != kind) {
-        throw new IllegalArgumentException(
-            "for state named " + name
-            + ", requested kind " + kind + " different from the original kind " + originalKind);
-      }
-      return state;
-    }
-  }
-
-  /**
-   * An internal class for representing StateSampler information
-   * typically used for debugging.
-   */
-  public static class StateSamplerInfo {
-    public final String state;
-    public final Long transitionCount;
-    public final Long stateDurationMillis;
-
-    public StateSamplerInfo(String state, Long transitionCount,
-                            Long stateDurationMillis) {
-      this.state = state;
-      this.transitionCount = transitionCount;
-      this.stateDurationMillis = stateDurationMillis;
-    }
-  }
-
-  /**
-   * Returns information about the current state of this state sampler
-   * as a {@link StateSamplerInfo} object, or null if sampling is
-   * not turned on.
-   *
-   * @return information about this state sampler or null if sampling is off
-   */
-  public synchronized StateSamplerInfo getInfo() {
-    int state = currentState; // read once: setState() writes it without holding this lock
-    return state == DO_NOT_SAMPLE ? null
-        : new StateSamplerInfo(countersByState.get(state).getName(), stateTransitionCount, null);
-  }
-
-  /**
-   * Returns the current state of this state sampler.
-   */
-  public int getCurrentState() {
-    return currentState;
-  }
-
-  /**
-   * Sets the current thread state.
-   *
-   * @param state the new state to transition to
-   * @return the previous state
-   */
-  public int setState(int state) {
-    // Updates to stateTransitionCount are always done by the same
-    // thread, making the non-atomic volatile update below safe. The
-    // count is updated first to avoid incorrectly attributing
-    // stuckness occurring in an old state to the new state.
-    long previousStateTransitionCount = this.stateTransitionCount;
-    this.stateTransitionCount = previousStateTransitionCount + 1;
-    int previousState = currentState;
-    currentState = state;
-    return previousState;
-  }
-
-  /**
-   * Sets the current thread state.
-   *
-   * @param name the name of the new state to transition to
-   * @param kind kind of the new state
-   * @return the previous state
-   */
-  public int setState(String name, StateKind kind) {
-    return setState(stateForName(name, kind));
-  }
-
-  /**
-   * Returns an AutoCloseable {@link ScopedState} that will perform a
-   * state transition to the given state, and will automatically reset
-   * the state to the prior state upon closing.
-   *
-   * @param state the new state to transition to
-   * @return a {@link ScopedState} that automatically resets the state
-   * to the prior state
-   */
-  public ScopedState scopedState(int state) {
-    return new ScopedState(this, setState(state));
-  }
-
-  /**
-   * Add a callback to the sampler.
-   * The callbacks will be executed sequentially upon {@link StateSampler#run}.
-   */
-  public synchronized void addSamplingCallback(SamplingCallback callback) {
-    callbacks.add(callback);
-  }
-
-  /** Get the counter prefix associated with this sampler. */
-  public String getPrefix() {
-    return prefix;
-  }
-
-  /**
-   * A nested class that is used to account for states and state
-   * transitions based on lexical scopes.
-   *
-   * <p>Thread-safe.
-   */
-  public class ScopedState implements AutoCloseable {
-    private StateSampler sampler;
-    private int previousState;
-
-    private ScopedState(StateSampler sampler, int previousState) {
-      this.sampler = sampler;
-      this.previousState = previousState;
-    }
-
-    @Override
-    public void close() {
-      sampler.setState(previousState);
-    }
-  }
-
-  /**
-   * Callbacks which are supposed to be called sequentially upon {@link StateSampler#run}.
-   * They should be registered via {@link #addSamplingCallback}.
-   */
-  public static interface SamplingCallback {
-    /**
-     * The entrance method of the callback, it is called in {@link StateSampler#run},
-     * once per sample. This method should be thread safe.
-     *
-     * @param state The state of the StateSampler at the time of sample.
-     * @param kind The kind associated with the state, see {@link StateKind}.
-     * @param elapsedMs Milliseconds since last sample.
-     */
-    public void run(int state, StateKind kind, long elapsedMs);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/package-info.java
deleted file mode 100644
index c3da9ed..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/common/worker/package-info.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/** Defines utilities used to implement the harness that runs user code. **/
-package com.google.cloud.dataflow.sdk.util.common.worker;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java
deleted file mode 100644
index f72ba4c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/GcsPath.java
+++ /dev/null
@@ -1,619 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util.gcsfs;
-
-import com.google.api.services.storage.model.StorageObject;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Strings;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.nio.file.FileSystem;
-import java.nio.file.LinkOption;
-import java.nio.file.Path;
-import java.nio.file.WatchEvent;
-import java.nio.file.WatchKey;
-import java.nio.file.WatchService;
-import java.util.Iterator;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
-
-/**
- * Implements the Java NIO {@link Path} API for Google Cloud Storage paths.
- *
- * <p>GcsPath uses a slash ('/') as a directory separator.  Below is
- * a summary of how slashes are treated:
- * <ul>
- *   <li> A GCS bucket may not contain a slash.  An object may contain zero or
- *        more slashes.
- *   <li> A trailing slash always indicates a directory, which is compliant
- *        with POSIX.1-2008.
- *   <li> Slashes separate components of a path.  Empty components are allowed,
- *        these are represented as repeated slashes.  An empty component always
- *        refers to a directory, and always ends in a slash.
- *   <li> {@link #getParent()} always returns a path ending in a slash, as the
- *        parent of a GcsPath is always a directory.
- *   <li> Use {@link #resolve(String)} to append elements to a GcsPath -- this
- *        applies the rules consistently and is highly recommended over any
- *        custom string concatenation.
- * </ul>
- *
- * <p>GcsPath treats all GCS objects and buckets as belonging to the same
- * filesystem, so the root of a GcsPath is the GcsPath bucket="", object="".
- *
- * <p>Relative paths are not associated with any bucket.  This matches common
- * treatment of Path in which relative paths can be constructed from one
- * filesystem and appended to another filesystem.
- *
- * @see <a href=
- * "http://docs.oracle.com/javase/tutorial/essential/io/pathOps.html"
- * >Java Tutorials: Path Operations</a>
- */
-public class GcsPath implements Path {
-
-  public static final String SCHEME = "gs";
-
-  /**
-   * Creates a GcsPath from a URI of the form {@code gs://[bucket]/[path]}.
-   *
-   * @throws IllegalArgumentException if the scheme is missing or not {@code gs}, or if
-   *         the URI specifies a port, user info, a query, or a fragment.
-   */
-  public static GcsPath fromUri(URI uri) {
-    // Constant-first comparison: getScheme() is null for scheme-less URIs.
-    Preconditions.checkArgument(
-        SCHEME.equalsIgnoreCase(uri.getScheme()), "URI: %s is not a GCS URI", uri);
-    // Guava message templates only substitute %s placeholders.
-    Preconditions.checkArgument(
-        uri.getPort() == -1, "GCS URI may not specify port: %s (%s)", uri, uri.getPort());
-    Preconditions.checkArgument(Strings.isNullOrEmpty(uri.getUserInfo()),
-        "GCS URI may not specify userInfo: %s (%s)", uri, uri.getUserInfo());
-    Preconditions.checkArgument(Strings.isNullOrEmpty(uri.getQuery()),
-        "GCS URI may not specify query: %s (%s)", uri, uri.getQuery());
-    Preconditions.checkArgument(Strings.isNullOrEmpty(uri.getFragment()),
-        "GCS URI may not specify fragment: %s (%s)", uri, uri.getFragment());
-
-    // Component splitting is delegated to the string-based overload.
-    return fromUri(uri.toString());
-  }
-
-  /**
-   * Pattern that is used to parse a GCS URL.
-   *
-   * <p>This is used to separate the components.  Verification is handled
-   * separately.
-   */
-  public static final Pattern GCS_URI =
-      Pattern.compile("(?<SCHEME>[^:]+)://(?<BUCKET>[^/]+)(/(?<OBJECT>.*))?");
-
-  /**
-   * Creates a GcsPath from a URI in string form.
-   *
-   * <p>This does not use URI parsing, which means it may accept patterns that
-   * the URI parser would not accept.
-   */
-  public static GcsPath fromUri(String uri) {
-    Matcher m = GCS_URI.matcher(uri);
-    Preconditions.checkArgument(m.matches(), "Invalid GCS URI: %s", uri);
-
-    Preconditions.checkArgument(m.group("SCHEME").equalsIgnoreCase(SCHEME),
-        "URI: %s is not a GCS URI", uri);
-    return new GcsPath(null, m.group("BUCKET"), m.group("OBJECT"));
-  }
-
-  /**
-   * Pattern that is used to parse a GCS resource name.
-   */
-  private static final Pattern GCS_RESOURCE_NAME =
-      Pattern.compile("storage.googleapis.com/(?<BUCKET>[^/]+)(/(?<OBJECT>.*))?");
-
-  /**
-   * Creates a GcsPath from a OnePlatform resource name in string form.
-   */
-  public static GcsPath fromResourceName(String name) {
-    Matcher m = GCS_RESOURCE_NAME.matcher(name);
-    Preconditions.checkArgument(m.matches(), "Invalid GCS resource name: %s", name);
-
-    return new GcsPath(null, m.group("BUCKET"), m.group("OBJECT"));
-  }
-
-  /**
-   * Creates a GcsPath from a {@linkplain StorageObject}.
-   */
-  public static GcsPath fromObject(StorageObject object) {
-    return new GcsPath(null, object.getBucket(), object.getName());
-  }
-
-  /**
-   * Creates a GcsPath from bucket and object components.
-   *
-   * <p>A GcsPath without a bucket name is treated as a relative path, which
-   * is a path component with no linkage to the root element.  This is similar
-   * to a Unix path that does not begin with the root marker (a slash).
-   * GCS has different naming constraints and APIs for working with buckets and
-   * objects, so these two concepts are kept separate to avoid accidental
-   * attempts to treat objects as buckets, or vice versa, as much as possible.
-   *
-   * <p>A GcsPath without an object name is a bucket reference.
-   * A bucket is always a directory, which could be used to lookup or add
-   * files to a bucket, but could not be opened as a file.
-   *
-   * <p>A GcsPath containing neither bucket nor object names is treated as
-   * the root of the GCS filesystem.  A listing on the root element would return
-   * the buckets available to the user.
-   *
-   * <p>If {@code null} is passed as either parameter, it is converted to an
-   * empty string internally for consistency.  There is no distinction between
-   * an empty string and a {@code null}, as neither are allowed by GCS.
-   *
-   * @param bucket a GCS bucket name, or none ({@code null} or an empty string)
-   *               if the object is not associated with a bucket
-   *               (e.g. relative paths or the root node).
-   * @param object a GCS object path, or none ({@code null} or an empty string)
-   *               for no object.
-   */
-  public static GcsPath fromComponents(@Nullable String bucket,
-                                       @Nullable String object) {
-    return new GcsPath(null, bucket, object);
-  }
-
-  @Nullable
-  private FileSystem fs;
-  @Nonnull
-  private final String bucket;
-  @Nonnull
-  private final String object;
-
-  /**
-   * Constructs a GcsPath.
-   *
-   * @param fs the associated FileSystem, if any
-   * @param bucket the associated bucket, or none ({@code null} or an empty
-   *               string) for a relative path component
-   * @param object the object, which is a fully-qualified object name if bucket
-   *               was also provided, or none ({@code null} or an empty string)
-   *               for no object
-   * @throws java.lang.IllegalArgumentException if the bucket or object names
-   *         are invalid.
-   */
-  public GcsPath(@Nullable FileSystem fs,
-                 @Nullable String bucket,
-                 @Nullable String object) {
-    if (bucket == null) {
-      bucket = "";
-    }
-    Preconditions.checkArgument(!bucket.contains("/"),
-        "GCS bucket may not contain a slash");
-    Preconditions
-        .checkArgument(bucket.isEmpty()
-                || bucket.matches("[a-z0-9][-_a-z0-9.]+[a-z0-9]"),
-            "GCS bucket names must contain only lowercase letters, numbers, "
-                + "dashes (-), underscores (_), and dots (.). Bucket names "
-                + "must start and end with a number or letter. "
-                + "See https://developers.google.com/storage/docs/bucketnaming "
-                + "for more details.  Bucket name: " + bucket);
-
-    if (object == null) {
-      object = "";
-    }
-    Preconditions.checkArgument(
-        object.indexOf('\n') < 0 && object.indexOf('\r') < 0,
-        "GCS object names must not contain Carriage Return or "
-            + "Line Feed characters.");
-
-    this.fs = fs;
-    this.bucket = bucket;
-    this.object = object;
-  }
-
-  /**
-   * Returns the bucket name associated with this GCS path, or an empty string
-   * if this is a relative path component.
-   */
-  public String getBucket() {
-    return bucket;
-  }
-
-  /**
-   * Returns the object name associated with this GCS path, or an empty string
-   * if no object is specified.
-   */
-  public String getObject() {
-    return object;
-  }
-
-  public void setFileSystem(FileSystem fs) {
-    this.fs = fs;
-  }
-
-  @Override
-  public FileSystem getFileSystem() {
-    return fs;
-  }
-
-  // Absolute paths are those that have a bucket and the root path.
-  @Override
-  public boolean isAbsolute() {
-    return !bucket.isEmpty() || object.isEmpty();
-  }
-
-  @Override
-  public GcsPath getRoot() {
-    return new GcsPath(fs, "", "");
-  }
-
-  @Override
-  public GcsPath getFileName() {
-    throw new UnsupportedOperationException();
-  }
-
-  /**
-   * Returns the <em>parent path</em>, or {@code null} if this path does not
-   * have a parent.
-   *
-   * <p>Returns a path that ends in '/', as the parent path always refers to
-   * a directory.
-   */
-  @Override
-  public GcsPath getParent() {
-    if (bucket.isEmpty() && object.isEmpty()) {
-      // The root path has no parent, by definition.
-      return null;
-    }
-
-    if (object.isEmpty()) {
-      // A GCS bucket. All buckets come from a common root.
-      return getRoot();
-    }
-
-    // Skip last character, in case it is a trailing slash.
-    int i = object.lastIndexOf('/', object.length() - 2);
-    if (i <= 0) {
-      if (bucket.isEmpty()) {
-        // Relative paths are not attached to the root node.
-        return null;
-      }
-      return new GcsPath(fs, bucket, "");
-    }
-
-    // Retain trailing slash.
-    return new GcsPath(fs, bucket, object.substring(0, i + 1));
-  }
-
-  @Override
-  public int getNameCount() {
-    int count = bucket.isEmpty() ? 0 : 1;
-    if (object.isEmpty()) {
-      return count;
-    }
-
-    // Add another for each separator found.
-    int index = -1;
-    while ((index = object.indexOf('/', index + 1)) != -1) {
-      count++;
-    }
-
-    return object.endsWith("/") ? count : count + 1;
-  }
-
-  @Override
-  public GcsPath getName(int count) {
-    Preconditions.checkArgument(count >= 0);
-
-    Iterator<Path> iterator = iterator();
-    for (int i = 0; i < count; ++i) {
-      Preconditions.checkArgument(iterator.hasNext());
-      iterator.next();
-    }
-
-    Preconditions.checkArgument(iterator.hasNext());
-    return (GcsPath) iterator.next();
-  }
-
-  @Override
-  public GcsPath subpath(int beginIndex, int endIndex) {
-    Preconditions.checkArgument(beginIndex >= 0);
-    Preconditions.checkArgument(endIndex > beginIndex);
-
-    Iterator<Path> iterator = iterator();
-    for (int i = 0; i < beginIndex; ++i) {
-      Preconditions.checkArgument(iterator.hasNext());
-      iterator.next();
-    }
-
-    GcsPath path = null;
-    while (beginIndex < endIndex) {
-      Preconditions.checkArgument(iterator.hasNext());
-      if (path == null) {
-        path = (GcsPath) iterator.next();
-      } else {
-        path = path.resolve(iterator.next());
-      }
-      ++beginIndex;
-    }
-
-    return path;
-  }
-
-  @Override
-  public boolean startsWith(Path other) {
-    if (other instanceof GcsPath) {
-      GcsPath gcsPath = (GcsPath) other;
-      return startsWith(gcsPath.bucketAndObject());
-    } else {
-      return startsWith(other.toString());
-    }
-  }
-
-  @Override
-  public boolean startsWith(String prefix) {
-    return bucketAndObject().startsWith(prefix);
-  }
-
-  @Override
-  public boolean endsWith(Path other) {
-    if (other instanceof GcsPath) {
-      GcsPath gcsPath = (GcsPath) other;
-      return endsWith(gcsPath.bucketAndObject());
-    } else {
-      return endsWith(other.toString());
-    }
-  }
-
-  @Override
-  public boolean endsWith(String suffix) {
-    return bucketAndObject().endsWith(suffix);
-  }
-
-  // TODO: support "." and ".." path components?
-  @Override
-  public GcsPath normalize() {
-    return this;
-  }
-
-  @Override
-  public GcsPath resolve(Path other) {
-    if (other instanceof GcsPath) {
-      GcsPath path = (GcsPath) other;
-      if (path.isAbsolute()) {
-        return path;
-      } else {
-        return resolve(path.getObject());
-      }
-    } else {
-      return resolve(other.toString());
-    }
-  }
-
-  @Override
-  public GcsPath resolve(String other) {
-    if (bucket.isEmpty() && object.isEmpty()) {
-      // Resolve on a root path is equivalent to looking up a bucket and object.
-      other = SCHEME + "://" + other;
-    }
-
-    if (other.startsWith(SCHEME + "://")) {
-      GcsPath path = GcsPath.fromUri(other);
-      path.setFileSystem(getFileSystem());
-      return path;
-    }
-
-    if (other.isEmpty()) {
-      // An empty component MUST refer to a directory.
-      other = "/";
-    }
-
-    if (object.isEmpty()) {
-      return new GcsPath(fs, bucket, other);
-    } else if (object.endsWith("/")) {
-      return new GcsPath(fs, bucket, object + other);
-    } else {
-      return new GcsPath(fs, bucket, object + "/" + other);
-    }
-  }
-
-  @Override
-  public Path resolveSibling(Path other) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public Path resolveSibling(String other) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public Path relativize(Path other) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public GcsPath toAbsolutePath() {
-    return this;
-  }
-
-  @Override
-  public GcsPath toRealPath(LinkOption... options) throws IOException {
-    return this;
-  }
-
-  @Override
-  public File toFile() {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public WatchKey register(WatchService watcher, WatchEvent.Kind<?>[] events,
-      WatchEvent.Modifier... modifiers) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public WatchKey register(WatchService watcher, WatchEvent.Kind<?>... events)
-      throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public Iterator<Path> iterator() {
-    return new NameIterator(fs, !bucket.isEmpty(), bucketAndObject());
-  }
-
-  private static class NameIterator implements Iterator<Path> {
-    private final FileSystem fs;
-    private boolean fullPath;
-    private String name;
-
-    NameIterator(FileSystem fs, boolean fullPath, String name) {
-      this.fs = fs;
-      this.fullPath = fullPath;
-      this.name = name;
-    }
-
-    @Override
-    public boolean hasNext() {
-      return !Strings.isNullOrEmpty(name);
-    }
-
-    @Override
-    public GcsPath next() {
-      int i = name.indexOf('/');
-      String component;
-      if (i >= 0) {
-        component = name.substring(0, i);
-        name = name.substring(i + 1);
-      } else {
-        component = name;
-        name = null;
-      }
-      if (fullPath) {
-        fullPath = false;
-        return new GcsPath(fs, component, "");
-      } else {
-        // Relative paths have no bucket.
-        return new GcsPath(fs, "", component);
-      }
-    }
-
-    @Override
-    public void remove() {
-      throw new UnsupportedOperationException();
-    }
-  }
-
-  @Override
-  public int compareTo(Path other) {
-    if (!(other instanceof GcsPath)) {
-      throw new ClassCastException();
-    }
-
-    GcsPath path = (GcsPath) other;
-    int b = bucket.compareTo(path.bucket);
-    if (b != 0) {
-      return b;
-    }
-
-    // Compare a component at a time, so that the separator char doesn't
-    // get compared against component contents.  Eg, "a/b" < "a-1/b".
-    Iterator<Path> left = iterator();
-    Iterator<Path> right = path.iterator();
-
-    while (left.hasNext() && right.hasNext()) {
-      String leftStr = left.next().toString();
-      String rightStr = right.next().toString();
-      int c = leftStr.compareTo(rightStr);
-      if (c != 0) {
-        return c;
-      }
-    }
-
-    if (!left.hasNext() && !right.hasNext()) {
-      return 0;
-    } else {
-      return left.hasNext() ? 1 : -1;
-    }
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (this == o) {
-      return true;
-    }
-    if (o == null || getClass() != o.getClass()) {
-      return false;
-    }
-
-    GcsPath paths = (GcsPath) o;
-    return bucket.equals(paths.bucket) && object.equals(paths.object);
-  }
-
-  @Override
-  public int hashCode() {
-    int result = bucket.hashCode();
-    result = 31 * result + object.hashCode();
-    return result;
-  }
-
-  @Override
-  public String toString() {
-    if (!isAbsolute()) {
-      return object;
-    }
-    StringBuilder sb = new StringBuilder();
-    sb.append(SCHEME)
-        .append("://");
-    if (!bucket.isEmpty()) {
-      sb.append(bucket)
-          .append('/');
-    }
-    sb.append(object);
-    return sb.toString();
-  }
-
-  // TODO: Consider using resource names for all GCS paths used by the SDK.
-  public String toResourceName() {
-    StringBuilder sb = new StringBuilder();
-    sb.append("storage.googleapis.com/");
-    if (!bucket.isEmpty()) {
-      sb.append(bucket).append('/');
-    }
-    sb.append(object);
-    return sb.toString();
-  }
-
-  @Override
-  public URI toUri() {
-    try {
-      return new URI(SCHEME, "//" + bucketAndObject(), null);
-    } catch (URISyntaxException e) { // chain the cause so the failure stays diagnosable
-      throw new RuntimeException("Unable to create URI for GCS path " + this, e);
-    }
-  }
-
-  private String bucketAndObject() {
-    if (bucket.isEmpty()) {
-      return object;
-    } else {
-      return bucket + "/" + object;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/package-info.java
deleted file mode 100644
index 2f57938..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/gcsfs/package-info.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/** Defines utilities used to interact with Google Cloud Storage. **/
-package com.google.cloud.dataflow.sdk.util.gcsfs;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/package-info.java
deleted file mode 100644
index c92adab..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/package-info.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/** Defines utilities used by the Dataflow SDK. **/
-package com.google.cloud.dataflow.sdk.util;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/AccumulatorCombiningState.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/AccumulatorCombiningState.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/AccumulatorCombiningState.java
deleted file mode 100644
index 0d78b13..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/AccumulatorCombiningState.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-
-/**
- * State for a single value that is managed by a {@link CombineFn}. This is an internal extension
- * to {@link CombiningState} that includes the {@code AccumT} type.
- *
- * @param <InputT> the type of values added to the state
- * @param <AccumT> the type of accumulator
- * @param <OutputT> the type of value extracted from the state
- */
-public interface AccumulatorCombiningState<InputT, AccumT, OutputT>
-    extends CombiningState<InputT, OutputT> {
-
-  /**
-   * Read the merged accumulator for this combining value. It is implied that reading the
-   * state involes reading the accumulator, so {@link #readLater} is sufficient to prefetch for
-   * this.
-   */
-  AccumT getAccum();
-
-  /**
-   * Add an accumulator to this combining value. Depending on implementation this may immediately
-   * merge it with the previous accumulator, or may buffer this accumulator for a future merge.
-   */
-  void addAccum(AccumT accum);
-
-  /**
-   * Merge the given accumulators according to the underlying combiner.
-   */
-  AccumT mergeAccumulators(Iterable<AccumT> accumulators);
-
-  @Override
-  AccumulatorCombiningState<InputT, AccumT, OutputT> readLater();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/BagState.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/BagState.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/BagState.java
deleted file mode 100644
index 363e480..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/BagState.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-/**
- * State containing a bag values. Items can be added to the bag and the contents read out.
- *
- * @param <T> The type of elements in the bag.
- */
-public interface BagState<T> extends CombiningState<T, Iterable<T>> {
-  @Override
-  BagState<T> readLater();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/CombiningState.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/CombiningState.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/CombiningState.java
deleted file mode 100644
index 673bebb..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/CombiningState.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-
-/**
- * State that combines multiple {@code InputT} values using a {@link CombineFn} to produce a single
- * {@code OutputT} value.
- *
- * @param <InputT> the type of values added to the state
- * @param <OutputT> the type of value extracted from the state
- */
-public interface CombiningState<InputT, OutputT> extends ReadableState<OutputT>, State {
-  /**
-   * Add a value to the buffer.
-   */
-  void add(InputT value);
-
-  /**
-   * Return true if this state is empty.
-   */
-  ReadableState<Boolean> isEmpty();
-
-  @Override
-  CombiningState<InputT, OutputT> readLater();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/CopyOnAccessInMemoryStateInternals.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/CopyOnAccessInMemoryStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/CopyOnAccessInMemoryStateInternals.java
deleted file mode 100644
index 3683b74..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/CopyOnAccessInMemoryStateInternals.java
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.KeyedCombineFnWithContext;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
-import com.google.cloud.dataflow.sdk.util.CombineFnUtil;
-import com.google.cloud.dataflow.sdk.util.state.InMemoryStateInternals.InMemoryState;
-import com.google.cloud.dataflow.sdk.util.state.StateTag.StateBinder;
-import com.google.common.base.Optional;
-import com.google.common.collect.Iterables;
-
-import org.joda.time.Instant;
-
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * {@link StateInternals} built on top of an underlying {@link StateTable} that contains instances
- * of {@link InMemoryState}. Whenever state that exists in the underlying {@link StateTable} is
- * accessed, an independent copy will be created within this table.
- */
-public class CopyOnAccessInMemoryStateInternals<K> implements StateInternals<K> {
-  private final K key;
-  private final CopyOnAccessInMemoryStateTable<K> table;
-
-  /**
-   * Creates a new {@link CopyOnAccessInMemoryStateInternals} with the underlying (possibly null)
-   * StateInternals.
-   */
-  public static <K> CopyOnAccessInMemoryStateInternals<K> withUnderlying(
-      K key, @Nullable CopyOnAccessInMemoryStateInternals<K> underlying) {
-    return new CopyOnAccessInMemoryStateInternals<K>(key, underlying);
-  }
-
-  private CopyOnAccessInMemoryStateInternals(
-      K key, CopyOnAccessInMemoryStateInternals<K> underlying) {
-    this.key = key;
-    table =
-        new CopyOnAccessInMemoryStateTable<K>(key, underlying == null ? null : underlying.table);
-  }
-
-  /**
-   * Ensures this {@link CopyOnAccessInMemoryStateInternals} is complete. Other copies of state for
-   * the same Step and Key may be discarded after invoking this method.
-   *
-   * <p>For each {@link StateNamespace}, for each {@link StateTag address} in that namespace that
-   * has not been bound in this {@link CopyOnAccessInMemoryStateInternals}, put a reference to that
-   * state within this {@link StateInternals}.
-   *
-   * <p>Additionally, stores the {@link WatermarkHoldState} with the earliest time bound in the
-   * state table after the commit is completed, enabling calls to
-   * {@link #getEarliestWatermarkHold()}.
-   *
-   * @return this table
-   */
-  public CopyOnAccessInMemoryStateInternals<K> commit() {
-    table.commit();
-    return this;
-  }
-
-  /**
-   * Gets the earliest Watermark Hold present in this table.
-   *
-   * <p>Must be called after this state has been committed. Will throw an
-   * {@link IllegalStateException} if the state has not been committed.
-   */
-  public Instant getEarliestWatermarkHold() {
-    // After commit, the watermark hold is always present, but may be
-    // BoundedWindow#TIMESTAMP_MAX_VALUE if there is no hold set.
-    checkState(
-        table.earliestWatermarkHold.isPresent(),
-        "Can't get the earliest watermark hold in a %s before it is committed",
-        getClass().getSimpleName());
-    return table.earliestWatermarkHold.get();
-  }
-
-  @Override
-  public <T extends State> T state(StateNamespace namespace, StateTag<? super K, T> address) {
-    return state(namespace, address, StateContexts.nullContext());
-  }
-
-  @Override
-  public <T extends State> T state(
-      StateNamespace namespace, StateTag<? super K, T> address, StateContext<?> c) {
-    return table.get(namespace, address, c);
-  }
-
-  @Override
-  public K getKey() {
-    return key;
-  }
-
-  public boolean isEmpty() {
-    return Iterables.isEmpty(table.values());
-  }
-
-  /**
-   * A {@link StateTable} that, when a value is retrieved with
-   * {@link StateTable#get(StateNamespace, StateTag)}, first attempts to obtain a copy of existing
-   * {@link State} from an underlying {@link StateTable}.
-   */
-  private static class CopyOnAccessInMemoryStateTable<K> extends StateTable<K> {
-    private final K key;
-    private Optional<StateTable<K>> underlying;
-
-    /**
-     * The StateBinderFactory currently in use by this {@link CopyOnAccessInMemoryStateTable}.
-     *
-     * <p>There are three {@link StateBinderFactory} implementations used by the {@link
-     * CopyOnAccessInMemoryStateTable}.
-     * <ul>
-     *   <li>The default {@link StateBinderFactory} is a {@link CopyOnBindBinderFactory}, allowing
-     *       the table to copy any existing {@link State} values to this {@link StateTable} from the
-     *       underlying table when accessed, at which point mutations will not be visible to the
-     *       underlying table - effectively a "Copy by Value" binder.</li>
-     *   <li>During the execution of the {@link #commit()} method, this is a
-     *       {@link ReadThroughBinderFactory}, which copies the references to the existing
-     *       {@link State} objects to this {@link StateTable}.</li>
-     *   <li>After the execution of the {@link #commit()} method, this is an
-     *       instance of {@link InMemoryStateBinderFactory}, which constructs new instances of state
-     *       when a {@link StateTag} is bound.</li>
-     * </ul>
-     */
-    private StateBinderFactory<K> binderFactory;
-
-    /**
-     * The earliest watermark hold in this table.
-     */
-    private Optional<Instant> earliestWatermarkHold;
-
-    public CopyOnAccessInMemoryStateTable(K key, StateTable<K> underlying) {
-      this.key = key;
-      this.underlying = Optional.fromNullable(underlying);
-      binderFactory = new CopyOnBindBinderFactory<>(key, this.underlying);
-      earliestWatermarkHold = Optional.absent();
-    }
-
-    /**
-     * Copies all values in the underlying table to this table, then discards the underlying table.
-     *
-     * <p>If there is an underlying table, this replaces the existing
-     * {@link CopyOnBindBinderFactory} with a {@link ReadThroughBinderFactory}, then reads all of
-     * the values in the existing table, binding the state values to this table. The old StateTable
-     * should be discarded after the call to {@link #commit()}.
-     *
-     * <p>After copying all of the existing values, replace the binder factory with an instance of
-     * {@link InMemoryStateBinderFactory} to construct new values, since all existing values
-     * are bound in this {@link StateTable table} and this table represents the canonical state.
-     */
-    private void commit() {
-      Instant earliestHold = getEarliestWatermarkHold();
-      if (underlying.isPresent()) {
-        ReadThroughBinderFactory<K> readThroughBinder =
-            new ReadThroughBinderFactory<>(underlying.get());
-        binderFactory = readThroughBinder;
-        Instant earliestUnderlyingHold = readThroughBinder.readThroughAndGetEarliestHold(this);
-        if (earliestUnderlyingHold.isBefore(earliestHold)) {
-          earliestHold = earliestUnderlyingHold;
-        }
-      }
-      earliestWatermarkHold = Optional.of(earliestHold);
-      clearEmpty();
-      binderFactory = new InMemoryStateBinderFactory<>(key);
-      underlying = Optional.absent();
-    }
-
-    /**
-     * Get the earliest watermark hold in this table. Ignores the contents of any underlying table.
-     */
-    private Instant getEarliestWatermarkHold() {
-      Instant earliest = BoundedWindow.TIMESTAMP_MAX_VALUE;
-      for (State existingState : this.values()) {
-        if (existingState instanceof WatermarkHoldState) {
-          Instant hold = ((WatermarkHoldState<?>) existingState).read();
-          if (hold != null && hold.isBefore(earliest)) {
-            earliest = hold;
-          }
-        }
-      }
-      return earliest;
-    }
-
-    /**
-     * Clear all empty {@link StateNamespace StateNamespaces} from this table. If all states are
-     * empty, clear the entire table.
-     *
-     * <p>Because {@link InMemoryState} is not removed from the {@link StateTable} after it is
-     * cleared, in case contents are modified after being cleared, the table must be explicitly
-     * checked to ensure that it contains state and removed if not (otherwise we may never use
-     * the table again).
-     */
-    private void clearEmpty() {
-      Collection<StateNamespace> emptyNamespaces = new HashSet<>(this.getNamespacesInUse());
-      for (StateNamespace namespace : this.getNamespacesInUse()) {
-        for (State existingState : this.getTagsInUse(namespace).values()) {
-          if (!((InMemoryState<?>) existingState).isCleared()) {
-            emptyNamespaces.remove(namespace);
-            break;
-          }
-        }
-      }
-      for (StateNamespace empty : emptyNamespaces) {
-        this.clearNamespace(empty);
-      }
-    }
-
-    @Override
-    protected StateBinder<K> binderForNamespace(final StateNamespace namespace, StateContext<?> c) {
-      return binderFactory.forNamespace(namespace, c);
-    }
-
-    private static interface StateBinderFactory<K> {
-      StateBinder<K> forNamespace(StateNamespace namespace, StateContext<?> c);
-    }
-
-    /**
-     * {@link StateBinderFactory} that creates a copy of any existing state when the state is bound.
-     */
-    private static class CopyOnBindBinderFactory<K> implements StateBinderFactory<K> {
-      private final K key;
-      private final Optional<StateTable<K>> underlying;
-
-      public CopyOnBindBinderFactory(K key, Optional<StateTable<K>> underlying) {
-        this.key = key;
-        this.underlying = underlying;
-      }
-
-      private boolean containedInUnderlying(StateNamespace namespace, StateTag<? super K, ?> tag) {
-        return underlying.isPresent() && underlying.get().isNamespaceInUse(namespace)
-            && underlying.get().getTagsInUse(namespace).containsKey(tag);
-      }
-
-      @Override
-      public StateBinder<K> forNamespace(final StateNamespace namespace, final StateContext<?> c) {
-        return new StateBinder<K>() {
-          @Override
-          public <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
-              StateTag<? super K, WatermarkHoldState<W>> address,
-              OutputTimeFn<? super W> outputTimeFn) {
-            if (containedInUnderlying(namespace, address)) {
-              @SuppressWarnings("unchecked")
-              InMemoryState<? extends WatermarkHoldState<W>> existingState =
-                  (InMemoryStateInternals.InMemoryState<? extends WatermarkHoldState<W>>)
-                  underlying.get().get(namespace, address, c);
-              return existingState.copy();
-            } else {
-              return new InMemoryStateInternals.InMemoryWatermarkHold<>(
-                  outputTimeFn);
-            }
-          }
-
-          @Override
-          public <T> ValueState<T> bindValue(
-              StateTag<? super K, ValueState<T>> address, Coder<T> coder) {
-            if (containedInUnderlying(namespace, address)) {
-              @SuppressWarnings("unchecked")
-              InMemoryState<? extends ValueState<T>> existingState =
-                  (InMemoryStateInternals.InMemoryState<? extends ValueState<T>>)
-                  underlying.get().get(namespace, address, c);
-              return existingState.copy();
-            } else {
-              return new InMemoryStateInternals.InMemoryValue<>();
-            }
-          }
-
-          @Override
-          public <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-              bindCombiningValue(
-                  StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-                  Coder<AccumT> accumCoder, CombineFn<InputT, AccumT, OutputT> combineFn) {
-            if (containedInUnderlying(namespace, address)) {
-              @SuppressWarnings("unchecked")
-              InMemoryState<? extends AccumulatorCombiningState<InputT, AccumT, OutputT>>
-                  existingState = (
-                      InMemoryStateInternals
-                          .InMemoryState<? extends AccumulatorCombiningState<InputT, AccumT,
-                          OutputT>>) underlying.get().get(namespace, address, c);
-              return existingState.copy();
-            } else {
-              return new InMemoryStateInternals.InMemoryCombiningValue<>(
-                  key, combineFn.asKeyedFn());
-            }
-          }
-
-          @Override
-          public <T> BagState<T> bindBag(
-              StateTag<? super K, BagState<T>> address, Coder<T> elemCoder) {
-            if (containedInUnderlying(namespace, address)) {
-              @SuppressWarnings("unchecked")
-              InMemoryState<? extends BagState<T>> existingState =
-                  (InMemoryStateInternals.InMemoryState<? extends BagState<T>>)
-                  underlying.get().get(namespace, address, c);
-              return existingState.copy();
-            } else {
-              return new InMemoryStateInternals.InMemoryBag<>();
-            }
-          }
-
-          @Override
-          public <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-              bindKeyedCombiningValue(
-                  StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-                  Coder<AccumT> accumCoder,
-                  KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
-            if (containedInUnderlying(namespace, address)) {
-              @SuppressWarnings("unchecked")
-              InMemoryState<? extends AccumulatorCombiningState<InputT, AccumT, OutputT>>
-                  existingState = (
-                      InMemoryStateInternals
-                          .InMemoryState<? extends AccumulatorCombiningState<InputT, AccumT,
-                          OutputT>>) underlying.get().get(namespace, address, c);
-              return existingState.copy();
-            } else {
-              return new InMemoryStateInternals.InMemoryCombiningValue<>(key, combineFn);
-            }
-          }
-
-          @Override
-          public <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-          bindKeyedCombiningValueWithContext(
-                  StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-                  Coder<AccumT> accumCoder,
-                  KeyedCombineFnWithContext<? super K, InputT, AccumT, OutputT> combineFn) {
-            return bindKeyedCombiningValue(
-                address, accumCoder, CombineFnUtil.bindContext(combineFn, c));
-          }
-        };
-      }
-    }
-
-    /**
-     * {@link StateBinderFactory} that reads directly from the underlying table. Used during calls
-     * to {@link CopyOnAccessInMemoryStateTable#commit()} to read all values from
-     * the underlying table.
-     */
-    private static class ReadThroughBinderFactory<K> implements StateBinderFactory<K> {
-      private final StateTable<K> underlying;
-
-      public ReadThroughBinderFactory(StateTable<K> underlying) {
-        this.underlying = underlying;
-      }
-
-      public Instant readThroughAndGetEarliestHold(StateTable<K> readTo) {
-        Instant earliestHold = BoundedWindow.TIMESTAMP_MAX_VALUE;
-        for (StateNamespace namespace : underlying.getNamespacesInUse()) {
-          for (Map.Entry<StateTag<? super K, ?>, ? extends State> existingState :
-              underlying.getTagsInUse(namespace).entrySet()) {
-            if (!((InMemoryState<?>) existingState.getValue()).isCleared()) {
-              // Only read through non-cleared values to ensure that completed windows are
-              // eventually discarded, and remember the earliest watermark hold from among those
-              // values.
-              State state =
-                  readTo.get(namespace, existingState.getKey(), StateContexts.nullContext());
-              if (state instanceof WatermarkHoldState) {
-                Instant hold = ((WatermarkHoldState<?>) state).read();
-                if (hold != null && hold.isBefore(earliestHold)) {
-                  earliestHold = hold;
-                }
-              }
-            }
-          }
-        }
-        return earliestHold;
-      }
-
-      @Override
-      public StateBinder<K> forNamespace(final StateNamespace namespace, final StateContext<?> c) {
-        return new StateBinder<K>() {
-          @Override
-          public <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
-              StateTag<? super K, WatermarkHoldState<W>> address,
-              OutputTimeFn<? super W> outputTimeFn) {
-            return underlying.get(namespace, address, c);
-          }
-
-          @Override
-          public <T> ValueState<T> bindValue(
-              StateTag<? super K, ValueState<T>> address, Coder<T> coder) {
-            return underlying.get(namespace, address, c);
-          }
-
-          @Override
-          public <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-              bindCombiningValue(
-                  StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-                  Coder<AccumT> accumCoder, CombineFn<InputT, AccumT, OutputT> combineFn) {
-            return underlying.get(namespace, address, c);
-          }
-
-          @Override
-          public <T> BagState<T> bindBag(
-              StateTag<? super K, BagState<T>> address, Coder<T> elemCoder) {
-            return underlying.get(namespace, address, c);
-          }
-
-          @Override
-          public <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-              bindKeyedCombiningValue(
-                  StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-                  Coder<AccumT> accumCoder,
-                  KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
-            return underlying.get(namespace, address, c);
-          }
-
-          @Override
-          public <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-          bindKeyedCombiningValueWithContext(
-                  StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-                  Coder<AccumT> accumCoder,
-                  KeyedCombineFnWithContext<? super K, InputT, AccumT, OutputT> combineFn) {
-            return bindKeyedCombiningValue(
-                address, accumCoder, CombineFnUtil.bindContext(combineFn, c));
-          }
-        };
-      }
-    }
-
-    private static class InMemoryStateBinderFactory<K> implements StateBinderFactory<K> {
-      private final K key;
-
-      public InMemoryStateBinderFactory(K key) {
-        this.key = key;
-      }
-
-      @Override
-      public StateBinder<K> forNamespace(StateNamespace namespace, StateContext<?> c) {
-        return new InMemoryStateInternals.InMemoryStateBinder<>(key, c);
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java
deleted file mode 100644
index 8404801..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/InMemoryStateInternals.java
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.KeyedCombineFnWithContext;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
-import com.google.cloud.dataflow.sdk.util.CombineFnUtil;
-import com.google.cloud.dataflow.sdk.util.state.StateTag.StateBinder;
-
-import org.joda.time.Instant;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Objects;
-
-import javax.annotation.Nullable;
-
-/**
- * In-memory implementation of {@link StateInternals}. Used in {@code BatchModeExecutionContext}
- * and for running tests that need state.
- */
-@Experimental(Kind.STATE)
-public class InMemoryStateInternals<K> implements StateInternals<K> {
-
-  public static <K> InMemoryStateInternals<K> forKey(K key) {
-    return new InMemoryStateInternals<>(key);
-  }
-
-  private final K key;
-
-  protected InMemoryStateInternals(K key) {
-    this.key = key;
-  }
-
-  @Override
-  public K getKey() {
-    return key;
-  }
-
-  interface InMemoryState<T extends InMemoryState<T>> {
-    boolean isCleared();
-    T copy();
-  }
-
-  protected final StateTable<K> inMemoryState = new StateTable<K>() {
-    @Override
-    protected StateBinder<K> binderForNamespace(StateNamespace namespace, StateContext<?> c) {
-      return new InMemoryStateBinder<K>(key, c);
-    }
-  };
-
-  public void clear() {
-    inMemoryState.clear();
-  }
-
-  /**
-   * Return true if the given state is empty. This is used by the test framework to make sure
-   * that the state has been properly cleaned up.
-   */
-  protected boolean isEmptyForTesting(State state) {
-    return ((InMemoryState<?>) state).isCleared();
-  }
-
-  @Override
-  public <T extends State> T state(StateNamespace namespace, StateTag<? super K, T> address) {
-    return inMemoryState.get(namespace, address, StateContexts.nullContext());
-  }
-
-  @Override
-  public <T extends State> T state(
-      StateNamespace namespace, StateTag<? super K, T> address, final StateContext<?> c) {
-    return inMemoryState.get(namespace, address, c);
-  }
-
-  /**
-   * A {@link StateBinder} that returns In Memory {@link State} objects.
-   */
-  static class InMemoryStateBinder<K> implements StateBinder<K> {
-    private final K key;
-    private final StateContext<?> c;
-
-    InMemoryStateBinder(K key, StateContext<?> c) {
-      this.key = key;
-      this.c = c;
-    }
-
-    @Override
-    public <T> ValueState<T> bindValue(
-        StateTag<? super K, ValueState<T>> address, Coder<T> coder) {
-      return new InMemoryValue<T>();
-    }
-
-    @Override
-    public <T> BagState<T> bindBag(
-        final StateTag<? super K, BagState<T>> address, Coder<T> elemCoder) {
-      return new InMemoryBag<T>();
-    }
-
-    @Override
-    public <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-        bindCombiningValue(
-            StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-            Coder<AccumT> accumCoder,
-            final CombineFn<InputT, AccumT, OutputT> combineFn) {
-      return new InMemoryCombiningValue<K, InputT, AccumT, OutputT>(key, combineFn.<K>asKeyedFn());
-    }
-
-    @Override
-    public <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
-        StateTag<? super K, WatermarkHoldState<W>> address,
-        OutputTimeFn<? super W> outputTimeFn) {
-      return new InMemoryWatermarkHold<W>(outputTimeFn);
-    }
-
-    @Override
-    public <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-        bindKeyedCombiningValue(
-            StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-            Coder<AccumT> accumCoder,
-            KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
-      return new InMemoryCombiningValue<K, InputT, AccumT, OutputT>(key, combineFn);
-    }
-
-    @Override
-    public <InputT, AccumT, OutputT> AccumulatorCombiningState<InputT, AccumT, OutputT>
-        bindKeyedCombiningValueWithContext(
-            StateTag<? super K, AccumulatorCombiningState<InputT, AccumT, OutputT>> address,
-            Coder<AccumT> accumCoder,
-            KeyedCombineFnWithContext<? super K, InputT, AccumT, OutputT> combineFn) {
-      return bindKeyedCombiningValue(address, accumCoder, CombineFnUtil.bindContext(combineFn, c));
-    }
-  }
-
-  static final class InMemoryValue<T> implements ValueState<T>, InMemoryState<InMemoryValue<T>> {
-    private boolean isCleared = true;
-    private T value = null;
-
-    @Override
-    public void clear() {
-      // Even though we're clearing we can't remove this from the in-memory state map, since
-      // other users may already have a handle on this Value.
-      value = null;
-      isCleared = true;
-    }
-
-    @Override
-    public InMemoryValue<T> readLater() {
-      return this;
-    }
-
-    @Override
-    public T read() {
-      return value;
-    }
-
-    @Override
-    public void write(T input) {
-      isCleared = false;
-      this.value = input;
-    }
-
-    @Override
-    public InMemoryValue<T> copy() {
-      InMemoryValue<T> that = new InMemoryValue<>();
-      if (!this.isCleared) {
-        that.isCleared = this.isCleared;
-        that.value = this.value;
-      }
-      return that;
-    }
-
-    @Override
-    public boolean isCleared() {
-      return isCleared;
-    }
-  }
-
-  static final class InMemoryWatermarkHold<W extends BoundedWindow>
-      implements WatermarkHoldState<W>, InMemoryState<InMemoryWatermarkHold<W>> {
-
-    private final OutputTimeFn<? super W> outputTimeFn;
-
-    @Nullable
-    private Instant combinedHold = null;
-
-    public InMemoryWatermarkHold(OutputTimeFn<? super W> outputTimeFn) {
-      this.outputTimeFn = outputTimeFn;
-    }
-
-    @Override
-    public InMemoryWatermarkHold<W> readLater() {
-      return this;
-    }
-
-    @Override
-    public void clear() {
-      // Even though we're clearing we can't remove this from the in-memory state map, since
-      // other users may already have a handle on this WatermarkHold.
-      combinedHold = null;
-    }
-
-    @Override
-    public Instant read() {
-      return combinedHold;
-    }
-
-    @Override
-    public void add(Instant outputTime) {
-      combinedHold = combinedHold == null ? outputTime
-          : outputTimeFn.combine(combinedHold, outputTime);
-    }
-
-    @Override
-    public boolean isCleared() {
-      return combinedHold == null;
-    }
-
-    @Override
-    public ReadableState<Boolean> isEmpty() {
-      return new ReadableState<Boolean>() {
-        @Override
-        public ReadableState<Boolean> readLater() {
-          return this;
-        }
-        @Override
-        public Boolean read() {
-          return combinedHold == null;
-        }
-      };
-    }
-
-    @Override
-    public OutputTimeFn<? super W> getOutputTimeFn() {
-      return outputTimeFn;
-    }
-
-    @Override
-    public String toString() {
-      return Objects.toString(combinedHold);
-    }
-
-    @Override
-    public InMemoryWatermarkHold<W> copy() {
-      InMemoryWatermarkHold<W> that =
-          new InMemoryWatermarkHold<>(outputTimeFn);
-      that.combinedHold = this.combinedHold;
-      return that;
-    }
-  }
-
-  static final class InMemoryCombiningValue<K, InputT, AccumT, OutputT>
-      implements AccumulatorCombiningState<InputT, AccumT, OutputT>,
-          InMemoryState<InMemoryCombiningValue<K, InputT, AccumT, OutputT>> {
-    private final K key;
-    private boolean isCleared = true;
-    private final KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn;
-    private AccumT accum;
-
-    InMemoryCombiningValue(
-        K key, KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
-      this.key = key;
-      this.combineFn = combineFn;
-      accum = combineFn.createAccumulator(key);
-    }
-
-    @Override
-    public InMemoryCombiningValue<K, InputT, AccumT, OutputT> readLater() {
-      return this;
-    }
-
-    @Override
-    public void clear() {
-      // Even though we're clearing we can't remove this from the in-memory state map, since
-      // other users may already have a handle on this CombiningValue.
-      accum = combineFn.createAccumulator(key);
-      isCleared = true;
-    }
-
-    @Override
-    public OutputT read() {
-      return combineFn.extractOutput(key, accum);
-    }
-
-    @Override
-    public void add(InputT input) {
-      isCleared = false;
-      accum = combineFn.addInput(key, accum, input);
-    }
-
-    @Override
-    public AccumT getAccum() {
-      return accum;
-    }
-
-    @Override
-    public ReadableState<Boolean> isEmpty() {
-      return new ReadableState<Boolean>() {
-        @Override
-        public ReadableState<Boolean> readLater() {
-          return this;
-        }
-        @Override
-        public Boolean read() {
-          return isCleared;
-        }
-      };
-    }
-
-    @Override
-    public void addAccum(AccumT accum) {
-      isCleared = false;
-      this.accum = combineFn.mergeAccumulators(key, Arrays.asList(this.accum, accum));
-    }
-
-    @Override
-    public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
-      return combineFn.mergeAccumulators(key, accumulators);
-    }
-
-    @Override
-    public boolean isCleared() {
-      return isCleared;
-    }
-
-    @Override
-    public InMemoryCombiningValue<K, InputT, AccumT, OutputT> copy() {
-      InMemoryCombiningValue<K, InputT, AccumT, OutputT> that =
-          new InMemoryCombiningValue<>(key, combineFn);
-      if (!this.isCleared) {
-        that.isCleared = this.isCleared;
-        that.addAccum(accum);
-      }
-      return that;
-    }
-  }
-
-  static final class InMemoryBag<T> implements BagState<T>, InMemoryState<InMemoryBag<T>> {
-    private List<T> contents = new ArrayList<>();
-
-    @Override
-    public void clear() {
-      // Even though we're clearing we can't remove this from the in-memory state map, since
-      // other users may already have a handle on this Bag.
-      // The result of get/read below must be stable for the lifetime of the bundle within which it
-      // was generated. In batch and direct runners the bundle lifetime can be
-      // greater than the window lifetime, in which case this method can be called while
-      // the result is still in use. We protect against this by hot-swapping instead of
-      // clearing the contents.
-      contents = new ArrayList<>();
-    }
-
-    @Override
-    public InMemoryBag<T> readLater() {
-      return this;
-    }
-
-    @Override
-    public Iterable<T> read() {
-      return contents;
-    }
-
-    @Override
-    public void add(T input) {
-      contents.add(input);
-    }
-
-    @Override
-    public boolean isCleared() {
-      return contents.isEmpty();
-    }
-
-    @Override
-    public ReadableState<Boolean> isEmpty() {
-      return new ReadableState<Boolean>() {
-        @Override
-        public ReadableState<Boolean> readLater() {
-          return this;
-        }
-
-        @Override
-        public Boolean read() {
-          return contents.isEmpty();
-        }
-      };
-    }
-
-    @Override
-    public InMemoryBag<T> copy() {
-      InMemoryBag<T> that = new InMemoryBag<>();
-      that.contents.addAll(this.contents);
-      return that;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateAccessor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateAccessor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateAccessor.java
deleted file mode 100644
index 40211d7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/MergingStateAccessor.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-
-import java.util.Map;
-
-/**
- * Interface for accessing persistent state while windows are merging.
- *
- * <p>For internal use only.
- */
-@Experimental(Kind.STATE)
-public interface MergingStateAccessor<K, W extends BoundedWindow>
-    extends StateAccessor<K> {
-  /**
-   * Analogous to {@link #access}, but returned as a map from each window which is
-   * about to be merged to the corresponding state. Only includes windows which
-   * are known to have state.
-   */
-  <StateT extends State> Map<W, StateT> accessInEachMergingWindow(
-      StateTag<? super K, StateT> address);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/ReadableState.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/ReadableState.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/ReadableState.java
deleted file mode 100644
index 8f690a3..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/ReadableState.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-
-/**
- * A {@code ReadableState} is produced by the read methods on all {@link State} objects.
- * Calling {@link #read} returns the associated value.
- *
- * <p>This class is similar to {@link java.util.concurrent.Future}, but each invocation of
- * {@link #read} need not return the same value.
- *
- * <p>Getting the {@code ReadableState} from a read method indicates the desire to eventually
- * read a value. Depending on the runner this may or may not immediately start the read.
- *
- * @param <T> The type of value returned by {@link #read}.
- */
-@Experimental(Kind.STATE)
-public interface ReadableState<T> {
-  /**
-   * Read the current value, blocking until it is available.
-   *
-   * <p>If there will be many calls to {@link #read} for different state in short succession,
-   * you should first call {@link #readLater} for all of them so the reads can potentially be
-   * batched (depending on the underlying {@link StateInternals} implementation).
-   */
-  T read();
-
-  /**
-   * Indicate that the value will be read later.
-   *
-   * <p>This allows a {@link StateInternals} implementation to start an asynchronous prefetch or
-   * to include this state in the next batch of reads.
-   *
-   * @return this for convenient chaining
-   */
-  ReadableState<T> readLater();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/State.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/State.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/State.java
deleted file mode 100644
index 0cef786..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/State.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-/**
- * Base interface for all state locations.
- *
- * <p>Specific types of state add appropriate accessors for reading and writing values, see
- * {@link ValueState}, {@link BagState}, and {@link CombiningState}.
- */
-public interface State {
-
-  /**
-   * Clear out the state location.
-   */
-  void clear();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateAccessor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateAccessor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateAccessor.java
deleted file mode 100644
index 6cfbecf..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateAccessor.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-
-/**
- * Interface for accessing a {@link StateTag} in the current context.
- *
- * <p>For internal use only.
- */
-@Experimental(Kind.STATE)
-public interface StateAccessor<K> {
-  /**
-   * Access the storage for the given {@code address} in the current window.
-   *
-   * <p>Never accounts for merged windows. When windows are merged, any state accessed via
-   * this method must be eagerly combined and written into the result window.
-   */
-  <StateT extends State> StateT access(StateTag<? super K, StateT> address);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateContext.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateContext.java
deleted file mode 100644
index 96387d8..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/state/StateContext.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2016 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util.state;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-
-/**
- * Information accessible to the state API.
- */
-public interface StateContext<W extends BoundedWindow> {
-  /**
-   * Returns the {@code PipelineOptions} specified with the
-   * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner}.
-   */
-  public abstract PipelineOptions getPipelineOptions();
-
-  /**
-   * Returns the value of the side input for the corresponding state window.
-   */
-  public abstract <T> T sideInput(PCollectionView<T> view);
-
-  /**
-   * Returns the window corresponding to the state.
-   */
-  public abstract W window();
-}

[40/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Write.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Write.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Write.java
deleted file mode 100644
index 0b78b83..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/Write.java
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.SerializableCoder;
-import com.google.cloud.dataflow.sdk.io.Sink.WriteOperation;
-import com.google.cloud.dataflow.sdk.io.Sink.Writer;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.transforms.View;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.PDone;
-
-import org.joda.time.Instant;
-
-import java.util.UUID;
-
-/**
- * A {@link PTransform} that writes to a {@link Sink}. A write begins with a sequential global
- * initialization of a sink, followed by a parallel write, and ends with a sequential finalization
- * of the write. The output of a write is {@link PDone}.  In the case of an empty PCollection, only
- * the global initialization and finalization will be performed.
- *
- * <p>Currently, only batch workflows can contain Write transforms.
- *
- * <p>Example usage:
- *
- * <p>{@code p.apply(Write.to(new MySink(...)));}
- */
-@Experimental(Experimental.Kind.SOURCE_SINK)
-public class Write {
-  /**
-   * Creates a Write transform that writes to the given Sink.
-   */
-  public static <T> Bound<T> to(Sink<T> sink) {
-    return new Bound<>(sink);
-  }
-
-  /**
-   * A {@link PTransform} that writes to a {@link Sink}. See {@link Write} and {@link Sink} for
-   * documentation about writing to Sinks.
-   */
-  public static class Bound<T> extends PTransform<PCollection<T>, PDone> {
-    private final Sink<T> sink;
-
-    private Bound(Sink<T> sink) {
-      this.sink = sink;
-    }
-
-    @Override
-    public PDone apply(PCollection<T> input) {
-      PipelineOptions options = input.getPipeline().getOptions();
-      sink.validate(options);
-      return createWrite(input, sink.createWriteOperation(options));
-    }
-
-    /**
-     * Returns the {@link Sink} associated with this PTransform.
-     */
-    public Sink<T> getSink() {
-      return sink;
-    }
-
-    /**
-     * A write is performed as a sequence of three {@link ParDo}'s.
-     *
-     * <p>In the first, a do-once ParDo is applied to a singleton PCollection containing the Sink's
-     * {@link WriteOperation}. In this initialization ParDo, {@link WriteOperation#initialize} is
-     * called. The output of this ParDo is a singleton PCollection
-     * containing the WriteOperation.
-     *
-     * <p>This singleton collection containing the WriteOperation is then used as a side input to a
-     * ParDo over the PCollection of elements to write. In this bundle-writing phase,
-     * {@link WriteOperation#createWriter} is called to obtain a {@link Writer}.
-     * {@link Writer#open} and {@link Writer#close} are called in {@link DoFn#startBundle} and
-     * {@link DoFn#finishBundle}, respectively, and {@link Writer#write} method is called for every
-     * element in the bundle. The output of this ParDo is a PCollection of <i>writer result</i>
-     * objects (see {@link Sink} for a description of writer results)-one for each bundle.
-     *
-     * <p>The final do-once ParDo uses the singleton collection of the WriteOperation as input and
-     * the collection of writer results as a side-input. In this ParDo,
-     * {@link WriteOperation#finalize} is called to finalize the write.
-     *
-     * <p>If the write of any element in the PCollection fails, {@link Writer#close} will be called
-     * before the exception that caused the write to fail is propagated and the write result will be
-     * discarded.
-     *
-     * <p>Since the {@link WriteOperation} is serialized after the initialization ParDo and
-     * deserialized in the bundle-writing and finalization phases, any state change to the
-     * WriteOperation object that occurs during initialization is visible in the latter phases.
-     * However, the WriteOperation is not serialized after the bundle-writing phase.  This is why
-     * implementations should guarantee that {@link WriteOperation#createWriter} does not mutate
-     * the WriteOperation.
-     */
-    private <WriteT> PDone createWrite(
-        PCollection<T> input, WriteOperation<T, WriteT> writeOperation) {
-      Pipeline p = input.getPipeline();
-
-      // A coder to use for the WriteOperation.
-      @SuppressWarnings("unchecked")
-      Coder<WriteOperation<T, WriteT>> operationCoder =
-          (Coder<WriteOperation<T, WriteT>>) SerializableCoder.of(writeOperation.getClass());
-
-      // A singleton collection of the WriteOperation, to be used as input to a ParDo to initialize
-      // the sink.
-      PCollection<WriteOperation<T, WriteT>> operationCollection =
-          p.apply(Create.<WriteOperation<T, WriteT>>of(writeOperation).withCoder(operationCoder));
-
-      // Initialize the resource in a do-once ParDo on the WriteOperation.
-      operationCollection = operationCollection
-          .apply("Initialize", ParDo.of(
-              new DoFn<WriteOperation<T, WriteT>, WriteOperation<T, WriteT>>() {
-            @Override
-            public void processElement(ProcessContext c) throws Exception {
-              WriteOperation<T, WriteT> writeOperation = c.element();
-              writeOperation.initialize(c.getPipelineOptions());
-              // The WriteOperation is also the output of this ParDo, so it can have mutable
-              // state.
-              c.output(writeOperation);
-            }
-          }))
-          .setCoder(operationCoder);
-
-      // Create a view of the WriteOperation to be used as a sideInput to the parallel write phase.
-      final PCollectionView<WriteOperation<T, WriteT>> writeOperationView =
-          operationCollection.apply(View.<WriteOperation<T, WriteT>>asSingleton());
-
-      // Perform the per-bundle writes as a ParDo on the input PCollection (with the WriteOperation
-      // as a side input) and collect the results of the writes in a PCollection.
-      // There is a dependency between this ParDo and the first (the WriteOperation PCollection
-      // as a side input), so this will happen after the initial ParDo.
-      PCollection<WriteT> results = input
-          .apply("WriteBundles", ParDo.of(new DoFn<T, WriteT>() {
-            // Writer that will write the records in this bundle. Lazily
-            // initialized in processElement.
-            private Writer<T, WriteT> writer = null;
-
-            @Override
-            public void processElement(ProcessContext c) throws Exception {
-              // Lazily initialize the Writer
-              if (writer == null) {
-                WriteOperation<T, WriteT> writeOperation = c.sideInput(writeOperationView);
-                writer = writeOperation.createWriter(c.getPipelineOptions());
-                writer.open(UUID.randomUUID().toString());
-              }
-              try {
-                writer.write(c.element());
-              } catch (Exception e) {
-                // Discard write result and close the writer.
-                try {
-                  writer.close();
-                } catch (Exception closeException) {
-                  // Do not mask the exception that caused the write to fail.
-                }
-                throw e;
-              }
-            }
-
-            @Override
-            public void finishBundle(Context c) throws Exception {
-              if (writer != null) {
-                WriteT result = writer.close();
-                // Output the result of the write.
-                c.outputWithTimestamp(result, Instant.now());
-              }
-            }
-          }).withSideInputs(writeOperationView))
-          .setCoder(writeOperation.getWriterResultCoder())
-          .apply(Window.<WriteT>into(new GlobalWindows()));
-
-      final PCollectionView<Iterable<WriteT>> resultsView =
-          results.apply(View.<WriteT>asIterable());
-
-      // Finalize the write in another do-once ParDo on the singleton collection containing the
-      // WriteOperation. The results from the per-bundle writes are given as an Iterable side input.
-      // The WriteOperation's state is the same as after its initialization in the first do-once
-      // ParDo. There is a dependency between this ParDo and the parallel write (the writer results
-      // collection as a side input), so it will happen after the parallel write.
-      @SuppressWarnings("unused")
-      final PCollection<Integer> done = operationCollection
-          .apply("Finalize", ParDo.of(new DoFn<WriteOperation<T, WriteT>, Integer>() {
-            @Override
-            public void processElement(ProcessContext c) throws Exception {
-              Iterable<WriteT> results = c.sideInput(resultsView);
-              WriteOperation<T, WriteT> writeOperation = c.element();
-              writeOperation.finalize(results, c.getPipelineOptions());
-            }
-          }).withSideInputs(resultsView));
-      return PDone.in(input.getPipeline());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/XmlSink.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/XmlSink.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/XmlSink.java
deleted file mode 100644
index b728c0a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/XmlSink.java
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
-import com.google.cloud.dataflow.sdk.io.FileBasedSink.FileBasedWriteOperation;
-import com.google.cloud.dataflow.sdk.io.FileBasedSink.FileBasedWriter;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.Preconditions;
-
-import java.io.OutputStream;
-import java.nio.channels.Channels;
-import java.nio.channels.WritableByteChannel;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.JAXBException;
-import javax.xml.bind.Marshaller;
-
-// CHECKSTYLE.OFF: JavadocStyle
-/**
- * A {@link Sink} that outputs records as XML-formatted elements. Writes a {@link PCollection} of
- * records from JAXB-annotated classes to a single file location.
- *
- * <p>Given a PCollection containing records of type T that can be marshalled to XML elements, this
- * Sink will produce one file per output bundle, each consisting of a single root element that
- * contains the elements of the PCollection written in that bundle.
- *
- * <p>XML Sinks are created with a base filename to write to, a root element name that will be used
- * for the root element of the output files, and a class to bind to an XML element. This class
- * will be used in the marshalling of records in an input PCollection to their XML representation
- * and must be able to be bound using JAXB annotations (checked at pipeline construction time).
- *
- * <p>XML Sinks can be written to using the {@link Write} transform:
- *
- * <pre>
- * p.apply(Write.to(
- *      XmlSink.write().ofRecordClass(Type.class)
- *          .withRootElement(root_element)
- *          .toFilenamePrefix(output_filename)));
- * </pre>
- *
- * <p>For example, consider the following class with JAXB annotations:
- *
- * <pre>
- *  {@literal @}XmlRootElement(name = "word_count_result")
- *  {@literal @}XmlType(propOrder = {"word", "frequency"})
- *  public class WordFrequency {
- *    private String word;
- *    private long frequency;
- *
- *    public WordFrequency() { }
- *
- *    public WordFrequency(String word, long frequency) {
- *      this.word = word;
- *      this.frequency = frequency;
- *    }
- *
- *    public void setWord(String word) {
- *      this.word = word;
- *    }
- *
- *    public void setFrequency(long frequency) {
- *      this.frequency = frequency;
- *    }
- *
- *    public long getFrequency() {
- *      return frequency;
- *    }
- *
- *    public String getWord() {
- *      return word;
- *    }
- *  }
- * </pre>
- *
- * <p>The following will produce XML output with a root element named "words" from a PCollection of
- * WordFrequency objects:
- * <pre>
- * p.apply(Write.to(
- *  XmlSink.write().ofRecordClass(WordFrequency.class)
- *      .withRootElement("words")
- *      .toFilenamePrefix(output_file)));
- * </pre>
- *
- * <p>The output of which will look like:
- * <pre>
- * {@code
- * <words>
- *
- *  <word_count_result>
- *    <word>decreased</word>
- *    <frequency>1</frequency>
- *  </word_count_result>
- *
- *  <word_count_result>
- *    <word>War</word>
- *    <frequency>4</frequency>
- *  </word_count_result>
- *
- *  <word_count_result>
- *    <word>empress'</word>
- *    <frequency>14</frequency>
- *  </word_count_result>
- *
- *  <word_count_result>
- *    <word>stoops</word>
- *    <frequency>6</frequency>
- *  </word_count_result>
- *
- *  ...
- * </words>
- * }</pre>
- */
-// CHECKSTYLE.ON: JavadocStyle
-@SuppressWarnings("checkstyle:javadocstyle")
-public class XmlSink {
-  protected static final String XML_EXTENSION = "xml";
-
-  /**
-   * Returns a builder for an XmlSink. You'll need to configure the class to bind, the root
-   * element name, and the output file prefix with {@link Bound#ofRecordClass}, {@link
-   * Bound#withRootElement}, and {@link Bound#toFilenamePrefix}, respectively.
-   */
-  public static Bound<?> write() {
-    return new Bound<>(null, null, null);
-  }
-
-  /**
-   * Returns an XmlSink that writes objects as XML entities.
-   *
-   * <p>Output files will have the name {@literal {baseOutputFilename}-0000i-of-0000n.xml} where n
-   * is the number of output bundles that the Dataflow service divides the output into.
-   *
-   * @param klass the class of the elements to write.
-   * @param rootElementName the enclosing root element.
-   * @param baseOutputFilename the output filename prefix.
-   */
-  public static <T> Bound<T> writeOf(
-      Class<T> klass, String rootElementName, String baseOutputFilename) {
-    return new Bound<>(klass, rootElementName, baseOutputFilename);
-  }
-
-  /**
-   * A {@link FileBasedSink} that writes objects as XML elements.
-   */
-  public static class Bound<T> extends FileBasedSink<T> {
-    final Class<T> classToBind;
-    final String rootElementName;
-
-    private Bound(Class<T> classToBind, String rootElementName, String baseOutputFilename) {
-      super(baseOutputFilename, XML_EXTENSION);
-      this.classToBind = classToBind;
-      this.rootElementName = rootElementName;
-    }
-
-    /**
-     * Returns an XmlSink that writes objects of the class specified as XML elements.
-     *
-     * <p>The specified class must be able to be used to create a JAXB context.
-     */
-    public <T> Bound<T> ofRecordClass(Class<T> classToBind) {
-      return new Bound<>(classToBind, rootElementName, baseOutputFilename);
-    }
-
-    /**
-     * Returns an XmlSink that writes to files with the given prefix.
-     *
-     * <p>Output files will have the name {@literal {filenamePrefix}-0000i-of-0000n.xml} where n is
-     * the number of output bundles that the Dataflow service divides the output into.
-     */
-    public Bound<T> toFilenamePrefix(String baseOutputFilename) {
-      return new Bound<>(classToBind, rootElementName, baseOutputFilename);
-    }
-
-    /**
-     * Returns an XmlSink that writes XML files with an enclosing root element of the
-     * supplied name.
-     */
-    public Bound<T> withRootElement(String rootElementName) {
-      return new Bound<>(classToBind, rootElementName, baseOutputFilename);
-    }
-
-    /**
-     * Validates that the root element, class to bind to a JAXB context, and filenamePrefix have
-     * been set and that the class can be bound in a JAXB context.
-     */
-    @Override
-    public void validate(PipelineOptions options) {
-      Preconditions.checkNotNull(classToBind, "Missing a class to bind to a JAXB context.");
-      Preconditions.checkNotNull(rootElementName, "Missing a root element name.");
-      Preconditions.checkNotNull(baseOutputFilename, "Missing a filename to write to.");
-      try {
-        JAXBContext.newInstance(classToBind);
-      } catch (JAXBException e) {
-        throw new RuntimeException("Error binding classes to a JAXB Context.", e);
-      }
-    }
-
-    /**
-     * Creates an {@link XmlWriteOperation}.
-     */
-    @Override
-    public XmlWriteOperation<T> createWriteOperation(PipelineOptions options) {
-      return new XmlWriteOperation<>(this);
-    }
-  }
-
-  /**
-   * {@link Sink.WriteOperation} for XML {@link Sink}s.
-   */
-  protected static final class XmlWriteOperation<T> extends FileBasedWriteOperation<T> {
-    public XmlWriteOperation(XmlSink.Bound<T> sink) {
-      super(sink);
-    }
-
-    /**
-     * Creates a {@link XmlWriter} with a marshaller for the type it will write.
-     */
-    @Override
-    public XmlWriter<T> createWriter(PipelineOptions options) throws Exception {
-      JAXBContext context;
-      Marshaller marshaller;
-      context = JAXBContext.newInstance(getSink().classToBind);
-      marshaller = context.createMarshaller();
-      marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
-      marshaller.setProperty(Marshaller.JAXB_FRAGMENT, Boolean.TRUE);
-      marshaller.setProperty(Marshaller.JAXB_ENCODING, "UTF-8");
-      return new XmlWriter<>(this, marshaller);
-    }
-
-    /**
-     * Return the XmlSink.Bound for this write operation.
-     */
-    @Override
-    public XmlSink.Bound<T> getSink() {
-      return (XmlSink.Bound<T>) super.getSink();
-    }
-  }
-
-  /**
-   * A {@link Sink.Writer} that can write objects as XML elements.
-   */
-  protected static final class XmlWriter<T> extends FileBasedWriter<T> {
-    final Marshaller marshaller;
-    private OutputStream os = null;
-
-    public XmlWriter(XmlWriteOperation<T> writeOperation, Marshaller marshaller) {
-      super(writeOperation);
-      this.marshaller = marshaller;
-    }
-
-    /**
-     * Creates the output stream that elements will be written to.
-     */
-    @Override
-    protected void prepareWrite(WritableByteChannel channel) throws Exception {
-      os = Channels.newOutputStream(channel);
-    }
-
-    /**
-     * Writes the root element opening tag.
-     */
-    @Override
-    protected void writeHeader() throws Exception {
-      String rootElementName = getWriteOperation().getSink().rootElementName;
-      os.write(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "<" + rootElementName + ">\n"));
-    }
-
-    /**
-     * Writes the root element closing tag.
-     */
-    @Override
-    protected void writeFooter() throws Exception {
-      String rootElementName = getWriteOperation().getSink().rootElementName;
-      os.write(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "\n</" + rootElementName + ">"));
-    }
-
-    /**
-     * Writes a value to the stream.
-     */
-    @Override
-    public void write(T value) throws Exception {
-      marshaller.marshal(value, os);
-    }
-
-    /**
-     * Returns the XmlWriteOperation this writer belongs to.
-     */
-    @Override
-    public XmlWriteOperation<T> getWriteOperation() {
-      return (XmlWriteOperation<T>) super.getWriteOperation();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/XmlSource.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/XmlSource.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/XmlSource.java
deleted file mode 100644
index 1ead391..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/XmlSource.java
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
- * in compliance with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed under the License
- * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
- * or implied. See the License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.JAXBCoder;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.common.base.Preconditions;
-
-import org.codehaus.stax2.XMLInputFactory2;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.SequenceInputStream;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
-import java.nio.channels.Channels;
-import java.nio.channels.ReadableByteChannel;
-import java.nio.charset.StandardCharsets;
-import java.util.NoSuchElementException;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.JAXBElement;
-import javax.xml.bind.JAXBException;
-import javax.xml.bind.Unmarshaller;
-import javax.xml.bind.ValidationEvent;
-import javax.xml.bind.ValidationEventHandler;
-import javax.xml.stream.FactoryConfigurationError;
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-
-// CHECKSTYLE.OFF: JavadocStyle
-/**
- * A source that can be used to read XML files. This source reads one or more
- * XML files and creates a {@code PCollection} of a given type. A Dataflow read transform can be
- * created by passing an {@code XmlSource} object to {@code Read.from()}. Please note the
- * example given below.
- *
- * <p>The XML file must be of the following form, where {@code root} and {@code record} are XML
- * element names that are defined by the user:
- *
- * <pre>
- * {@code
- * <root>
- * <record> ... </record>
- * <record> ... </record>
- * <record> ... </record>
- * ...
- * <record> ... </record>
- * </root>
- * }
- * </pre>
- *
- * <p>Basically, the XML document should contain a single root element with an inner list consisting
- * entirely of record elements. The records may contain arbitrary XML content; however, that content
- * <b>must not</b> contain the start {@code <record>} or end {@code </record>} tags. This
- * restriction enables reading from large XML files in parallel from different offsets in the file.
- *
- * <p>Root and/or record elements may additionally contain an arbitrary number of XML attributes.
- * Additionally users must provide a JAXB annotated Java class that can be used to convert
- * records into Java objects and vice versa using JAXB marshalling/unmarshalling mechanisms. Reading
- * the source will generate a {@code PCollection} of the given JAXB annotated Java type.
- * Optionally users may provide a minimum size of a bundle that should be created for the source.
- *
- * <p>The following example shows how to read from {@link XmlSource} in a Dataflow pipeline:
- *
- * <pre>
- * {@code
- * XmlSource<Record> source = XmlSource.<Record>from(file.toPath().toString())
- *     .withRootElement("root")
- *     .withRecordElement("record")
- *     .withRecordClass(Record.class);
- * PCollection<Record> output = p.apply(Read.from(source));
- * }
- * </pre>
- *
- * <p>Currently, only XML files that use single-byte characters are supported. Using a file that
- * contains multi-byte characters may result in data loss or duplication.
- *
- * <p>To use {@link XmlSource}:
- * <ol>
- *   <li>Explicitly declare a dependency on org.codehaus.woodstox:stax2-api</li>
- *   <li>Include a compatible implementation on the classpath at run-time,
- *       such as org.codehaus.woodstox:woodstox-core-asl</li>
- * </ol>
- *
- * <p>These dependencies have been declared as optional in the Maven sdk/pom.xml file of
- * Google Cloud Dataflow.
- *
- * <p><h3>Permissions</h3>
- * Permission requirements depend on the
- * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner PipelineRunner} that is
- * used to execute the Dataflow job. Please refer to the documentation of corresponding
- * {@link PipelineRunner PipelineRunners} for more details.
- *
- * @param <T> Type of the objects that represent the records of the XML file. The
- *        {@code PCollection} generated by this source will be of this type.
- */
-// CHECKSTYLE.ON: JavadocStyle
-public class XmlSource<T> extends FileBasedSource<T> {
-
-  private static final String XML_VERSION = "1.1";
-  private static final int DEFAULT_MIN_BUNDLE_SIZE = 8 * 1024;
-  private final String rootElement;
-  private final String recordElement;
-  private final Class<T> recordClass;
-
-  /**
-   * Creates an XmlSource for a single XML file or a set of XML files defined by a Java "glob" file
-   * pattern. Each XML file should be of the form defined in {@link XmlSource}.
-   */
-  public static <T> XmlSource<T> from(String fileOrPatternSpec) {
-    return new XmlSource<>(fileOrPatternSpec, DEFAULT_MIN_BUNDLE_SIZE, null, null, null);
-  }
-
-  /**
-   * Sets name of the root element of the XML document. This will be used to create a valid starting
-   * root element when initiating a bundle of records created from an XML document. This is a
-   * required parameter.
-   */
-  public XmlSource<T> withRootElement(String rootElement) {
-    return new XmlSource<>(
-        getFileOrPatternSpec(), getMinBundleSize(), rootElement, recordElement, recordClass);
-  }
-
-  /**
-   * Sets name of the record element of the XML document. This will be used to determine offset of
-   * the first record of a bundle created from the XML document. This is a required parameter.
-   */
-  public XmlSource<T> withRecordElement(String recordElement) {
-    return new XmlSource<>(
-        getFileOrPatternSpec(), getMinBundleSize(), rootElement, recordElement, recordClass);
-  }
-
-  /**
-   * Sets a JAXB annotated class that can be populated using a record of the provided XML file. This
-   * will be used when unmarshalling record objects from the XML file.  This is a required
-   * parameter.
-   */
-  public XmlSource<T> withRecordClass(Class<T> recordClass) {
-    return new XmlSource<>(
-        getFileOrPatternSpec(), getMinBundleSize(), rootElement, recordElement, recordClass);
-  }
-
-  /**
-   * Sets a parameter {@code minBundleSize} for the minimum bundle size of the source. Please refer
-   * to {@link OffsetBasedSource} for the definition of minBundleSize.  This is an optional
-   * parameter.
-   */
-  public XmlSource<T> withMinBundleSize(long minBundleSize) {
-    return new XmlSource<>(
-        getFileOrPatternSpec(), minBundleSize, rootElement, recordElement, recordClass);
-  }
-
-  private XmlSource(String fileOrPattern, long minBundleSize, String rootElement,
-      String recordElement, Class<T> recordClass) {
-    super(fileOrPattern, minBundleSize);
-    this.rootElement = rootElement;
-    this.recordElement = recordElement;
-    this.recordClass = recordClass;
-  }
-
-  private XmlSource(String fileOrPattern, long minBundleSize, long startOffset, long endOffset,
-      String rootElement, String recordElement, Class<T> recordClass) {
-    super(fileOrPattern, minBundleSize, startOffset, endOffset);
-    this.rootElement = rootElement;
-    this.recordElement = recordElement;
-    this.recordClass = recordClass;
-  }
-
-  @Override
-  protected FileBasedSource<T> createForSubrangeOfFile(String fileName, long start, long end) {
-    return new XmlSource<T>(
-        fileName, getMinBundleSize(), start, end, rootElement, recordElement, recordClass);
-  }
-
-  @Override
-  protected FileBasedReader<T> createSingleFileReader(PipelineOptions options) {
-    return new XMLReader<T>(this);
-  }
-
-  @Override
-  public boolean producesSortedKeys(PipelineOptions options) throws Exception {
-    return false;
-  }
-
-  @Override
-  public void validate() {
-    super.validate();
-    Preconditions.checkNotNull(
-        rootElement, "rootElement is null. Use builder method withRootElement() to set this.");
-    Preconditions.checkNotNull(
-        recordElement,
-        "recordElement is null. Use builder method withRecordElement() to set this.");
-    Preconditions.checkNotNull(
-        recordClass, "recordClass is null. Use builder method withRecordClass() to set this.");
-  }
-
-  @Override
-  public Coder<T> getDefaultOutputCoder() {
-    return JAXBCoder.of(recordClass);
-  }
-
-  public String getRootElement() {
-    return rootElement;
-  }
-
-  public String getRecordElement() {
-    return recordElement;
-  }
-
-  public Class<T> getRecordClass() {
-    return recordClass;
-  }
-
-  /**
-   * A {@link Source.Reader} for reading JAXB annotated Java objects from an XML file. The XML
-   * file should be of the form defined at {@link XmlSource}.
-   *
-   * <p>Timestamped values are currently unsupported - all values implicitly have the timestamp
-   * of {@code BoundedWindow.TIMESTAMP_MIN_VALUE}.
-   *
-   * @param <T> Type of objects that will be read by the reader.
-   */
-  private static class XMLReader<T> extends FileBasedReader<T> {
-    // The number of bytes read from the channel into memory at a time when determining the
-    // starting offset of the first record in a bundle. Once the first record has been located, the
-    // remaining bytes already read into this buffer, together with the bytes not yet read from the
-    // channel, are used to create the XML parser.
-    private static final int BUF_SIZE = 1024;
-
-    // This should be the maximum number of bytes a character will encode to, for any encoding
-    // supported by XmlSource. Currently this is set to 4 since UTF-8 characters may be
-    // four bytes.
-    private static final int MAX_CHAR_BYTES = 4;
-
-    // In order to support reading starting in the middle of an XML file, we construct an imaginary
-    // well-formed document (a header and root tag followed by the contents of the input starting at
-    // the record boundary) and feed it to the parser. Because of this, the offset reported by the
-    // XML parser is not the same as offset in the original file. They differ by a constant amount:
-    // offsetInOriginalFile = parser.getLocation().getCharacterOffset() + parserBaseOffset;
-    // Note that this is true only for files with single-byte characters.
-    // It appears that, as of writing, there does not exist a Java XML parser capable of correctly
-    // reporting byte offsets of elements in the presence of multi-byte characters.
-    private long parserBaseOffset = 0;
-    private boolean readingStarted = false;
-
-    // If true, the current bundle does not contain any records.
-    private boolean emptyBundle = false;
-
-    private Unmarshaller jaxbUnmarshaller = null;
-    private XMLStreamReader parser = null;
-
-    private T currentRecord = null;
-
-    // Byte offset of the current record in the XML file provided when creating the source.
-    private long currentByteOffset = 0;
-
-    public XMLReader(XmlSource<T> source) {
-      super(source);
-
-      // Set up a JAXB Unmarshaller that can be used to unmarshall record objects.
-      try {
-        JAXBContext jaxbContext = JAXBContext.newInstance(getCurrentSource().recordClass);
-        jaxbUnmarshaller = jaxbContext.createUnmarshaller();
-
-        // Throw errors if validation fails. JAXB by default ignores validation errors.
-        jaxbUnmarshaller.setEventHandler(new ValidationEventHandler() {
-          @Override
-          public boolean handleEvent(ValidationEvent event) {
-            throw new RuntimeException(event.getMessage(), event.getLinkedException());
-          }
-        });
-      } catch (JAXBException e) {
-        throw new RuntimeException(e);
-      }
-    }
-
-    @Override
-    public synchronized XmlSource<T> getCurrentSource() {
-      return (XmlSource<T>) super.getCurrentSource();
-    }
-
-    @Override
-    protected void startReading(ReadableByteChannel channel) throws IOException {
-      // This method determines the correct starting offset of the first record by reading bytes
-      // from the ReadableByteChannel. This implementation does not need the channel to be a
-      // SeekableByteChannel.
-      // The method tries to determine the first record element in the byte channel. The first
-      // record must start with the characters "<recordElement" where "recordElement" is the
-      // record element of the XML document described above. For the match to be complete this
-      // has to be followed by one of following.
-      // * any whitespace character
-      // * '>' character
-      // * '/' character (to support empty records).
-      //
-      // After this match this method creates the XML parser for parsing the XML document,
-      // feeding it a fake document consisting of an XML header and the <rootElement> tag followed
-      // by the contents of channel starting from <recordElement. The <rootElement> tag may never
-      // be closed.
-
-      // This stores any bytes that should be used prior to the remaining bytes of the channel when
-      // creating an XML parser object.
-      ByteArrayOutputStream preambleByteBuffer = new ByteArrayOutputStream();
-      // A dummy declaration and root for the document with proper XML version and encoding. Without
-      // this XML parsing may fail or may produce incorrect results.
-
-      byte[] dummyStartDocumentBytes =
-          ("<?xml version=\"" + XML_VERSION + "\" encoding=\"UTF-8\" ?>"
-              + "<" + getCurrentSource().rootElement + ">").getBytes(StandardCharsets.UTF_8);
-      preambleByteBuffer.write(dummyStartDocumentBytes);
-      // Gets the byte offset (in the input file) of the first record in ReadableByteChannel. This
-      // method returns the offset and stores any bytes that should be used when creating the XML
-      // parser in preambleByteBuffer.
-      long offsetInFileOfRecordElement =
-          getFirstOccurenceOfRecordElement(channel, preambleByteBuffer);
-      if (offsetInFileOfRecordElement < 0) {
-        // Bundle has no records. So marking this bundle as an empty bundle.
-        emptyBundle = true;
-        return;
-      } else {
-        byte[] preambleBytes = preambleByteBuffer.toByteArray();
-        currentByteOffset = offsetInFileOfRecordElement;
-        setUpXMLParser(channel, preambleBytes);
-        parserBaseOffset = offsetInFileOfRecordElement - dummyStartDocumentBytes.length;
-      }
-      readingStarted = true;
-    }
-
-    // Gets the first occurrence of the next record within the given ReadableByteChannel. Puts
-    // any bytes read past the starting offset of the next record back to the preambleByteBuffer.
-    // If a record is found, returns the starting offset of the record, otherwise
-    // returns -1.
-    private long getFirstOccurenceOfRecordElement(
-        ReadableByteChannel channel, ByteArrayOutputStream preambleByteBuffer) throws IOException {
-      int byteIndexInRecordElementToMatch = 0;
-      // Index of the byte in the string "<recordElement" to be matched
-      // against the current byte from the stream.
-      boolean recordStartBytesMatched = false; // "<recordElement" matched. Still have to match the
-      // next character to confirm if this is a positive match.
-      boolean fullyMatched = false; // If true, record element was fully matched.
-
-      // This gives the offset of the byte currently being read. We do a '-1' here since we
-      // increment this value at the beginning of the while loop below.
-      long offsetInFileOfCurrentByte = getCurrentSource().getStartOffset() - 1;
-      long startingOffsetInFileOfCurrentMatch = -1;
-      // If this is non-negative, currently there is a match in progress and this value gives the
-      // starting offset of the match currently being conducted.
-      boolean matchStarted = false; // If true, a match is currently in progress.
-
-      // These two values are used to determine the character immediately following a match for
-      // "<recordElement". Please see the comment for 'MAX_CHAR_BYTES' above.
-      byte[] charBytes = new byte[MAX_CHAR_BYTES];
-      int charBytesFound = 0;
-
-      ByteBuffer buf = ByteBuffer.allocate(BUF_SIZE);
-      byte[] recordStartBytes =
-          ("<" + getCurrentSource().recordElement).getBytes(StandardCharsets.UTF_8);
-
-      outer: while (channel.read(buf) > 0) {
-        buf.flip();
-        while (buf.hasRemaining()) {
-          offsetInFileOfCurrentByte++;
-          byte b = buf.get();
-          boolean reset = false;
-          if (recordStartBytesMatched) {
-            // We already matched "<recordElement" reading the next character to determine if this
-            // is a positive match for a new record.
-            charBytes[charBytesFound] = b;
-            charBytesFound++;
-            Character c = null;
-            if (charBytesFound == charBytes.length) {
-              CharBuffer charBuf = CharBuffer.allocate(1);
-              InputStream charBufStream = new ByteArrayInputStream(charBytes);
-              java.io.Reader reader =
-                  new InputStreamReader(charBufStream, StandardCharsets.UTF_8);
-              int read = reader.read();
-              if (read <= 0) {
-                return -1;
-              }
-              charBuf.flip();
-              c = (char) read;
-            } else {
-              continue;
-            }
-
-            // Record start may be of following forms
-            // * "<recordElement<whitespace>..."
-            // * "<recordElement>..."
-            // * "<recordElement/..."
-            if (Character.isWhitespace(c) || c == '>' || c == '/') {
-              fullyMatched = true;
-              // Add the recordStartBytes and charBytes to preambleByteBuffer since these were
-              // already read from the channel.
-              preambleByteBuffer.write(recordStartBytes);
-              preambleByteBuffer.write(charBytes);
-              // Also add the rest of the current buffer to preambleByteBuffer.
-              while (buf.hasRemaining()) {
-                preambleByteBuffer.write(buf.get());
-              }
-              break outer;
-            } else {
-              // Matching was unsuccessful. Reset the buffer to include bytes read for the char.
-              ByteBuffer newbuf = ByteBuffer.allocate(BUF_SIZE);
-              newbuf.put(charBytes);
-              offsetInFileOfCurrentByte -= charBytes.length;
-              while (buf.hasRemaining()) {
-                newbuf.put(buf.get());
-              }
-              newbuf.flip();
-              buf = newbuf;
-
-              // Ignore everything and try again starting from the current buffer.
-              reset = true;
-            }
-          } else if (b == recordStartBytes[byteIndexInRecordElementToMatch]) {
-            // Next byte matched.
-            if (!matchStarted) {
-              // Match was for the first byte, record the starting offset.
-              matchStarted = true;
-              startingOffsetInFileOfCurrentMatch = offsetInFileOfCurrentByte;
-            }
-            byteIndexInRecordElementToMatch++;
-          } else {
-            // Not a match. Ignore everything and try again starting at current point.
-            reset = true;
-          }
-          if (reset) {
-            // Clear variables and try to match starting from the next byte.
-            byteIndexInRecordElementToMatch = 0;
-            startingOffsetInFileOfCurrentMatch = -1;
-            matchStarted = false;
-            recordStartBytesMatched = false;
-            charBytes = new byte[MAX_CHAR_BYTES];
-            charBytesFound = 0;
-          }
-          if (byteIndexInRecordElementToMatch == recordStartBytes.length) {
-            // "<recordElement" matched. Need to still check next byte since this might be an
-            // element that has "recordElement" as a prefix.
-            recordStartBytesMatched = true;
-          }
-        }
-        buf.clear();
-      }
-
-      if (!fullyMatched) {
-        return -1;
-      } else {
-        return startingOffsetInFileOfCurrentMatch;
-      }
-    }
-
-    private void setUpXMLParser(ReadableByteChannel channel, byte[] lookAhead) throws IOException {
-      try {
-        // We use Woodstox because the StAX implementation provided by OpenJDK reports
-        // character locations incorrectly. Note that Woodstox still currently reports *byte*
-        // locations incorrectly when parsing documents that contain multi-byte characters.
-        XMLInputFactory2 xmlInputFactory = (XMLInputFactory2) XMLInputFactory.newInstance();
-        this.parser = xmlInputFactory.createXMLStreamReader(
-            new SequenceInputStream(
-                new ByteArrayInputStream(lookAhead), Channels.newInputStream(channel)),
-            "UTF-8");
-
-        // Current offset should be the offset before reading the record element.
-        while (true) {
-          int event = parser.next();
-          if (event == XMLStreamConstants.START_ELEMENT) {
-            String localName = parser.getLocalName();
-            if (localName.equals(getCurrentSource().recordElement)) {
-              break;
-            }
-          }
-        }
-      } catch (FactoryConfigurationError | XMLStreamException e) {
-        throw new IOException(e);
-      }
-    }
-
-    @Override
-    protected boolean readNextRecord() throws IOException {
-      if (emptyBundle) {
-        currentByteOffset = Long.MAX_VALUE;
-        return false;
-      }
-      try {
-        // Update current offset and check if the next value is the record element.
-        currentByteOffset = parserBaseOffset + parser.getLocation().getCharacterOffset();
-        while (parser.getEventType() != XMLStreamConstants.START_ELEMENT) {
-          parser.next();
-          currentByteOffset = parserBaseOffset + parser.getLocation().getCharacterOffset();
-          if (parser.getEventType() == XMLStreamConstants.END_DOCUMENT) {
-            currentByteOffset = Long.MAX_VALUE;
-            return false;
-          }
-        }
-        JAXBElement<T> jb = jaxbUnmarshaller.unmarshal(parser, getCurrentSource().recordClass);
-        currentRecord = jb.getValue();
-        return true;
-      } catch (JAXBException | XMLStreamException e) {
-        throw new IOException(e);
-      }
-    }
-
-    @Override
-    public T getCurrent() throws NoSuchElementException {
-      if (!readingStarted) {
-        throw new NoSuchElementException();
-      }
-      return currentRecord;
-    }
-
-    @Override
-    protected boolean isAtSplitPoint() {
-      // Every record is at a split point.
-      return true;
-    }
-
-    @Override
-    protected long getCurrentOffset() {
-      return currentByteOffset;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableIO.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableIO.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableIO.java
deleted file mode 100644
index 7d59b09..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableIO.java
+++ /dev/null
@@ -1,987 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.io.bigtable;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.bigtable.v1.Mutation;
-import com.google.bigtable.v1.Row;
-import com.google.bigtable.v1.RowFilter;
-import com.google.bigtable.v1.SampleRowKeysResponse;
-import com.google.cloud.bigtable.config.BigtableOptions;
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.Proto2Coder;
-import com.google.cloud.dataflow.sdk.coders.VarLongCoder;
-import com.google.cloud.dataflow.sdk.io.BoundedSource;
-import com.google.cloud.dataflow.sdk.io.BoundedSource.BoundedReader;
-import com.google.cloud.dataflow.sdk.io.Sink.WriteOperation;
-import com.google.cloud.dataflow.sdk.io.Sink.Writer;
-import com.google.cloud.dataflow.sdk.io.range.ByteKey;
-import com.google.cloud.dataflow.sdk.io.range.ByteKeyRange;
-import com.google.cloud.dataflow.sdk.io.range.ByteKeyRangeTracker;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.util.DataflowReleaseInfo;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PBegin;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PDone;
-import com.google.common.base.MoreObjects;
-import com.google.common.collect.ImmutableList;
-import com.google.common.util.concurrent.FutureCallback;
-import com.google.common.util.concurrent.Futures;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.Empty;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.NoSuchElementException;
-import java.util.concurrent.ConcurrentLinkedQueue;
-
-import javax.annotation.Nullable;
-
-/**
- * A bounded source and sink for Google Cloud Bigtable.
- *
- * <p>For more information, see the online documentation at
- * <a href="https://cloud.google.com/bigtable/">Google Cloud Bigtable</a>.
- *
- * <h3>Reading from Cloud Bigtable</h3>
- *
- * <p>The Bigtable source returns a set of rows from a single table, returning a
- * {@code PCollection<Row>}.
- *
- * <p>To configure a Cloud Bigtable source, you must supply a table id and a {@link BigtableOptions}
- * or builder configured with the project and other information necessary to identify the
- * Bigtable cluster. A {@link RowFilter} may also optionally be specified using
- * {@link BigtableIO.Read#withRowFilter}. For example:
- *
- * <pre>{@code
- * BigtableOptions.Builder optionsBuilder =
- *     new BigtableOptions.Builder()
- *         .setProjectId("project")
- *         .setClusterId("cluster")
- *         .setZoneId("zone");
- *
- * Pipeline p = ...;
- *
- * // Scan the entire table.
- * p.apply("read",
- *     BigtableIO.read()
- *         .withBigtableOptions(optionsBuilder)
- *         .withTableId("table"));
- *
- * // Scan a subset of rows that match the specified row filter.
- * p.apply("filtered read",
- *     BigtableIO.read()
- *         .withBigtableOptions(optionsBuilder)
- *         .withTableId("table")
- *         .withRowFilter(filter));
- * }</pre>
- *
- * <h3>Writing to Cloud Bigtable</h3>
- *
- * <p>The Bigtable sink executes a set of row mutations on a single table. It takes as input a
- * {@link PCollection PCollection&lt;KV&lt;ByteString, Iterable&lt;Mutation&gt;&gt;&gt;}, where the
- * {@link ByteString} is the key of the row being mutated, and each {@link Mutation} represents an
- * idempotent transformation to that row.
- *
- * <p>To configure a Cloud Bigtable sink, you must supply a table id and a {@link BigtableOptions}
- * or builder configured with the project and other information necessary to identify the
- * Bigtable cluster, for example:
- *
- * <pre>{@code
- * BigtableOptions.Builder optionsBuilder =
- *     new BigtableOptions.Builder()
- *         .setProjectId("project")
- *         .setClusterId("cluster")
- *         .setZoneId("zone");
- *
- * PCollection<KV<ByteString, Iterable<Mutation>>> data = ...;
- *
- * data.apply("write",
- *     BigtableIO.write()
- *         .withBigtableOptions(optionsBuilder)
- *         .withTableId("table"));
- * }</pre>
- *
- * <h3>Experimental</h3>
- *
- * <p>This connector for Cloud Bigtable is considered experimental and may break or receive
- * backwards-incompatible changes in future versions of the Cloud Dataflow SDK. Cloud Bigtable is
- * in Beta, and thus it may introduce breaking changes in future revisions of its service or APIs.
- *
- * <h3>Permissions</h3>
- *
- * <p>Permission requirements depend on the {@link PipelineRunner} that is used to execute the
- * Dataflow job. Please refer to the documentation of corresponding
- * {@link PipelineRunner PipelineRunners} for more details.
- */
-@Experimental
-public class BigtableIO {
-  private static final Logger logger = LoggerFactory.getLogger(BigtableIO.class);
-
-  /**
-   * Creates an uninitialized {@link BigtableIO.Read}. Before use, the {@code Read} must be
-   * initialized with a
-   * {@link BigtableIO.Read#withBigtableOptions(BigtableOptions) BigtableOptions} that specifies
-   * the source Cloud Bigtable cluster, and a {@link BigtableIO.Read#withTableId tableId} that
-   * specifies which table to read. A {@link RowFilter} may also optionally be specified using
-   * {@link BigtableIO.Read#withRowFilter}.
-   */
-  @Experimental
-  public static Read read() {
-    return new Read(null, "", null, null);
-  }
-
-  /**
-   * Creates an uninitialized {@link BigtableIO.Write}. Before use, the {@code Write} must be
-   * initialized with a
-   * {@link BigtableIO.Write#withBigtableOptions(BigtableOptions) BigtableOptions} that specifies
-   * the destination Cloud Bigtable cluster, and a {@link BigtableIO.Write#withTableId tableId} that
-   * specifies which table to write.
-   */
-  @Experimental
-  public static Write write() {
-    return new Write(null, "", null);
-  }
-
-  /**
-   * A {@link PTransform} that reads from Google Cloud Bigtable. See the class-level Javadoc on
-   * {@link BigtableIO} for more information.
-   *
-   * @see BigtableIO
-   */
-  @Experimental
-  public static class Read extends PTransform<PBegin, PCollection<Row>> {
-    /**
-     * Returns a new {@link BigtableIO.Read} that will read from the Cloud Bigtable cluster
-     * indicated by the given options, and using any other specified customizations.
-     *
-     * <p>Does not modify this object.
-     */
-    public Read withBigtableOptions(BigtableOptions options) {
-      checkNotNull(options, "options");
-      return withBigtableOptions(options.toBuilder());
-    }
-
-    /**
-     * Returns a new {@link BigtableIO.Read} that will read from the Cloud Bigtable cluster
-     * indicated by the given options, and using any other specified customizations.
-     *
-     * <p>Clones the given {@link BigtableOptions} builder so that any further changes
-     * will have no effect on the returned {@link BigtableIO.Read}.
-     *
-     * <p>Does not modify this object.
-     */
-    public Read withBigtableOptions(BigtableOptions.Builder optionsBuilder) {
-      checkNotNull(optionsBuilder, "optionsBuilder");
-      // TODO: is there a better way to clone a Builder? Want it to be immune from user changes.
-      BigtableOptions.Builder clonedBuilder = optionsBuilder.build().toBuilder();
-      BigtableOptions optionsWithAgent = clonedBuilder.setUserAgent(getUserAgent()).build();
-      return new Read(optionsWithAgent, tableId, filter, bigtableService);
-    }
-
-    /**
-     * Returns a new {@link BigtableIO.Read} that will filter the rows read from Cloud Bigtable
-     * using the given row filter.
-     *
-     * <p>Does not modify this object.
-     */
-    public Read withRowFilter(RowFilter filter) {
-      checkNotNull(filter, "filter");
-      return new Read(options, tableId, filter, bigtableService);
-    }
-
-    /**
-     * Returns a new {@link BigtableIO.Read} that will read from the specified table.
-     *
-     * <p>Does not modify this object.
-     */
-    public Read withTableId(String tableId) {
-      checkNotNull(tableId, "tableId");
-      return new Read(options, tableId, filter, bigtableService);
-    }
-
-    /**
-     * Returns the Google Cloud Bigtable cluster being read from, and other parameters.
-     */
-    public BigtableOptions getBigtableOptions() {
-      return options;
-    }
-
-    /**
-     * Returns the table being read from.
-     */
-    public String getTableId() {
-      return tableId;
-    }
-
-    @Override
-    public PCollection<Row> apply(PBegin input) {
-      BigtableSource source =
-          new BigtableSource(getBigtableService(), tableId, filter, ByteKeyRange.ALL_KEYS, null);
-      return input.getPipeline().apply(com.google.cloud.dataflow.sdk.io.Read.from(source));
-    }
-
-    @Override
-    public void validate(PBegin input) {
-      checkArgument(options != null, "BigtableOptions not specified");
-      checkArgument(!tableId.isEmpty(), "Table ID not specified");
-      try {
-        checkArgument(
-            getBigtableService().tableExists(tableId), "Table %s does not exist", tableId);
-      } catch (IOException e) {
-        logger.warn("Error checking whether table {} exists; proceeding.", tableId, e);
-      }
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(Read.class)
-          .add("options", options)
-          .add("tableId", tableId)
-          .add("filter", filter)
-          .toString();
-    }
-
-    /////////////////////////////////////////////////////////////////////////////////////////
-    /**
-     * Used to define the Cloud Bigtable cluster and any options for the networking layer.
-     * Cannot actually be {@code null} at validation time, but may start out {@code null} while
-     * source is being built.
-     */
-    @Nullable private final BigtableOptions options;
-    private final String tableId;
-    @Nullable private final RowFilter filter;
-    @Nullable private final BigtableService bigtableService;
-
-    private Read(
-        @Nullable BigtableOptions options,
-        String tableId,
-        @Nullable RowFilter filter,
-        @Nullable BigtableService bigtableService) {
-      this.options = options;
-      this.tableId = checkNotNull(tableId, "tableId");
-      this.filter = filter;
-      this.bigtableService = bigtableService;
-    }
-
-    /**
-     * Returns a new {@link BigtableIO.Read} that will read using the given Cloud Bigtable
-     * service implementation.
-     *
-     * <p>This is used for testing.
-     *
-     * <p>Does not modify this object.
-     */
-    Read withBigtableService(BigtableService bigtableService) {
-      checkNotNull(bigtableService, "bigtableService");
-      return new Read(options, tableId, filter, bigtableService);
-    }
-
-    /**
-     * Helper function that either returns the mock Bigtable service supplied by
-     * {@link #withBigtableService} or creates and returns an implementation that talks to
-     * {@code Cloud Bigtable}.
-     */
-    private BigtableService getBigtableService() {
-      if (bigtableService != null) {
-        return bigtableService;
-      }
-      return new BigtableServiceImpl(options);
-    }
-  }
-
-  /**
-   * A {@link PTransform} that writes to Google Cloud Bigtable. See the class-level Javadoc on
-   * {@link BigtableIO} for more information.
-   *
-   * @see BigtableIO
-   */
-  @Experimental
-  public static class Write
-      extends PTransform<PCollection<KV<ByteString, Iterable<Mutation>>>, PDone> {
-    /**
-     * Used to define the Cloud Bigtable cluster and any options for the networking layer.
-     * Cannot actually be {@code null} at validation time, but may start out {@code null} while
-     * source is being built.
-     */
-    @Nullable private final BigtableOptions options;
-    private final String tableId;
-    @Nullable private final BigtableService bigtableService;
-
-    private Write(
-        @Nullable BigtableOptions options,
-        String tableId,
-        @Nullable BigtableService bigtableService) {
-      this.options = options;
-      this.tableId = checkNotNull(tableId, "tableId");
-      this.bigtableService = bigtableService;
-    }
-
-    /**
-     * Returns a new {@link BigtableIO.Write} that will write to the Cloud Bigtable cluster
-     * indicated by the given options, and using any other specified customizations.
-     *
-     * <p>Does not modify this object.
-     */
-    public Write withBigtableOptions(BigtableOptions options) {
-      checkNotNull(options, "options");
-      return withBigtableOptions(options.toBuilder());
-    }
-
-    /**
-     * Returns a new {@link BigtableIO.Write} that will write to the Cloud Bigtable cluster
-     * indicated by the given options, and using any other specified customizations.
-     *
-     * <p>Clones the given {@link BigtableOptions} builder so that any further changes
-     * will have no effect on the returned {@link BigtableIO.Write}.
-     *
-     * <p>Does not modify this object.
-     */
-    public Write withBigtableOptions(BigtableOptions.Builder optionsBuilder) {
-      checkNotNull(optionsBuilder, "optionsBuilder");
-      // TODO: is there a better way to clone a Builder? Want it to be immune from user changes.
-      BigtableOptions.Builder clonedBuilder = optionsBuilder.build().toBuilder();
-      BigtableOptions optionsWithAgent = clonedBuilder.setUserAgent(getUserAgent()).build();
-      return new Write(optionsWithAgent, tableId, bigtableService);
-    }
-
-    /**
-     * Returns a new {@link BigtableIO.Write} that will write to the specified table.
-     *
-     * <p>Does not modify this object.
-     */
-    public Write withTableId(String tableId) {
-      checkNotNull(tableId, "tableId");
-      return new Write(options, tableId, bigtableService);
-    }
-
-    /**
-     * Returns the Google Cloud Bigtable cluster being written to, and other parameters.
-     */
-    public BigtableOptions getBigtableOptions() {
-      return options;
-    }
-
-    /**
-     * Returns the table being written to.
-     */
-    public String getTableId() {
-      return tableId;
-    }
-
-    @Override
-    public PDone apply(PCollection<KV<ByteString, Iterable<Mutation>>> input) {
-      Sink sink = new Sink(tableId, getBigtableService());
-      return input.apply(com.google.cloud.dataflow.sdk.io.Write.to(sink));
-    }
-
-    @Override
-    public void validate(PCollection<KV<ByteString, Iterable<Mutation>>> input) {
-      checkArgument(options != null, "BigtableOptions not specified");
-      checkArgument(!tableId.isEmpty(), "Table ID not specified");
-      try {
-        checkArgument(
-            getBigtableService().tableExists(tableId), "Table %s does not exist", tableId);
-      } catch (IOException e) {
-        logger.warn("Error checking whether table {} exists; proceeding.", tableId, e);
-      }
-    }
-
-    /**
-     * Returns a new {@link BigtableIO.Write} that will write using the given Cloud Bigtable
-     * service implementation.
-     *
-     * <p>This is used for testing.
-     *
-     * <p>Does not modify this object.
-     */
-    Write withBigtableService(BigtableService bigtableService) {
-      checkNotNull(bigtableService, "bigtableService");
-      return new Write(options, tableId, bigtableService);
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(Write.class)
-          .add("options", options)
-          .add("tableId", tableId)
-          .toString();
-    }
-
-    /**
-     * Helper function that either returns the mock Bigtable service supplied by
-     * {@link #withBigtableService} or creates and returns an implementation that talks to
-     * {@code Cloud Bigtable}.
-     */
-    private BigtableService getBigtableService() {
-      if (bigtableService != null) {
-        return bigtableService;
-      }
-      return new BigtableServiceImpl(options);
-    }
-  }
-
-  //////////////////////////////////////////////////////////////////////////////////////////
-  /** Disallow construction of utility class. */
-  private BigtableIO() {}
-
-  static class BigtableSource extends BoundedSource<Row> {
-    public BigtableSource(
-        BigtableService service,
-        String tableId,
-        @Nullable RowFilter filter,
-        ByteKeyRange range,
-        Long estimatedSizeBytes) {
-      this.service = service;
-      this.tableId = tableId;
-      this.filter = filter;
-      this.range = range;
-      this.estimatedSizeBytes = estimatedSizeBytes;
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(BigtableSource.class)
-          .add("tableId", tableId)
-          .add("filter", filter)
-          .add("range", range)
-          .add("estimatedSizeBytes", estimatedSizeBytes)
-          .toString();
-    }
-
-    ////// Private state and internal implementation details //////
-    private final BigtableService service;
-    @Nullable private final String tableId;
-    @Nullable private final RowFilter filter;
-    private final ByteKeyRange range;
-    @Nullable private Long estimatedSizeBytes;
-    @Nullable private transient List<SampleRowKeysResponse> sampleRowKeys;
-
-    protected BigtableSource withStartKey(ByteKey startKey) {
-      checkNotNull(startKey, "startKey");
-      return new BigtableSource(
-          service, tableId, filter, range.withStartKey(startKey), estimatedSizeBytes);
-    }
-
-    protected BigtableSource withEndKey(ByteKey endKey) {
-      checkNotNull(endKey, "endKey");
-      return new BigtableSource(
-          service, tableId, filter, range.withEndKey(endKey), estimatedSizeBytes);
-    }
-
-    protected BigtableSource withEstimatedSizeBytes(Long estimatedSizeBytes) {
-      checkNotNull(estimatedSizeBytes, "estimatedSizeBytes");
-      return new BigtableSource(service, tableId, filter, range, estimatedSizeBytes);
-    }
-
-    /**
-     * Makes an API call to the Cloud Bigtable service that gives information about tablet key
-     * boundaries and estimated sizes. We can use these samples to ensure that splits are on
-     * different tablets, and possibly generate sub-splits within tablets.
-     */
-    private List<SampleRowKeysResponse> getSampleRowKeys() throws IOException {
-      return service.getSampleRowKeys(this);
-    }
-
-    @Override
-    public List<BigtableSource> splitIntoBundles(
-        long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
-      // Update the desiredBundleSizeBytes in order to limit the
-      // number of splits to maximumNumberOfSplits.
-      long maximumNumberOfSplits = 4000;
-      long sizeEstimate = getEstimatedSizeBytes(options);
-      desiredBundleSizeBytes =
-          Math.max(sizeEstimate / maximumNumberOfSplits, desiredBundleSizeBytes);
-
-      // Delegate to testable helper.
-      return splitIntoBundlesBasedOnSamples(desiredBundleSizeBytes, getSampleRowKeys());
-    }
-
-    /** Helper that splits this source into bundles based on Cloud Bigtable sampled row keys. */
-    private List<BigtableSource> splitIntoBundlesBasedOnSamples(
-        long desiredBundleSizeBytes, List<SampleRowKeysResponse> sampleRowKeys) {
-      // There are no regions, or no samples available. Just scan the entire range.
-      if (sampleRowKeys.isEmpty()) {
-        logger.info("Not splitting source {} because no sample row keys are available.", this);
-        return Collections.singletonList(this);
-      }
-
-      logger.info(
-          "About to split into bundles of size {} with sampleRowKeys length {} first element {}",
-          desiredBundleSizeBytes,
-          sampleRowKeys.size(),
-          sampleRowKeys.get(0));
-
-      // Loop through all sampled responses and generate splits from the ones that overlap the
-      // scan range. The main complication is that we must track the end range of the previous
-      // sample to generate good ranges.
-      ByteKey lastEndKey = ByteKey.EMPTY;
-      long lastOffset = 0;
-      ImmutableList.Builder<BigtableSource> splits = ImmutableList.builder();
-      for (SampleRowKeysResponse response : sampleRowKeys) {
-        ByteKey responseEndKey = ByteKey.of(response.getRowKey());
-        long responseOffset = response.getOffsetBytes();
-        checkState(
-            responseOffset >= lastOffset,
-            "Expected response byte offset %s to come after the last offset %s",
-            responseOffset,
-            lastOffset);
-
-        if (!range.overlaps(ByteKeyRange.of(lastEndKey, responseEndKey))) {
-          // This region does not overlap the scan, so skip it.
-          lastOffset = responseOffset;
-          lastEndKey = responseEndKey;
-          continue;
-        }
-
-        // Calculate the beginning of the split as the larger of startKey and the end of the last
-        // split. Unspecified start is smallest key so is correctly treated as earliest key.
-        ByteKey splitStartKey = lastEndKey;
-        if (splitStartKey.compareTo(range.getStartKey()) < 0) {
-          splitStartKey = range.getStartKey();
-        }
-
-        // Calculate the end of the split as the smaller of endKey and the end of this sample. Note
-        // that range.containsKey handles the case when range.getEndKey() is empty.
-        ByteKey splitEndKey = responseEndKey;
-        if (!range.containsKey(splitEndKey)) {
-          splitEndKey = range.getEndKey();
-        }
-
-        // We know this region overlaps the desired key range, and we know a rough estimate of its
-        // size. Split the key range into bundle-sized chunks and then add them all as splits.
-        long sampleSizeBytes = responseOffset - lastOffset;
-        List<BigtableSource> subSplits =
-            splitKeyRangeIntoBundleSizedSubranges(
-                sampleSizeBytes,
-                desiredBundleSizeBytes,
-                ByteKeyRange.of(splitStartKey, splitEndKey));
-        splits.addAll(subSplits);
-
-        // Move to the next region.
-        lastEndKey = responseEndKey;
-        lastOffset = responseOffset;
-      }
-
-      // We must add one more region after the end of the samples if both these conditions hold:
-      //  1. we did not scan to the end yet (lastEndKey is concrete, not 0-length).
-      //  2. we want to scan to the end (endKey is empty) or farther (lastEndKey < endKey).
-      if (!lastEndKey.isEmpty()
-          && (range.getEndKey().isEmpty() || lastEndKey.compareTo(range.getEndKey()) < 0)) {
-        splits.add(this.withStartKey(lastEndKey).withEndKey(range.getEndKey()));
-      }
-
-      List<BigtableSource> ret = splits.build();
-      logger.info("Generated {} splits. First split: {}", ret.size(), ret.get(0));
-      return ret;
-    }
-
-    @Override
-    public long getEstimatedSizeBytes(PipelineOptions options) throws IOException {
-      // Delegate to testable helper.
-      if (estimatedSizeBytes == null) {
-        estimatedSizeBytes = getEstimatedSizeBytesBasedOnSamples(getSampleRowKeys());
-      }
-      return estimatedSizeBytes;
-    }
-
-    /**
-     * Computes the estimated size in bytes based on the total size of all samples that overlap
-     * the key range this source will scan.
-     */
-    private long getEstimatedSizeBytesBasedOnSamples(List<SampleRowKeysResponse> samples) {
-      long estimatedSizeBytes = 0;
-      long lastOffset = 0;
-      ByteKey currentStartKey = ByteKey.EMPTY;
-      // Compute the total estimated size as the size of each sample that overlaps the scan range.
-      // TODO: In future, Bigtable service may provide finer grained APIs, e.g., to sample given a
-      // filter or to sample on a given key range.
-      for (SampleRowKeysResponse response : samples) {
-        ByteKey currentEndKey = ByteKey.of(response.getRowKey());
-        long currentOffset = response.getOffsetBytes();
-        if (!currentStartKey.isEmpty() && currentStartKey.equals(currentEndKey)) {
-          // Skip an empty region.
-          lastOffset = currentOffset;
-          continue;
-        } else if (range.overlaps(ByteKeyRange.of(currentStartKey, currentEndKey))) {
-          estimatedSizeBytes += currentOffset - lastOffset;
-        }
-        currentStartKey = currentEndKey;
-        lastOffset = currentOffset;
-      }
-      return estimatedSizeBytes;
-    }
-
-    /**
-     * Cloud Bigtable returns query results ordered by key.
-     */
-    @Override
-    public boolean producesSortedKeys(PipelineOptions options) throws Exception {
-      return true;
-    }
-
-    @Override
-    public BoundedReader<Row> createReader(PipelineOptions options) throws IOException {
-      return new BigtableReader(this, service);
-    }
-
-    @Override
-    public void validate() {
-      checkArgument(!tableId.isEmpty(), "tableId cannot be empty");
-    }
-
-    @Override
-    public Coder<Row> getDefaultOutputCoder() {
-      return Proto2Coder.of(Row.class);
-    }
-
-    /** Helper that splits the specified range in this source into bundles. */
-    private List<BigtableSource> splitKeyRangeIntoBundleSizedSubranges(
-        long sampleSizeBytes, long desiredBundleSizeBytes, ByteKeyRange range) {
-      // Catch the trivial cases. Split is small enough already, or this is the last region.
-      logger.debug(
-          "Subsplit for sampleSizeBytes {} and desiredBundleSizeBytes {}",
-          sampleSizeBytes,
-          desiredBundleSizeBytes);
-      if (sampleSizeBytes <= desiredBundleSizeBytes) {
-        return Collections.singletonList(
-            this.withStartKey(range.getStartKey()).withEndKey(range.getEndKey()));
-      }
-
-      checkArgument(
-          sampleSizeBytes > 0, "Sample size %s bytes must be greater than 0.", sampleSizeBytes);
-      checkArgument(
-          desiredBundleSizeBytes > 0,
-          "Desired bundle size %s bytes must be greater than 0.",
-          desiredBundleSizeBytes);
-
-      int splitCount = (int) Math.ceil(((double) sampleSizeBytes) / (desiredBundleSizeBytes));
-      List<ByteKey> splitKeys = range.split(splitCount);
-      ImmutableList.Builder<BigtableSource> splits = ImmutableList.builder();
-      Iterator<ByteKey> keys = splitKeys.iterator();
-      ByteKey prev = keys.next();
-      while (keys.hasNext()) {
-        ByteKey next = keys.next();
-        splits.add(
-            this
-                .withStartKey(prev)
-                .withEndKey(next)
-                .withEstimatedSizeBytes(sampleSizeBytes / splitCount));
-        prev = next;
-      }
-      return splits.build();
-    }
-
-    public ByteKeyRange getRange() {
-      return range;
-    }
-
-    public RowFilter getRowFilter() {
-      return filter;
-    }
-
-    public String getTableId() {
-      return tableId;
-    }
-  }
-
-  private static class BigtableReader extends BoundedReader<Row> {
-    // Thread-safety: source is protected via synchronization and is only accessed or modified
-    // inside a synchronized block (or constructor, which is the same).
-    private BigtableSource source;
-    private BigtableService service;
-    private BigtableService.Reader reader;
-    private final ByteKeyRangeTracker rangeTracker;
-    private long recordsReturned;
-
-    public BigtableReader(BigtableSource source, BigtableService service) {
-      this.source = source;
-      this.service = service;
-      rangeTracker = ByteKeyRangeTracker.of(source.getRange());
-    }
-
-    @Override
-    public boolean start() throws IOException {
-      reader = service.createReader(getCurrentSource());
-      boolean hasRecord =
-          reader.start()
-              && rangeTracker.tryReturnRecordAt(true, ByteKey.of(reader.getCurrentRow().getKey()));
-      if (hasRecord) {
-        ++recordsReturned;
-      }
-      return hasRecord;
-    }
-
-    @Override
-    public synchronized BigtableSource getCurrentSource() {
-      return source;
-    }
-
-    @Override
-    public boolean advance() throws IOException {
-      boolean hasRecord =
-          reader.advance()
-              && rangeTracker.tryReturnRecordAt(true, ByteKey.of(reader.getCurrentRow().getKey()));
-      if (hasRecord) {
-        ++recordsReturned;
-      }
-      return hasRecord;
-    }
-
-    @Override
-    public Row getCurrent() throws NoSuchElementException {
-      return reader.getCurrentRow();
-    }
-
-    @Override
-    public void close() throws IOException {
-      logger.info("Closing reader after reading {} records.", recordsReturned);
-      if (reader != null) {
-        reader.close();
-        reader = null;
-      }
-    }
-
-    @Override
-    public final Double getFractionConsumed() {
-      return rangeTracker.getFractionConsumed();
-    }
-
-    @Override
-    public final synchronized BigtableSource splitAtFraction(double fraction) {
-      ByteKey splitKey;
-      try {
-        splitKey = source.getRange().interpolateKey(fraction);
-      } catch (IllegalArgumentException e) {
-        logger.info("{}: Failed to interpolate key for fraction {}.", source.getRange(), fraction);
-        return null; // No key could be derived for this fraction, so refuse the split.
-      }
-      logger.debug(
-          "Proposing to split {} at fraction {} (key {})", rangeTracker, fraction, splitKey);
-      if (!rangeTracker.trySplitAtPosition(splitKey)) {
-        return null; // The range tracker rejected the proposed split key.
-      }
-      BigtableSource primary = source.withEndKey(splitKey);
-      BigtableSource residual = source.withStartKey(splitKey);
-      this.source = primary; // This reader keeps [start, splitKey); the rest is returned.
-      return residual;
-    }
-  }
-
-  private static class Sink
-      extends com.google.cloud.dataflow.sdk.io.Sink<KV<ByteString, Iterable<Mutation>>> {
-
-    public Sink(String tableId, BigtableService bigtableService) {
-      this.tableId = checkNotNull(tableId, "tableId");
-      this.bigtableService = checkNotNull(bigtableService, "bigtableService");
-    }
-
-    public String getTableId() {
-      return tableId;
-    }
-
-    public BigtableService getBigtableService() {
-      return bigtableService;
-    }
-
-    @Override
-    public String toString() {
-      return MoreObjects.toStringHelper(Sink.class)
-          .add("bigtableService", bigtableService)
-          .add("tableId", tableId)
-          .toString();
-    }
-
-    ///////////////////////////////////////////////////////////////////////////////
-    private final String tableId;
-    private final BigtableService bigtableService;
-
-    @Override
-    public WriteOperation<KV<ByteString, Iterable<Mutation>>, Long> createWriteOperation(
-        PipelineOptions options) {
-      return new BigtableWriteOperation(this);
-    }
-
-    /** Does nothing, as it is redundant with {@link Write#validate}. */
-    @Override
-    public void validate(PipelineOptions options) {}
-  }
-
-  /**
-   * The write operation for a Bigtable {@link Sink}. It creates one {@link BigtableWriter} per
-   * call to {@link #createWriter} and, on finalization, totals the per-writer record counts.
-   */
-  private static class BigtableWriteOperation
-      extends WriteOperation<KV<ByteString, Iterable<Mutation>>, Long> {
-    private final Sink sink;
-
-    /** Creates a write operation bound to the given sink. */
-    public BigtableWriteOperation(Sink sink) {
-      this.sink = sink;
-    }
-
-    /** Returns a new writer that reports back to this operation. */
-    @Override
-    public Writer<KV<ByteString, Iterable<Mutation>>, Long> createWriter(PipelineOptions options)
-        throws Exception {
-      return new BigtableWriter(this);
-    }
-
-    /** No setup is performed before writing starts. */
-    @Override
-    public void initialize(PipelineOptions options) {}
-
-    /**
-     * Sums the record counts returned by the writers and logs the total.
-     *
-     * @param writerResults the value returned by each writer's {@code close()}
-     * @param options unused
-     */
-    @Override
-    public void finalize(Iterable<Long> writerResults, PipelineOptions options) {
-      long count = 0;
-      for (Long value : writerResults) {
-        // Accumulate into the running total (the loop variable itself must not be modified).
-        count += value;
-      }
-      // Both placeholders need an argument: the total first, then the sink.
-      logger.debug("Wrote {} elements to BigtableIO.Sink {}", count, sink);
-    }
-
-    @Override
-    public Sink getSink() {
-      return sink;
-    }
-
-    /** Writer results are record counts, encoded as variable-length longs. */
-    @Override
-    public Coder<Long> getWriterResultCoder() {
-      return VarLongCoder.of();
-    }
-  }
-
-  /**
-   * Writes records to Cloud Bigtable through a {@link BigtableService.Writer}.
-   *
-   * <p>Each record is submitted with {@code writeRecord}, which returns a future. Failures
-   * delivered to that future's callback are queued in {@code failures} and are raised as an
-   * {@link IOException} by the next call to {@link #write} or by {@link #close}.
-   */
-  private static class BigtableWriter extends Writer<KV<ByteString, Iterable<Mutation>>, Long> {
-    private final BigtableWriteOperation writeOperation;
-    private final Sink sink;
-    // Set in open() and cleared in close().
-    private BigtableService.Writer bigtableWriter;
-    // Number of records submitted since open(); this is the value close() returns.
-    private long recordsWritten;
-    // Failures reported by write callbacks; drained only by checkForFailures().
-    private final ConcurrentLinkedQueue<BigtableWriteException> failures;
-
-    /** Creates a writer for the sink of the given write operation. */
-    public BigtableWriter(BigtableWriteOperation writeOperation) {
-      this.writeOperation = writeOperation;
-      this.sink = writeOperation.getSink();
-      this.failures = new ConcurrentLinkedQueue<>();
-    }
-
-    /**
-     * Opens a service writer for the sink's table and resets the record count. The {@code uId}
-     * argument is not used.
-     */
-    @Override
-    public void open(String uId) throws Exception {
-      bigtableWriter = sink.getBigtableService().openForWriting(sink.getTableId());
-      recordsWritten = 0;
-    }
-
-    /**
-     * If any write has asynchronously failed, fail the bundle with a useful error.
-     */
-    private void checkForFailures() throws IOException {
-      // Note that this function is never called by multiple threads and is the only place that
-      // we remove from failures, so this code is safe.
-      if (failures.isEmpty()) {
-        return;
-      }
-
-      // Drain at most 10 failures into the message; any others stay queued and are only counted.
-      StringBuilder logEntry = new StringBuilder();
-      int i = 0;
-      for (; i < 10 && !failures.isEmpty(); ++i) {
-        BigtableWriteException exc = failures.remove();
-        logEntry.append("\n").append(exc.getMessage());
-        if (exc.getCause() != null) {
-          logEntry.append(": ").append(exc.getCause().getMessage());
-        }
-      }
-      // i failures were drained above; failures.size() is what is still queued right now, hence
-      // the "at least" wording in the message.
-      String message =
-          String.format(
-              "At least %d errors occurred writing to Bigtable. First %d errors: %s",
-              i + failures.size(),
-              i,
-              logEntry.toString());
-      logger.error(message);
-      throw new IOException(message);
-    }
-
-    /**
-     * Throws if an earlier write has reported a failure; otherwise submits the record and
-     * registers a callback that queues its failure, if any. The record is counted when it is
-     * submitted, not when its future completes.
-     */
-    @Override
-    public void write(KV<ByteString, Iterable<Mutation>> rowMutations) throws Exception {
-      checkForFailures();
-      Futures.addCallback(
-          bigtableWriter.writeRecord(rowMutations), new WriteExceptionCallback(rowMutations));
-      ++recordsWritten;
-    }
-
-    /**
-     * Closes the service writer, raises any queued write failures, and returns the number of
-     * records submitted since {@link #open}.
-     */
-    @Override
-    public Long close() throws Exception {
-      bigtableWriter.close();
-      bigtableWriter = null;
-      // Checked after closing the service writer so failures reported up to that point are seen.
-      checkForFailures();
-      logger.info("Wrote {} records", recordsWritten);
-      return recordsWritten;
-    }
-
-    @Override
-    public WriteOperation<KV<ByteString, Iterable<Mutation>>, Long> getWriteOperation() {
-      return writeOperation;
-    }
-
-    /**
-     * Callback that records a failed write, together with the record being written, in the
-     * enclosing writer's {@code failures} queue. Successful writes are ignored.
-     */
-    private class WriteExceptionCallback implements FutureCallback<Empty> {
-      // The record whose write this callback is attached to.
-      private final KV<ByteString, Iterable<Mutation>> value;
-
-      public WriteExceptionCallback(KV<ByteString, Iterable<Mutation>> value) {
-        this.value = value;
-      }
-
-      @Override
-      public void onFailure(Throwable cause) {
-        failures.add(new BigtableWriteException(value, cause));
-      }
-
-      @Override
-      public void onSuccess(Empty produced) {}
-    }
-  }
-
-  /**
-   * An exception that puts information about the failed record being written in its message.
-   */
-  static class BigtableWriteException extends IOException {
-    public BigtableWriteException(KV<ByteString, Iterable<Mutation>> record, Throwable cause) {
-      super(
-          String.format(
-              "Error mutating row %s with mutations %s",
-              record.getKey().toStringUtf8(),
-              record.getValue()),
-          cause);
-    }
-  }
-
-  /**
-   * Builds the Cloud Bigtable user agent string from the Dataflow release name and version, the
-   * Java specification version, and the Bigtable client version.
-   */
-  private static String getUserAgent() {
-    String javaSpecVersion = System.getProperty("java.specification.version");
-    DataflowReleaseInfo releaseInfo = DataflowReleaseInfo.getReleaseInfo();
-    // TODO get Bigtable client version directly from jar.
-    String bigtableClientVersion = "0.2.3";
-    String releaseName = releaseInfo.getName();
-    String releaseVersion = releaseInfo.getVersion();
-    return String.format(
-        "%s/%s (%s); %s", releaseName, releaseVersion, javaSpecVersion, bigtableClientVersion);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableService.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableService.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableService.java
deleted file mode 100644
index 85d706c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/io/bigtable/BigtableService.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.io.bigtable;
-
-import com.google.bigtable.v1.Mutation;
-import com.google.bigtable.v1.Row;
-import com.google.bigtable.v1.SampleRowKeysResponse;
-import com.google.cloud.dataflow.sdk.io.bigtable.BigtableIO.BigtableSource;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.common.util.concurrent.ListenableFuture;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.Empty;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-/**
- * An interface for real or fake implementations of Cloud Bigtable.
- *
- * <p>It extends {@link Serializable}, so implementations are expected to be serializable.
- */
-interface BigtableService extends Serializable {
-
-  /**
-   * The interface of a class that can write to Cloud Bigtable.
-   */
-  interface Writer {
-    /**
-     * Writes a single row transaction to Cloud Bigtable. The key of the {@code record} is the
-     * row key to be mutated and the iterable of mutations represent the changes to be made to the
-     * row.
-     *
-     * @param record the row key paired with the mutations to apply to that row
-     * @return a future for the submitted write
-     * @throws IOException if there is an error submitting the write.
-     */
-    ListenableFuture<Empty> writeRecord(KV<ByteString, Iterable<Mutation>> record)
-        throws IOException;
-
-    /**
-     * Closes the writer.
-     *
-     * @throws IOException if any writes did not succeed
-     */
-    void close() throws IOException;
-  }
-
-  /**
-   * The interface of a class that reads from Cloud Bigtable.
-   */
-  interface Reader {
-    /**
-     * Reads the first element (including initialization, such as opening a network connection) and
-     * returns true if an element was found.
-     *
-     * @throws IOException if there is an error.
-     */
-    boolean start() throws IOException;
-
-    /**
-     * Attempts to read the next element, and returns true if an element has been read.
-     *
-     * @throws IOException if there is an error.
-     */
-    boolean advance() throws IOException;
-
-    /**
-     * Closes the reader.
-     *
-     * @throws IOException if there is an error.
-     */
-    void close() throws IOException;
-
-    /**
-     * Returns the last row read by a successful start() or advance(), or throws if there is no
-     * current row because the last such call was unsuccessful.
-     *
-     * @throws NoSuchElementException if there is no current row
-     */
-    Row getCurrentRow() throws NoSuchElementException;
-  }
-
-  /**
-   * Returns {@code true} if the table with the given name exists.
-   *
-   * @param tableId the id of the table to look up
-   * @throws IOException if there is an error.
-   */
-  boolean tableExists(String tableId) throws IOException;
-
-  /**
-   * Returns a {@link Reader} that will read from the specified source.
-   *
-   * @param source the source describing what to read
-   * @throws IOException if there is an error.
-   */
-  Reader createReader(BigtableSource source) throws IOException;
-
-  /**
-   * Returns a {@link Writer} that will write to the specified table.
-   *
-   * @param tableId the id of the table to write to
-   * @throws IOException if there is an error.
-   */
-  Writer openForWriting(String tableId) throws IOException;
-
-  /**
-   * Returns a set of row keys sampled from the underlying table. These contain information about
-   * the distribution of keys within the table.
-   *
-   * @param source the source identifying the table to sample
-   * @throws IOException if there is an error.
-   */
-  List<SampleRowKeysResponse> getSampleRowKeys(BigtableSource source) throws IOException;
-}

[57/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/JoinExamples.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/JoinExamples.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/JoinExamples.java
new file mode 100644
index 0000000..745c5d6
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/JoinExamples.java
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult;
+import com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey;
+import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.TupleTag;
+
+/**
+ * This example shows how to do a join on two collections.
+ * It uses a sample of the GDELT 'world event' data (http://goo.gl/OB6oin), joining the event
+ * 'action' country code against a table that maps country codes to country names.
+ *
+ * <p>Concepts: Join operation; multiple input sources.
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ * and a local output file or output prefix on GCS:
+ * <pre>{@code
+ *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
+ * }</pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * </pre>
+ * and an output prefix on GCS:
+ * <pre>{@code
+ *   --output=gs://YOUR_OUTPUT_PREFIX
+ * }</pre>
+ */
+public class JoinExamples {
+
+  // A 1000-row sample of the GDELT data here: gdelt-bq:full.events.
+  private static final String GDELT_EVENTS_TABLE =
+      "clouddataflow-readonly:samples.gdelt_sample";
+  // A table that maps country codes to country names.
+  private static final String COUNTRY_CODES =
+      "gdelt-bq:full.crosswalk_geocountrycodetohuman";
+
+  /**
+   * Joins two collections, using country code as the key.
+   *
+   * @param eventsTable rows of the GDELT event sample
+   * @param countryCodes rows that map a country code to a country name
+   * @return one formatted line per event, carrying the country code, the joined country name
+   *     ("none" when the code has no matching name), and the event info
+   */
+  static PCollection<String> joinEvents(PCollection<TableRow> eventsTable,
+      PCollection<TableRow> countryCodes) throws Exception {
+
+    final TupleTag<String> eventInfoTag = new TupleTag<String>();
+    final TupleTag<String> countryInfoTag = new TupleTag<String>();
+
+    // transform both input collections to tuple collections, where the keys are country
+    // codes in both cases.
+    PCollection<KV<String, String>> eventInfo = eventsTable.apply(
+        ParDo.of(new ExtractEventDataFn()));
+    PCollection<KV<String, String>> countryInfo = countryCodes.apply(
+        ParDo.of(new ExtractCountryInfoFn()));
+
+    // country code 'key' -> CGBKR (<event info>, <country name>)
+    PCollection<KV<String, CoGbkResult>> kvpCollection = KeyedPCollectionTuple
+        .of(eventInfoTag, eventInfo)
+        .and(countryInfoTag, countryInfo)
+        .apply(CoGroupByKey.<String>create());
+
+    // Process the CoGbkResult elements generated by the CoGroupByKey transform.
+    // country code 'key' -> string of <event info>, <country name>
+    PCollection<KV<String, String>> finalResultCollection =
+      kvpCollection.apply(ParDo.named("Process").of(
+        new DoFn<KV<String, CoGbkResult>, KV<String, String>>() {
+          @Override
+          public void processElement(ProcessContext c) {
+            KV<String, CoGbkResult> e = c.element();
+            String countryCode = e.getKey();
+            // Use "none" when no country name was joined for this code; getOnly without a
+            // default throws when the tag has no value.
+            String countryName = e.getValue().getOnly(countryInfoTag, "none");
+            for (String eventInfo : e.getValue().getAll(eventInfoTag)) {
+              // Generate a string that combines information from both collection values
+              c.output(KV.of(countryCode, "Country name: " + countryName
+                      + ", Event info: " + eventInfo));
+            }
+          }
+      }));
+
+    // Format each joined result as a single line of text; writing happens in main().
+    PCollection<String> formattedResults = finalResultCollection
+        .apply(ParDo.named("Format").of(new DoFn<KV<String, String>, String>() {
+          @Override
+          public void processElement(ProcessContext c) {
+            String outputstring = "Country code: " + c.element().getKey()
+                + ", " + c.element().getValue();
+            c.output(outputstring);
+          }
+        }));
+    return formattedResults;
+  }
+
+  /**
+   * Examines each row (event) in the input table. Output a KV with the key the country
+   * code of the event, and the value a string encoding event information.
+   */
+  static class ExtractEventDataFn extends DoFn<TableRow, KV<String, String>> {
+    @Override
+    public void processElement(ProcessContext c) {
+      TableRow row = c.element();
+      String countryCode = (String) row.get("ActionGeo_CountryCode");
+      String sqlDate = (String) row.get("SQLDATE");
+      String actor1Name = (String) row.get("Actor1Name");
+      String sourceUrl = (String) row.get("SOURCEURL");
+      String eventInfo = "Date: " + sqlDate + ", Actor1: " + actor1Name + ", url: " + sourceUrl;
+      c.output(KV.of(countryCode, eventInfo));
+    }
+  }
+
+
+  /**
+   * Examines each row (country info) in the input table. Output a KV with the key the country
+   * code, and the value the country name.
+   */
+  static class ExtractCountryInfoFn extends DoFn<TableRow, KV<String, String>> {
+    @Override
+    public void processElement(ProcessContext c) {
+      TableRow row = c.element();
+      String countryCode = (String) row.get("FIPSCC");
+      String countryName = (String) row.get("HumanName");
+      c.output(KV.of(countryCode, countryName));
+    }
+  }
+
+
+  /**
+   * Options supported by {@link JoinExamples}.
+   *
+   * <p>Inherits standard configuration options.
+   */
+  private static interface Options extends PipelineOptions {
+    @Description("Path of the file to write to")
+    @Validation.Required
+    String getOutput();
+    void setOutput(String value);
+  }
+
+  /**
+   * Reads the event and country-code tables from BigQuery, joins them on country code, and
+   * writes the formatted results as text to the location given by {@code --output}.
+   */
+  public static void main(String[] args) throws Exception {
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    Pipeline p = Pipeline.create(options);
+    // the following two 'applys' create multiple inputs to our pipeline, one for each
+    // of our two input sources.
+    PCollection<TableRow> eventsTable = p.apply(BigQueryIO.Read.from(GDELT_EVENTS_TABLE));
+    PCollection<TableRow> countryCodes = p.apply(BigQueryIO.Read.from(COUNTRY_CODES));
+    PCollection<String> formattedResults = joinEvents(eventsTable, countryCodes);
+    formattedResults.apply(TextIO.Write.to(options.getOutput()));
+    p.run();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamples.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamples.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamples.java
new file mode 100644
index 0000000..1c26d0f
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamples.java
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.Max;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An example that reads the public samples of weather data from BigQuery, and finds
+ * the maximum temperature ('mean_temp') for each month.
+ *
+ * <p>Concepts: The 'Max' statistical combination function, and how to find the max per
+ * key group.
+ *
+ * <p>Note: Before running this example, you must create a BigQuery dataset to contain your output
+ * table.
+ *
+ * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ * }
+ * </pre>
+ * and the BigQuery table for the output, with the form
+ * <pre>{@code
+ *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
+ * }</pre>
+ *
+ * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <pre>{@code
+ *   --project=YOUR_PROJECT_ID
+ *   --stagingLocation=gs://YOUR_STAGING_DIRECTORY
+ *   --runner=BlockingDataflowPipelineRunner
+ * }
+ * </pre>
+ * and the BigQuery table for the output:
+ * <pre>{@code
+ *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
+ * }</pre>
+ *
+ * <p>The BigQuery input table defaults to {@code clouddataflow-readonly:samples.weather_stations }
+ * and can be overridden with {@code --input}.
+ */
+public class MaxPerKeyExamples {
+  // Default to using a 1000 row subset of the public weather station table publicdata:samples.gsod.
+  private static final String WEATHER_SAMPLES_TABLE =
+      "clouddataflow-readonly:samples.weather_stations";
+
+  /**
+   * Examines each row (weather reading) in the input table. Output the month of the reading,
+   * and the mean_temp.
+   */
+  static class ExtractTempFn extends DoFn<TableRow, KV<Integer, Double>> {
+    @Override
+    public void processElement(ProcessContext c) {
+      TableRow row = c.element();
+      // Parse the month from its string form rather than casting to String, so a row that
+      // carries the month as a number is handled the same way as one that carries text.
+      Integer month = Integer.parseInt(String.valueOf(row.get("month")));
+      Double meanTemp = Double.parseDouble(row.get("mean_temp").toString());
+      c.output(KV.of(month, meanTemp));
+    }
+  }
+
+  /**
+   * Format the results to a TableRow, to save to BigQuery. The row has a "month" field and a
+   * "max_mean_temp" field.
+   */
+  static class FormatMaxesFn extends DoFn<KV<Integer, Double>, TableRow> {
+    @Override
+    public void processElement(ProcessContext c) {
+      TableRow row = new TableRow()
+          .set("month", c.element().getKey())
+          .set("max_mean_temp", c.element().getValue());
+      c.output(row);
+    }
+  }
+
+  /**
+   * Reads rows from a weather data table, and finds the max mean_temp for each
+   * month via the 'Max' statistical combination function.
+   */
+  static class MaxMeanTemp
+      extends PTransform<PCollection<TableRow>, PCollection<TableRow>> {
+    @Override
+    public PCollection<TableRow> apply(PCollection<TableRow> rows) {
+
+      // row... => <month, mean_temp> ...
+      PCollection<KV<Integer, Double>> temps = rows.apply(
+          ParDo.of(new ExtractTempFn()));
+
+      // month, mean_temp... => <month, max mean temp>...
+      PCollection<KV<Integer, Double>> tempMaxes =
+          temps.apply(Max.<Integer>doublesPerKey());
+
+      // <month, max>... => row...
+      PCollection<TableRow> results = tempMaxes.apply(
+          ParDo.of(new FormatMaxesFn()));
+
+      return results;
+    }
+  }
+
+  /**
+   * Options supported by {@link MaxPerKeyExamples}.
+   *
+   * <p>Inherits standard configuration options.
+   */
+  private static interface Options extends PipelineOptions {
+    @Description("Table to read from, specified as "
+        + "<project_id>:<dataset_id>.<table_id>")
+    @Default.String(WEATHER_SAMPLES_TABLE)
+    String getInput();
+    void setInput(String value);
+
+    @Description("Table to write to, specified as "
+        + "<project_id>:<dataset_id>.<table_id>")
+    @Validation.Required
+    String getOutput();
+    void setOutput(String value);
+  }
+
+  /**
+   * Reads the input table, computes the maximum mean temperature per month, and writes the
+   * result to the output table, creating it if needed and replacing its previous contents.
+   */
+  public static void main(String[] args)
+      throws Exception {
+
+    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
+    Pipeline p = Pipeline.create(options);
+
+    // Build the table schema for the output table.
+    List<TableFieldSchema> fields = new ArrayList<>();
+    fields.add(new TableFieldSchema().setName("month").setType("INTEGER"));
+    fields.add(new TableFieldSchema().setName("max_mean_temp").setType("FLOAT"));
+    TableSchema schema = new TableSchema().setFields(fields);
+
+    p.apply(BigQueryIO.Read.from(options.getInput()))
+     .apply(new MaxMeanTemp())
+     .apply(BigQueryIO.Write
+        .to(options.getOutput())
+        .withSchema(schema)
+        .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
+        .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE));
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/README.md
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/README.md b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/README.md
new file mode 100644
index 0000000..99f3080
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/README.md
@@ -0,0 +1,55 @@
+
+# "Cookbook" Examples
+
+This directory holds simple "cookbook" examples, which show how to define
+commonly-used data analysis patterns that you would likely incorporate into a
+larger Dataflow pipeline. They include:
+
+ <ul>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoes.java">BigQueryTornadoes</a>
+  &mdash; An example that reads the public samples of weather data from Google
+  BigQuery, counts the number of tornadoes that occur in each month, and
+  writes the results to BigQuery. Demonstrates reading/writing BigQuery,
+  counting a <code>PCollection</code>, and user-defined <code>PTransforms</code>.</li>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamples.java">CombinePerKeyExamples</a>
+  &mdash; An example that reads the public &quot;Shakespeare&quot; data, and for
+  each word in the dataset that exceeds a given length, generates a string
+  containing the list of play names in which that word appears.
+  Demonstrates the <code>Combine.perKey</code>
+  transform, which lets you combine the values in a key-grouped
+  <code>PCollection</code>.
+  </li>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DatastoreWordCount.java">DatastoreWordCount</a>
+  &mdash; An example that shows you how to read from Google Cloud Datastore.</li>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/DeDupExample.java">DeDupExample</a>
+  &mdash; An example that uses Shakespeare's plays as plain text files, and
+  removes duplicate lines across all the files. Demonstrates the
+  <code>RemoveDuplicates</code>, <code>TextIO.Read</code>,
+  and <code>TextIO.Write</code> transforms, and how to wire transforms together.
+  </li>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/FilterExamples.java">FilterExamples</a>
+  &mdash; An example that shows different approaches to filtering, including
+  selection and projection. It also shows how to dynamically set parameters
+  by defining and using new pipeline options, and how to use a value derived
+  by a pipeline. Demonstrates the <code>Mean</code> transform,
+  <code>Options</code> configuration, and using pipeline-derived data as a side
+  input.
+  </li>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/JoinExamples.java">JoinExamples</a>
+  &mdash; An example that shows how to join two collections. It uses a
+  sample of the <a href="http://goo.gl/OB6oin">GDELT &quot;world event&quot;
+  data</a>, joining the event <code>action</code> country code against a table
+  that maps country codes to country names. Demonstrates the <code>Join</code>
+  operation, and using multiple input sources.
+  </li>
+  <li><a href="https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/examples/src/main/java/com/google/cloud/dataflow/examples/cookbook/MaxPerKeyExamples.java">MaxPerKeyExamples</a>
+  &mdash; An example that reads the public samples of weather data from BigQuery,
+  and finds the maximum temperature (<code>mean_temp</code>) for each month.
+  Demonstrates the <code>Max</code> statistical combination transform, and how to
+  find the max-per-key group.
+  </li>
+  </ul>
+
+See the [documentation](https://cloud.google.com/dataflow/getting-started) and the [Examples
+README](../../../../../../../../../README.md) for
+information about how to run these examples.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/TriggerExample.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/TriggerExample.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/TriggerExample.java
new file mode 100644
index 0000000..ce5e08e
--- /dev/null
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/cookbook/TriggerExample.java
@@ -0,0 +1,564 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.cloud.dataflow.examples.common.DataflowExampleOptions;
+import com.google.cloud.dataflow.examples.common.DataflowExampleUtils;
+import com.google.cloud.dataflow.examples.common.ExampleBigQueryTableOptions;
+import com.google.cloud.dataflow.examples.common.ExamplePubsubTopicOptions;
+import com.google.cloud.dataflow.examples.common.PubsubFileInjector;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.PipelineResult;
+import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+import com.google.cloud.dataflow.sdk.io.PubsubIO;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
+import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
+import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.windowing.AfterEach;
+import com.google.cloud.dataflow.sdk.transforms.windowing.AfterProcessingTime;
+import com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark;
+import com.google.cloud.dataflow.sdk.transforms.windowing.FixedWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Repeatedly;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.PCollectionList;
+
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * This example illustrates the basic concepts behind triggering. It shows how to use different
+ * trigger definitions to produce partial (speculative) results before all the data is processed and
+ * to control when updated results are produced for late data. The example performs a streaming
+ * analysis of the data coming in from PubSub and writes the results to BigQuery. It divides the
+ * data into {@link Window windows} to be processed, and demonstrates using various kinds of {@link
+ * Trigger triggers} to control when the results for each window are emitted.
+ *
+ * <p> This example uses a portion of real traffic data from San Diego freeways. It contains
+ * readings from sensor stations set up along each freeway. Each sensor reading includes a
+ * calculation of the 'total flow' across all lanes in that freeway direction.
+ *
+ * <p> Concepts:
+ * <pre>
+ *   1. The default triggering behavior
+ *   2. Late data with the default trigger
+ *   3. How to get speculative estimates
+ *   4. Combining late data and speculative estimates
+ * </pre>
+ *
+ * <p> Before running this example, it will be useful to familiarize yourself with Dataflow triggers
+ * and understand the concept of 'late data'.
+ * See:  <a href="https://cloud.google.com/dataflow/model/triggers">
+ * https://cloud.google.com/dataflow/model/triggers </a> and
+ * <a href="https://cloud.google.com/dataflow/model/windowing#Advanced">
+ * https://cloud.google.com/dataflow/model/windowing#Advanced </a>
+ *
+ * <p> The example pipeline reads data from a Pub/Sub topic. By default, running the example will
+ * also run an auxiliary pipeline to inject data from the default {@code --input} file to the
+ * {@code --pubsubTopic}. The auxiliary pipeline puts a timestamp on the injected data so that the
+ * example pipeline can operate on <i>event time</i> (rather than arrival time). The auxiliary
+ * pipeline also randomly simulates late data, by setting the timestamps of some of the data
+ * elements to be in the past. You may override the default {@code --input} with the file of your
+ * choosing or set {@code --input=""} which will disable the automatic Pub/Sub injection, and allow
+ * you to use a separate tool to publish to the given topic.
+ *
+ * <p> The example is configured to use the default Pub/Sub topic and the default BigQuery table
+ * from the example common package (there are no defaults for a general Dataflow pipeline).
+ * You can override them by using the {@code --pubsubTopic}, {@code --bigQueryDataset}, and
+ * {@code --bigQueryTable} options. If the Pub/Sub topic or the BigQuery table do not exist,
+ * the example will try to create them.
+ *
+ * <p> The pipeline outputs its results to a BigQuery table.
+ * Here are some queries you can use to see interesting results:
+ * Replace {@code <enter_table_name>} in the query below with the name of the BigQuery table.
+ * Replace {@code <enter_window_interval>} in the query below with the window interval.
+ *
+ * <p> To see the results of the default trigger,
+ * Note: When you start up your pipeline, you'll initially see results from 'late' data. Wait after
+ * the window duration, until the first pane of non-late data has been emitted, to see more
+ * interesting results.
+ * {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "default" ORDER BY window DESC}
+ *
+ * <p> To see the late data, i.e. the data dropped by the default trigger,
+ * {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "withAllowedLateness" and
+ * (timing = "LATE" or timing = "ON_TIME") and freeway = "5" ORDER BY window DESC, processing_time}
+ *
+ * <p> To see the difference between accumulation mode and discarding mode,
+ * {@code SELECT * FROM <enter_table_name> WHERE (timing = "LATE" or timing = "ON_TIME") AND
+ * (trigger_type = "withAllowedLateness" or trigger_type = "sequential") and freeway = "5" ORDER BY
+ * window DESC, processing_time}
+ *
+ * <p> To see speculative results every minute,
+ * {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "speculative" and freeway = "5"
+ * ORDER BY window DESC, processing_time}
+ *
+ * <p> To see the results emitted every five minutes after the end of the window,
+ * {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "sequential" and timing != "EARLY"
+ * and freeway = "5" ORDER BY window DESC, processing_time}
+ *
+ * <p> To see the first and the last pane for a freeway in a window for all the trigger types,
+ * {@code SELECT * FROM <enter_table_name> WHERE (isFirst = true or isLast = true) ORDER BY window}
+ *
+ * <p> To reduce the number of results for each query we can add additional where clauses.
+ * For example, to see the results of the default trigger for a single freeway and window,
+ * {@code SELECT * FROM <enter_table_name> WHERE trigger_type = "default" AND freeway = "5" AND
+ * window = "<enter_window_interval>"}
+ *
+ * <p> The example will try to cancel the pipelines on the signal to terminate the process (CTRL-C)
+ * and then exit.
+ */
+
+public class TriggerExample {
+  //Numeric value of fixed window duration, in minutes
+  public static final int WINDOW_DURATION = 30;
+  // Constants used in triggers.
+  // Speeding up ONE_MINUTE or FIVE_MINUTES helps you get an early approximation of results.
+  // ONE_MINUTE is used only with processing time before the end of the window
+  public static final Duration ONE_MINUTE = Duration.standardMinutes(1);
+  // FIVE_MINUTES is used only with processing time after the end of the window
+  public static final Duration FIVE_MINUTES = Duration.standardMinutes(5);
+  // ONE_DAY is used to specify the amount of lateness allowed for the data elements.
+  public static final Duration ONE_DAY = Duration.standardDays(1);
+
+  /**
+   * This transform demonstrates using triggers to control when data is produced for each window.
+   * Consider an example to understand the results generated by each type of trigger.
+   * The example uses "freeway" as the key. Event time is the timestamp associated with the data
+   * element and processing time is the time when the data element gets processed in the pipeline.
+   * For freeway 5, suppose there are 10 elements in the [10:00:00, 10:30:00) window.
+   * Key (freeway) | Value (total_flow) | event time | processing time
+   * 5             | 50                 | 10:00:03   | 10:00:47
+   * 5             | 30                 | 10:01:00   | 10:01:03
+   * 5             | 30                 | 10:02:00   | 11:07:00
+   * 5             | 20                 | 10:04:10   | 10:05:15
+   * 5             | 60                 | 10:05:00   | 11:03:00
+   * 5             | 20                 | 10:05:01   | 11:07:30
+   * 5             | 60                 | 10:15:00   | 10:27:15
+   * 5             | 40                 | 10:26:40   | 10:26:43
+   * 5             | 60                 | 10:27:20   | 10:27:25
+   * 5             | 60                 | 10:29:00   | 11:11:00
+   *
+   * <p> Dataflow tracks a watermark which records up to what point in event time the data is
+   * complete. For the purposes of the example, we'll assume the watermark is approximately 15m
+   * behind the current processing time. In practice, the actual value would vary over time based
+   * on the system's knowledge of the current PubSub delay and contents of the backlog (data
+   * that has not yet been processed).
+   *
+   * <p> If the watermark is 15m behind, then the window [10:00:00, 10:30:00) (in event time) would
+   * close at 10:44:59, when the watermark passes 10:30:00.
+   */
+  static class CalculateTotalFlow
+  extends PTransform <PCollection<KV<String, Integer>>, PCollectionList<TableRow>> {
+    private int windowDuration;
+
+    CalculateTotalFlow(int windowDuration) {
+      this.windowDuration = windowDuration;
+    }
+
+    @Override
+    public PCollectionList<TableRow> apply(PCollection<KV<String, Integer>> flowInfo) {
+
+      // Concept #1: The default triggering behavior
+      // By default Dataflow uses a trigger which fires when the watermark has passed the end of the
+      // window. This would be written {@code Repeatedly.forever(AfterWatermark.pastEndOfWindow())}.
+
+      // The system also defaults to dropping late data -- data which arrives after the watermark
+      // has passed the event timestamp of the arriving element. This means that the default trigger
+      // will only fire once.
+
+      // Each pane produced by the default trigger with no allowed lateness will be the first and
+      // last pane in the window, and will be ON_TIME.
+
+      // The results for the example above with the default trigger and zero allowed lateness
+      // would be:
+      // Key (freeway) | Value (total_flow) | number_of_records | isFirst | isLast | timing
+      // 5             | 260                | 6                 | true    | true   | ON_TIME
+
+      // At 11:03:00 (processing time) the system watermark may have advanced to 10:54:00. As a
+      // result, when the data record with event time 10:05:00 arrives at 11:03:00, it is considered
+      // late, and dropped.
+
+      PCollection<TableRow> defaultTriggerResults = flowInfo
+          .apply("Default", Window
+              // The default window duration values work well if you're running the default input
+              // file. You may want to adjust the window duration otherwise.
+              .<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(windowDuration)))
+              // The default trigger first emits output when the system's watermark passes the end
+              // of the window.
+              .triggering(Repeatedly.forever(AfterWatermark.pastEndOfWindow()))
+              // Late data is dropped
+              .withAllowedLateness(Duration.ZERO)
+              // Discard elements after emitting each pane.
+              // With no allowed lateness and the specified trigger there will only be a single
+              // pane, so this doesn't have a noticeable effect. See concept 2 for more details.
+              .discardingFiredPanes())
+          .apply(new TotalFlow("default"));
+
+      // Concept #2: Late data with the default trigger
+      // This uses the same trigger as concept #1, but allows data that is up to ONE_DAY late. This
+      // leads to each window staying open for ONE_DAY after the watermark has passed the end of the
+      // window. Any late data will result in an additional pane being fired for that same window.
+
+      // The first pane produced will be ON_TIME and the remaining panes will be LATE.
+      // To definitely get the last pane when the window closes, use
+      // .withAllowedLateness(ONE_DAY, ClosingBehavior.FIRE_ALWAYS).
+
+      // The results for the example above with the default trigger and ONE_DAY allowed lateness
+      // would be:
+      // Key (freeway) | Value (total_flow) | number_of_records | isFirst | isLast | timing
+      // 5             | 260                | 6                 | true    | false  | ON_TIME
+      // 5             | 60                 | 1                 | false   | false  | LATE
+      // 5             | 30                 | 1                 | false   | false  | LATE
+      // 5             | 20                 | 1                 | false   | false  | LATE
+      // 5             | 60                 | 1                 | false   | false  | LATE
+      PCollection<TableRow> withAllowedLatenessResults = flowInfo
+          .apply("WithLateData", Window
+              .<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(windowDuration)))
+              // Late data is emitted as it arrives
+              .triggering(Repeatedly.forever(AfterWatermark.pastEndOfWindow()))
+              // Once the output is produced, the pane is dropped and we start preparing the next
+              // pane for the window
+              .discardingFiredPanes()
+              // Late data is handled up to one day
+              .withAllowedLateness(ONE_DAY))
+          .apply(new TotalFlow("withAllowedLateness"));
+
+      // Concept #3: How to get speculative estimates
+      // We can specify a trigger that fires independent of the watermark, for instance after
+      // ONE_MINUTE of processing time. This allows us to produce speculative estimates before
+      // all the data is available. Since we don't have any triggers that depend on the watermark
+      // we don't get an ON_TIME firing. Instead, all panes are either EARLY or LATE.
+
+      // We also use accumulatingFiredPanes to build up the results across each pane firing.
+
+      // The results for the example above for this trigger would be:
+      // Key (freeway) | Value (total_flow) | number_of_records | isFirst | isLast | timing
+      // 5             | 80                 | 2                 | true    | false  | EARLY
+      // 5             | 100                | 3                 | false   | false  | EARLY
+      // 5             | 260                | 6                 | false   | false  | EARLY
+      // 5             | 320                | 7                 | false   | false  | LATE
+      // 5             | 370                | 9                 | false   | false  | LATE
+      // 5             | 430                | 10                | false   | false  | LATE
+      PCollection<TableRow> speculativeResults = flowInfo
+          .apply("Speculative" , Window
+              .<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(windowDuration)))
+              // Trigger fires every minute.
+              .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
+                  // Speculative every ONE_MINUTE
+                  .plusDelayOf(ONE_MINUTE)))
+              // After emitting each pane, it will continue accumulating the elements so that each
+              // approximation includes all of the previous data in addition to the newly arrived
+              // data.
+              .accumulatingFiredPanes()
+              .withAllowedLateness(ONE_DAY))
+          .apply(new TotalFlow("speculative"));
+
+      // Concept #4: Combining late data and speculative estimates
+      // We can put the previous concepts together to get EARLY estimates, an ON_TIME result,
+      // and LATE updates based on late data.
+
+      // Each time a triggering condition is satisfied it advances to the next trigger.
+      // If there are new elements this trigger emits a window under the following conditions:
+      // > Early approximations every minute till the end of the window.
+      // > An on-time firing when the watermark has passed the end of the window
+      // > Every five minutes of late data.
+
+      // Every pane produced will either be EARLY, ON_TIME or LATE.
+
+      // The results for the example above for this trigger would be:
+      // Key (freeway) | Value (total_flow) | number_of_records | isFirst | isLast | timing
+      // 5             | 80                 | 2                 | true    | false  | EARLY
+      // 5             | 100                | 3                 | false   | false  | EARLY
+      // 5             | 260                | 6                 | false   | false  | EARLY
+      // [First pane fired after the end of the window]
+      // 5             | 320                | 7                 | false   | false  | ON_TIME
+      // 5             | 430                | 10                | false   | false  | LATE
+
+      // For more possibilities of how to build advanced triggers, see {@link Trigger}.
+      PCollection<TableRow> sequentialResults = flowInfo
+          .apply("Sequential", Window
+              .<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(windowDuration)))
+              .triggering(AfterEach.inOrder(
+                  Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
+                      // Speculative every ONE_MINUTE
+                      .plusDelayOf(ONE_MINUTE)).orFinally(AfterWatermark.pastEndOfWindow()),
+                  Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()
+                      // Late data every FIVE_MINUTES
+                      .plusDelayOf(FIVE_MINUTES))))
+              .accumulatingFiredPanes()
+              // For up to ONE_DAY
+              .withAllowedLateness(ONE_DAY))
+          .apply(new TotalFlow("sequential"));
+
+      // Adds the results generated by each trigger type to a PCollectionList.
+      PCollectionList<TableRow> resultsList = PCollectionList.of(defaultTriggerResults)
+          .and(withAllowedLatenessResults)
+          .and(speculativeResults)
+          .and(sequentialResults);
+
+      return resultsList;
+    }
+  }
+
+  //////////////////////////////////////////////////////////////////////////////////////////////////
+  // The remaining parts of the pipeline are needed to produce the output for each
+  // concept above. Not directly relevant to understanding the trigger examples.
+
+  /**
+   * Calculate total flow and number of records for each freeway and format the results to TableRow
+   * objects, to save to BigQuery.
+   */
+  static class TotalFlow extends
+  PTransform <PCollection<KV<String, Integer>>, PCollection<TableRow>> {
+    private String triggerType;
+
+    public TotalFlow(String triggerType) {
+      this.triggerType = triggerType;
+    }
+
+    @Override
+    public PCollection<TableRow> apply(PCollection<KV<String, Integer>> flowInfo) {
+      PCollection<KV<String, Iterable<Integer>>> flowPerFreeway = flowInfo
+          .apply(GroupByKey.<String, Integer>create());
+
+      PCollection<KV<String, String>> results = flowPerFreeway.apply(ParDo.of(
+          new DoFn <KV<String, Iterable<Integer>>, KV<String, String>>() {
+
+            @Override
+            public void processElement(ProcessContext c) throws Exception {
+              Iterable<Integer> flows = c.element().getValue();
+              Integer sum = 0;
+              Long numberOfRecords = 0L;
+              for (Integer value : flows) {
+                sum += value;
+                numberOfRecords++;
+              }
+              c.output(KV.of(c.element().getKey(), sum + "," + numberOfRecords));
+            }
+          }));
+      PCollection<TableRow> output = results.apply(ParDo.of(new FormatTotalFlow(triggerType)));
+      return output;
+    }
+  }
+
+  /**
+   * Format the results of the Total flow calculation to a TableRow, to save to BigQuery.
+   * Adds the triggerType, pane information, processing time and the window timestamp.
+   * */
+  static class FormatTotalFlow extends DoFn<KV<String, String>, TableRow>
+  implements  RequiresWindowAccess {
+    private String triggerType;
+
+    public FormatTotalFlow(String triggerType) {
+      this.triggerType = triggerType;
+    }
+    @Override
+    public void processElement(ProcessContext c) throws Exception {
+      String[] values = c.element().getValue().split(",");
+      TableRow row = new TableRow()
+          .set("trigger_type", triggerType)
+          .set("freeway", c.element().getKey())
+          .set("total_flow", Integer.parseInt(values[0]))
+          .set("number_of_records", Long.parseLong(values[1]))
+          .set("window", c.window().toString())
+          .set("isFirst", c.pane().isFirst())
+          .set("isLast", c.pane().isLast())
+          .set("timing", c.pane().getTiming().toString())
+          .set("event_time", c.timestamp().toString())
+          .set("processing_time", Instant.now().toString());
+      c.output(row);
+    }
+  }
+
+  /**
+   * Extract the freeway and total flow in a reading.
+   * Freeway is used as key since we are calculating the total flow for each freeway.
+   */
+  static class ExtractFlowInfo extends DoFn<String, KV<String, Integer>> {
+    @Override
+    public void processElement(ProcessContext c) throws Exception {
+      String[] laneInfo = c.element().split(",");
+      if (laneInfo[0].equals("timestamp")) {
+        // Header row
+        return;
+      }
+      if (laneInfo.length < 48) {
+        //Skip the invalid input.
+        return;
+      }
+      String freeway = laneInfo[2];
+      Integer totalFlow = tryIntegerParse(laneInfo[7]);
+      // Ignore the records with total flow 0 to easily understand the working of triggers.
+      // Skip the records with total flow -1 since they are invalid input.
+      if (totalFlow == null || totalFlow <= 0) {
+        return;
+      }
+      c.output(KV.of(freeway,  totalFlow));
+    }
+  }
+
+  /**
+   * Inherits standard configuration options.
+   */
+  public interface TrafficFlowOptions
+      extends ExamplePubsubTopicOptions, ExampleBigQueryTableOptions, DataflowExampleOptions {
+
+    @Description("Input file to inject to Pub/Sub topic")
+    @Default.String("gs://dataflow-samples/traffic_sensor/"
+        + "Freeways-5Minaa2010-01-01_to_2010-02-15.csv")
+    String getInput();
+    void setInput(String value);
+
+    @Description("Numeric value of window duration for fixed windows, in minutes")
+    @Default.Integer(WINDOW_DURATION)
+    Integer getWindowDuration();
+    void setWindowDuration(Integer value);
+  }
+
+  private static final String PUBSUB_TIMESTAMP_LABEL_KEY = "timestamp_ms";
+
+  public static void main(String[] args) throws Exception {
+    TrafficFlowOptions options = PipelineOptionsFactory.fromArgs(args)
+        .withValidation()
+        .as(TrafficFlowOptions.class);
+    options.setStreaming(true);
+
+    // In order to cancel the pipelines automatically,
+    // {@code DataflowPipelineRunner} is forced to be used.
+    options.setRunner(DataflowPipelineRunner.class);
+    options.setBigQuerySchema(getSchema());
+
+    DataflowExampleUtils dataflowUtils = new DataflowExampleUtils(options);
+    dataflowUtils.setup();
+
+    Pipeline pipeline = Pipeline.create(options);
+
+    TableReference tableRef = getTableReference(options.getProject(),
+        options.getBigQueryDataset(), options.getBigQueryTable());
+
+    PCollectionList<TableRow> resultList = pipeline.apply(PubsubIO.Read.named("ReadPubsubInput")
+        .timestampLabel(PUBSUB_TIMESTAMP_LABEL_KEY)
+        .topic(options.getPubsubTopic()))
+        .apply(ParDo.of(new ExtractFlowInfo()))
+        .apply(new CalculateTotalFlow(options.getWindowDuration()));
+
+    for (int i = 0; i < resultList.size(); i++){
+      resultList.get(i).apply(BigQueryIO.Write.to(tableRef).withSchema(getSchema()));
+    }
+
+    PipelineResult result = pipeline.run();
+    if (!options.getInput().isEmpty()){
+      //Inject the data into the pubsub topic
+      dataflowUtils.runInjectorPipeline(runInjector(options));
+    }
+    // dataflowUtils will try to cancel the pipeline and the injector before the program exits.
+    dataflowUtils.waitToFinish(result);
+  }
+
+  private static Pipeline runInjector(TrafficFlowOptions options){
+    DataflowPipelineOptions copiedOptions = options.cloneAs(DataflowPipelineOptions.class);
+    copiedOptions.setStreaming(false);
+    copiedOptions.setNumWorkers(options.as(DataflowExampleOptions.class).getInjectorNumWorkers());
+    copiedOptions.setJobName(options.getJobName() + "-injector");
+    Pipeline injectorPipeline = Pipeline.create(copiedOptions);
+    injectorPipeline
+    .apply(TextIO.Read.named("ReadMyFile").from(options.getInput()))
+    .apply(ParDo.named("InsertRandomDelays").of(new InsertDelays()))
+    .apply(IntraBundleParallelization.of(PubsubFileInjector
+        .withTimestampLabelKey(PUBSUB_TIMESTAMP_LABEL_KEY)
+        .publish(options.getPubsubTopic()))
+        .withMaxParallelism(20));
+
+    return injectorPipeline;
+  }
+
+  /**
+   * Add current time to each record.
+   * Also insert a delay at random to demo the triggers.
+   */
+  public static class InsertDelays extends DoFn<String, String> {
+    private static final double THRESHOLD = 0.001;
+    // MIN_DELAY and MAX_DELAY in minutes.
+    private static final int MIN_DELAY = 1;
+    private static final int MAX_DELAY = 100;
+
+    @Override
+    public void processElement(ProcessContext c) throws Exception {
+      Instant timestamp = Instant.now();
+      if (Math.random() < THRESHOLD){
+        int range = MAX_DELAY - MIN_DELAY;
+        int delayInMinutes = (int) (Math.random() * range) + MIN_DELAY;
+        long delayInMillis = TimeUnit.MINUTES.toMillis(delayInMinutes);
+        timestamp = new Instant(timestamp.getMillis() - delayInMillis);
+      }
+      c.outputWithTimestamp(c.element(), timestamp);
+    }
+  }
+
+
+  /** Builds a table reference from the given project, dataset and table ids. */
+  private static TableReference getTableReference(String project, String dataset, String table){
+    TableReference tableRef = new TableReference();
+    tableRef.setProjectId(project);
+    tableRef.setDatasetId(dataset);
+    tableRef.setTableId(table);
+    return tableRef;
+  }
+
+  /** Defines the BigQuery schema used for the output. */
+  private static TableSchema getSchema() {
+    List<TableFieldSchema> fields = new ArrayList<>();
+    fields.add(new TableFieldSchema().setName("trigger_type").setType("STRING"));
+    fields.add(new TableFieldSchema().setName("freeway").setType("STRING"));
+    fields.add(new TableFieldSchema().setName("total_flow").setType("INTEGER"));
+    fields.add(new TableFieldSchema().setName("number_of_records").setType("INTEGER"));
+    fields.add(new TableFieldSchema().setName("window").setType("STRING"));
+    fields.add(new TableFieldSchema().setName("isFirst").setType("BOOLEAN"));
+    fields.add(new TableFieldSchema().setName("isLast").setType("BOOLEAN"));
+    fields.add(new TableFieldSchema().setName("timing").setType("STRING"));
+    fields.add(new TableFieldSchema().setName("event_time").setType("TIMESTAMP"));
+    fields.add(new TableFieldSchema().setName("processing_time").setType("TIMESTAMP"));
+    TableSchema schema = new TableSchema().setFields(fields);
+    return schema;
+  }
+
+  private static Integer tryIntegerParse(String number) {
+    try {
+      return Integer.parseInt(number);
+    } catch (NumberFormatException e) {
+      return null;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/DebuggingWordCountTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/DebuggingWordCountTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/DebuggingWordCountTest.java
new file mode 100644
index 0000000..77d7bc8
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/DebuggingWordCountTest.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples;
+
+import com.google.common.io.Files;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.File;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Tests for {@link DebuggingWordCount}.
+ */
+@RunWith(JUnit4.class)
+public class DebuggingWordCountTest {
+  @Rule public TemporaryFolder tmpFolder = new TemporaryFolder();
+
+  @Test
+  public void testDebuggingWordCount() throws Exception {
+    File file = tmpFolder.newFile();
+    Files.write("stomach secret Flourish message Flourish here Flourish", file,
+        StandardCharsets.UTF_8);
+    DebuggingWordCount.main(new String[]{"--inputFile=" + file.getAbsolutePath()});
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java
new file mode 100644
index 0000000..4542c48
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/WordCountTest.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples;
+
+import com.google.cloud.dataflow.examples.WordCount.CountWords;
+import com.google.cloud.dataflow.examples.WordCount.ExtractWordsFn;
+import com.google.cloud.dataflow.examples.WordCount.FormatAsTextFn;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+import com.google.cloud.dataflow.sdk.transforms.MapElements;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.hamcrest.CoreMatchers;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Tests of {@link WordCount}: a unit test of a single {@code DoFn} and a pipeline-level test of
+ * the {@code CountWords} transform.
+ */
+@RunWith(JUnit4.class)
+public class WordCountTest {
+
+  /** Example test that tests a specific DoFn. */
+  @Test
+  public void testExtractWordsFn() {
+    DoFnTester<String, String> extractWordsFn =
+        DoFnTester.of(new ExtractWordsFn());
+
+    Assert.assertThat(extractWordsFn.processBatch(" some  input  words "),
+                      CoreMatchers.hasItems("some", "input", "words"));
+    // A zero-argument hasItems() matcher accepts every collection, so it could never fail.
+    // Assert emptiness directly for the whitespace-only input instead.
+    // NOTE(review): assumes ExtractWordsFn emits nothing for a blank line -- confirm.
+    Assert.assertTrue(extractWordsFn.processBatch(" ").isEmpty());
+    Assert.assertThat(extractWordsFn.processBatch(" some ", " input", " words"),
+                      CoreMatchers.hasItems("some", "input", "words"));
+  }
+
+  /** Input lines for {@link #testCountWords()}, including one empty line. */
+  static final String[] WORDS_ARRAY = new String[] {
+    "hi there", "hi", "hi sue bob",
+    "hi sue", "", "bob hi"};
+
+  /** List view of {@link #WORDS_ARRAY}. */
+  static final List<String> WORDS = Arrays.asList(WORDS_ARRAY);
+
+  /** Expected formatted output, one {@code "word: count"} entry per distinct word. */
+  static final String[] COUNTS_ARRAY = new String[] {
+      "hi: 5", "there: 1", "sue: 2", "bob: 2"};
+
+  /** Example test that tests a PTransform by using an in-memory input and inspecting the output. */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testCountWords() throws Exception {
+    Pipeline p = TestPipeline.create();
+
+    PCollection<String> input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()));
+
+    PCollection<String> output = input.apply(new CountWords())
+      .apply(MapElements.via(new FormatAsTextFn()));
+
+    // Output order is not fixed, so compare as an unordered collection.
+    DataflowAssert.that(output).containsInAnyOrder(COUNTS_ARRAY);
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java
new file mode 100644
index 0000000..aec1557
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/AutoCompleteTest.java
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete;
+
+import com.google.cloud.dataflow.examples.complete.AutoComplete.CompletionCandidate;
+import com.google.cloud.dataflow.examples.complete.AutoComplete.ComputeTopCompletions;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.Filter;
+import com.google.cloud.dataflow.sdk.transforms.PTransform;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
+import com.google.cloud.dataflow.sdk.transforms.windowing.SlidingWindows;
+import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+import com.google.cloud.dataflow.sdk.values.TimestampedValue;
+
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * Tests of {@link AutoComplete}. Every test runs twice, once with {@code recursive} set to
+ * {@code true} and once with it set to {@code false}.
+ */
+@RunWith(Parameterized.class)
+public class AutoCompleteTest implements Serializable {
+  /** Forwarded as the second constructor argument of {@link ComputeTopCompletions}. */
+  private final boolean recursive;
+
+  public AutoCompleteTest(Boolean recursive) {
+    this.recursive = recursive;
+  }
+
+  /** Parameter sets for the runner: one with {@code recursive} true, one with it false. */
+  @Parameterized.Parameters
+  public static Collection<Object[]> testRecursive() {
+    return Arrays.asList(new Object[][] {
+        { true },
+        { false }
+      });
+  }
+
+  /** Checks the completions produced for every prefix of at most two characters. */
+  @Test
+  public void testAutoComplete() {
+    List<String> words = Arrays.asList(
+        "apple",
+        "apple",
+        "apricot",
+        "banana",
+        "blackberry",
+        "blackberry",
+        "blackberry",
+        "blueberry",
+        "blueberry",
+        "cherry");
+
+    Pipeline p = TestPipeline.create();
+
+    PCollection<String> input = p.apply(Create.of(words));
+
+    // Only prefixes of length <= 2 are kept, which bounds the expected output listed below.
+    PCollection<KV<String, List<CompletionCandidate>>> output =
+      input.apply(new ComputeTopCompletions(2, recursive))
+           .apply(Filter.byPredicate(
+                        new SerializableFunction<KV<String, List<CompletionCandidate>>, Boolean>() {
+                          @Override
+                          public Boolean apply(KV<String, List<CompletionCandidate>> element) {
+                            return element.getKey().length() <= 2;
+                          }
+                      }));
+
+    DataflowAssert.that(output).containsInAnyOrder(
+        KV.of("a", parseList("apple:2", "apricot:1")),
+        KV.of("ap", parseList("apple:2", "apricot:1")),
+        KV.of("b", parseList("blackberry:3", "blueberry:2")),
+        KV.of("ba", parseList("banana:1")),
+        KV.of("bl", parseList("blackberry:3", "blueberry:2")),
+        KV.of("c", parseList("cherry:1")),
+        KV.of("ch", parseList("cherry:1")));
+    p.run();
+  }
+
+  /** Checks the unfiltered output for a tiny input whose words are prefixes of one another. */
+  @Test
+  public void testTinyAutoComplete() {
+    List<String> words = Arrays.asList("x", "x", "x", "xy", "xy", "xyz");
+
+    Pipeline p = TestPipeline.create();
+
+    PCollection<String> input = p.apply(Create.of(words));
+
+    PCollection<KV<String, List<CompletionCandidate>>> output =
+      input.apply(new ComputeTopCompletions(2, recursive));
+
+    DataflowAssert.that(output).containsInAnyOrder(
+        KV.of("x", parseList("x:3", "xy:2")),
+        KV.of("xy", parseList("xy:2", "xyz:1")),
+        KV.of("xyz", parseList("xyz:1")));
+    p.run();
+  }
+
+  /** Checks per-window completions for timestamped input under 2 ms sliding windows. */
+  @Test
+  public void testWindowedAutoComplete() {
+    List<TimestampedValue<String>> words = Arrays.asList(
+        TimestampedValue.of("xA", new Instant(1)),
+        TimestampedValue.of("xA", new Instant(1)),
+        TimestampedValue.of("xB", new Instant(1)),
+        TimestampedValue.of("xB", new Instant(2)),
+        TimestampedValue.of("xB", new Instant(2)));
+
+    Pipeline p = TestPipeline.create();
+
+    PCollection<String> input = p
+      .apply(Create.of(words))
+      .apply(new ReifyTimestamps<String>());
+
+    PCollection<KV<String, List<CompletionCandidate>>> output =
+      input.apply(Window.<String>into(SlidingWindows.of(new Duration(2))))
+           .apply(new ComputeTopCompletions(2, recursive));
+
+    DataflowAssert.that(output).containsInAnyOrder(
+        // Window [0, 2)
+        KV.of("x", parseList("xA:2", "xB:1")),
+        KV.of("xA", parseList("xA:2")),
+        KV.of("xB", parseList("xB:1")),
+
+        // Window [1, 3)
+        KV.of("x", parseList("xB:3", "xA:2")),
+        KV.of("xA", parseList("xA:2")),
+        KV.of("xB", parseList("xB:3")),
+
+        // Window [2, 4): every window spans 2 ms, so this one holds only the timestamp-2 elements.
+        KV.of("x", parseList("xB:2")),
+        KV.of("xB", parseList("xB:2")));
+    p.run();
+  }
+
+  /**
+   * Builds expected candidates from {@code "text:number"} entries such as {@code "apple:2"}; the
+   * text and the parsed number are passed to the {@link CompletionCandidate} constructor.
+   */
+  private static List<CompletionCandidate> parseList(String... entries) {
+    List<CompletionCandidate> all = new ArrayList<>();
+    for (String s : entries) {
+      String[] valueAndCount = s.split(":");
+      // parseInt yields a primitive directly, avoiding the boxing done by Integer.valueOf.
+      all.add(new CompletionCandidate(valueAndCount[0], Integer.parseInt(valueAndCount[1])));
+    }
+    return all;
+  }
+
+  /**
+   * Unwraps each {@link TimestampedValue}, emitting its value with the carried timestamp as the
+   * element timestamp.
+   */
+  private static class ReifyTimestamps<T>
+      extends PTransform<PCollection<TimestampedValue<T>>, PCollection<T>> {
+    @Override
+    public PCollection<T> apply(PCollection<TimestampedValue<T>> input) {
+      return input.apply(ParDo.of(new DoFn<TimestampedValue<T>, T>() {
+        @Override
+        public void processElement(ProcessContext c) {
+          c.outputWithTimestamp(c.element().getValue(), c.element().getTimestamp());
+        }
+      }));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/TfIdfTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/TfIdfTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/TfIdfTest.java
new file mode 100644
index 0000000..5ee136c
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/TfIdfTest.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.StringDelegateCoder;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.Keys;
+import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.net.URI;
+import java.util.Arrays;
+
+/**
+ * Tests of {@link TfIdf}.
+ */
+@RunWith(JUnit4.class)
+public class TfIdfTest {
+
+  /**
+   * Runs {@code ComputeTfIdf} over three tiny in-memory documents and verifies the set of
+   * distinct words that appear as keys in the result.
+   */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testTfIdf() throws Exception {
+    Pipeline pipeline = TestPipeline.create();
+
+    // Register a coder for URI on the pipeline before any transform is applied.
+    pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));
+
+    PCollection<KV<URI, String>> uriToContent = pipeline.apply(Create.of(
+        KV.of(new URI("x"), "a b c d"),
+        KV.of(new URI("y"), "a b c"),
+        KV.of(new URI("z"), "a m n")));
+
+    PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf =
+        uriToContent.apply(new TfIdf.ComputeTfIdf());
+
+    PCollection<String> allWords = wordToUriAndTfIdf.apply(Keys.<String>create());
+    PCollection<String> words = allWords.apply(RemoveDuplicates.<String>create());
+
+    Iterable<String> expectedWords = Arrays.asList("a", "m", "n", "b", "c", "d");
+    DataflowAssert.that(words).containsInAnyOrder(expectedWords);
+
+    pipeline.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessionsTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessionsTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessionsTest.java
new file mode 100644
index 0000000..ce9de51
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/complete/TopWikipediaSessionsTest.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.complete;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.Arrays;
+
+/** Unit tests for {@link TopWikipediaSessions}. */
+@RunWith(JUnit4.class)
+public class TopWikipediaSessionsTest {
+  /**
+   * Feeds eight rows from three contributors through {@code ComputeTopSessions} (constructed
+   * with 1.0) and checks the two formatted result lines.
+   */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testComputeTopUsers() {
+    Pipeline p = TestPipeline.create();
+
+    PCollection<TableRow> edits = p.apply(Create.of(Arrays.asList(
+        edit(0, "user1"),
+        edit(1, "user1"),
+        edit(2, "user1"),
+        edit(0, "user2"),
+        edit(1, "user2"),
+        edit(3601, "user2"),
+        edit(3602, "user2"),
+        edit(35 * 24 * 3600, "user3"))));
+
+    PCollection<String> output =
+        edits.apply(new TopWikipediaSessions.ComputeTopSessions(1.0));
+
+    DataflowAssert.that(output).containsInAnyOrder(Arrays.asList(
+        "user1 : [1970-01-01T00:00:00.000Z..1970-01-01T01:00:02.000Z)"
+        + " : 3 : 1970-01-01T00:00:00.000Z",
+        "user3 : [1970-02-05T00:00:00.000Z..1970-02-05T01:00:00.000Z)"
+        + " : 1 : 1970-02-01T00:00:00.000Z"));
+
+    p.run();
+  }
+
+  /** Builds an input row carrying the given timestamp and contributor username. */
+  private static TableRow edit(int timestamp, String contributor) {
+    return new TableRow().set("timestamp", timestamp).set("contributor_username", contributor);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoesTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoesTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoesTest.java
new file mode 100644
index 0000000..6dce4ed
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/BigQueryTornadoesTest.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.cloud.dataflow.examples.cookbook.BigQueryTornadoes.ExtractTornadoesFn;
+import com.google.cloud.dataflow.examples.cookbook.BigQueryTornadoes.FormatCountsFn;
+import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+import com.google.cloud.dataflow.sdk.values.KV;
+
+import org.hamcrest.CoreMatchers;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.List;
+
+/**
+ * Test case for {@link BigQueryTornadoes}.
+ */
+@RunWith(JUnit4.class)
+public class BigQueryTornadoesTest {
+
+  @Test
+  public void testExtractTornadoes() throws Exception {
+    TableRow row = new TableRow()
+          .set("month", "6")
+          .set("tornado", true);
+    DoFnTester<TableRow, Integer> extractWordsFn =
+        DoFnTester.of(new ExtractTornadoesFn());
+    Assert.assertThat(extractWordsFn.processBatch(row),
+                      CoreMatchers.hasItems(6));
+  }
+
+  @Test
+  public void testNoTornadoes() throws Exception {
+    TableRow row = new TableRow()
+          .set("month", 6)
+          .set("tornado", false);
+    DoFnTester<TableRow, Integer> extractWordsFn =
+        DoFnTester.of(new ExtractTornadoesFn());
+    Assert.assertTrue(extractWordsFn.processBatch(row).isEmpty());
+  }
+
+  @Test
+  @SuppressWarnings({"rawtypes", "unchecked"})
+  public void testFormatCounts() throws Exception {
+    DoFnTester<KV<Integer, Long>, TableRow> formatCountsFn =
+        DoFnTester.of(new FormatCountsFn());
+    KV empty[] = {};
+    List<TableRow> results = formatCountsFn.processBatch(empty);
+    Assert.assertTrue(results.size() == 0);
+    KV input[] = { KV.of(3, 0L),
+                   KV.of(4, Long.MAX_VALUE),
+                   KV.of(5, Long.MIN_VALUE) };
+    results = formatCountsFn.processBatch(input);
+    Assert.assertEquals(results.size(), 3);
+    Assert.assertEquals(results.get(0).get("month"), 3);
+    Assert.assertEquals(results.get(0).get("tornado_count"), 0L);
+    Assert.assertEquals(results.get(1).get("month"), 4);
+    Assert.assertEquals(results.get(1).get("tornado_count"), Long.MAX_VALUE);
+    Assert.assertEquals(results.get(2).get("month"), 5);
+    Assert.assertEquals(results.get(2).get("tornado_count"), Long.MIN_VALUE);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamplesTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamplesTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamplesTest.java
new file mode 100644
index 0000000..fe4823d
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/CombinePerKeyExamplesTest.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.cloud.dataflow.examples.cookbook.CombinePerKeyExamples.ExtractLargeWordsFn;
+import com.google.cloud.dataflow.examples.cookbook.CombinePerKeyExamples.FormatShakespeareOutputFn;
+import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+import com.google.cloud.dataflow.sdk.values.KV;
+
+import org.hamcrest.CoreMatchers;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.List;
+
+/** Unit tests for {@link CombinePerKeyExamples}. */
+@RunWith(JUnit4.class)
+public class CombinePerKeyExamplesTest {
+
+  private static final TableRow row1 = new TableRow()
+      .set("corpus", "king_lear").set("word", "snuffleupaguses");
+  private static final TableRow row2 = new TableRow()
+      .set("corpus", "macbeth").set("word", "antidisestablishmentarianism");
+  private static final TableRow row3 = new TableRow()
+      .set("corpus", "king_lear").set("word", "antidisestablishmentarianism");
+  private static final TableRow row4 = new TableRow()
+      .set("corpus", "macbeth").set("word", "bob");
+  private static final TableRow row5 = new TableRow()
+      .set("corpus", "king_lear").set("word", "hi");
+
+  static final TableRow[] ROWS_ARRAY = new TableRow[] {
+    row1, row2, row3, row4, row5
+  };
+
+  private static final KV<String, String> tuple1 = KV.of("snuffleupaguses", "king_lear");
+  private static final KV<String, String> tuple2 = KV.of("antidisestablishmentarianism", "macbeth");
+  private static final KV<String, String> tuple3 = KV.of("antidisestablishmentarianism",
+      "king_lear");
+
+  private static final KV<String, String> combinedTuple1 = KV.of("antidisestablishmentarianism",
+      "king_lear,macbeth");
+  private static final KV<String, String> combinedTuple2 = KV.of("snuffleupaguses", "king_lear");
+
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  static final KV<String, String>[] COMBINED_TUPLES_ARRAY = new KV[] {
+    combinedTuple1, combinedTuple2
+  };
+
+  private static final TableRow resultRow1 = new TableRow()
+      .set("word", "snuffleupaguses").set("all_plays", "king_lear");
+  private static final TableRow resultRow2 = new TableRow()
+      .set("word", "antidisestablishmentarianism")
+      .set("all_plays", "king_lear,macbeth");
+
+  @Test
+  public void testExtractLargeWordsFn() {
+    DoFnTester<TableRow, KV<String, String>> extractLargeWordsFn =
+        DoFnTester.of(new ExtractLargeWordsFn());
+    List<KV<String, String>> results = extractLargeWordsFn.processBatch(ROWS_ARRAY);
+    Assert.assertThat(results, CoreMatchers.hasItem(tuple1));
+    Assert.assertThat(results, CoreMatchers.hasItem(tuple2));
+    Assert.assertThat(results, CoreMatchers.hasItem(tuple3));
+  }
+
+  @Test
+  public void testFormatShakespeareOutputFn() {
+    DoFnTester<KV<String, String>, TableRow> formatShakespeareOutputFn =
+        DoFnTester.of(new FormatShakespeareOutputFn());
+    List<TableRow> results = formatShakespeareOutputFn.processBatch(COMBINED_TUPLES_ARRAY);
+    Assert.assertThat(results, CoreMatchers.hasItem(resultRow1));
+    Assert.assertThat(results, CoreMatchers.hasItem(resultRow2));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/DeDupExampleTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/DeDupExampleTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/DeDupExampleTest.java
new file mode 100644
index 0000000..bce6b11
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/DeDupExampleTest.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.Arrays;
+import java.util.List;
+
+/** Unit tests for {@link DeDupExample}. */
+@RunWith(JUnit4.class)
+public class DeDupExampleTest {
+
+  /** Duplicated keys collapse to one occurrence each. */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testRemoveDuplicates() {
+    List<String> strings = Arrays.asList(
+        "k1",
+        "k5",
+        "k5",
+        "k2",
+        "k1",
+        "k2",
+        "k3");
+
+    Pipeline p = TestPipeline.create();
+    PCollection<String> output = dedup(p, strings);
+
+    DataflowAssert.that(output)
+        .containsInAnyOrder("k1", "k5", "k2", "k3");
+    p.run();
+  }
+
+  /** An empty input produces an empty output. */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testRemoveDuplicatesEmpty() {
+    List<String> strings = Arrays.asList();
+
+    Pipeline p = TestPipeline.create();
+    PCollection<String> output = dedup(p, strings);
+
+    DataflowAssert.that(output).empty();
+    p.run();
+  }
+
+  /**
+   * Creates a UTF-8 coded {@code PCollection} from {@code strings} on {@code p} and applies
+   * {@link RemoveDuplicates} to it.
+   */
+  private static PCollection<String> dedup(Pipeline p, List<String> strings) {
+    PCollection<String> input =
+        p.apply(Create.of(strings)
+            .withCoder(StringUtf8Coder.of()));
+    return input.apply(RemoveDuplicates.<String>create());
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/FilterExamplesTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/FilterExamplesTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/FilterExamplesTest.java
new file mode 100644
index 0000000..6d822f9
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/FilterExamplesTest.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.cloud.dataflow.examples.cookbook.FilterExamples.FilterSingleMonthDataFn;
+import com.google.cloud.dataflow.examples.cookbook.FilterExamples.ProjectionFn;
+import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+
+import org.hamcrest.CoreMatchers;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.Arrays;
+import java.util.List;
+
+/** Unit tests for {@link FilterExamples}. */
+@RunWith(JUnit4.class)
+public class FilterExamplesTest {
+
+  // Raw input rows: every field except "tornado" is given as a string.
+  private static final TableRow row1 = rawRow("6", "21", "2014", "85.3", true);
+  private static final TableRow row2 = rawRow("7", "20", "2014", "75.4", false);
+  private static final TableRow row3 = rawRow("6", "18", "2014", "45.3", true);
+  static final TableRow[] ROWS_ARRAY = new TableRow[] {
+    row1, row2, row3
+  };
+  static final List<TableRow> ROWS = Arrays.asList(ROWS_ARRAY);
+
+  // Projected rows: numeric fields, with no "tornado" field.
+  private static final TableRow outRow1 = projectedRow(2014, 6, 21, 85.3);
+  private static final TableRow outRow2 = projectedRow(2014, 7, 20, 75.4);
+  private static final TableRow outRow3 = projectedRow(2014, 6, 18, 45.3);
+  private static final TableRow[] PROJROWS_ARRAY = new TableRow[] {
+    outRow1, outRow2, outRow3
+  };
+
+  /** Checks that every raw row is projected to its expected numeric counterpart. */
+  @Test
+  public void testProjectionFn() {
+    DoFnTester<TableRow, TableRow> tester = DoFnTester.of(new ProjectionFn());
+    List<TableRow> projected = tester.processBatch(ROWS_ARRAY);
+    for (TableRow expected : PROJROWS_ARRAY) {
+      Assert.assertThat(projected, CoreMatchers.hasItem(expected));
+    }
+  }
+
+  /** Checks that the month-7 row is present in the output of the month-7 filter. */
+  @Test
+  public void testFilterSingleMonthDataFn() {
+    DoFnTester<TableRow, TableRow> tester = DoFnTester.of(new FilterSingleMonthDataFn(7));
+    List<TableRow> filtered = tester.processBatch(PROJROWS_ARRAY);
+    Assert.assertThat(filtered, CoreMatchers.hasItem(outRow2));
+  }
+
+  /** Builds a raw input row; fields are set in month, day, year, mean_temp, tornado order. */
+  private static TableRow rawRow(
+      String month, String day, String year, String meanTemp, boolean tornado) {
+    return new TableRow()
+        .set("month", month).set("day", day)
+        .set("year", year).set("mean_temp", meanTemp)
+        .set("tornado", tornado);
+  }
+
+  /** Builds a projected row; fields are set in year, month, day, mean_temp order. */
+  private static TableRow projectedRow(int year, int month, int day, double meanTemp) {
+    return new TableRow()
+        .set("year", year).set("month", month)
+        .set("day", day).set("mean_temp", meanTemp);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2eaa709c/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/JoinExamplesTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/JoinExamplesTest.java b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/JoinExamplesTest.java
new file mode 100644
index 0000000..db3ae34
--- /dev/null
+++ b/examples/java/src/test/java/com/google/cloud/dataflow/examples/cookbook/JoinExamplesTest.java
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.cloud.dataflow.examples.cookbook;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.cloud.dataflow.examples.cookbook.JoinExamples.ExtractCountryInfoFn;
+import com.google.cloud.dataflow.examples.cookbook.JoinExamples.ExtractEventDataFn;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+import com.google.cloud.dataflow.sdk.values.KV;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.hamcrest.CoreMatchers;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.Arrays;
+import java.util.List;
+
+/** Unit tests for {@link JoinExamples}. */
+@RunWith(JUnit4.class)
+public class JoinExamplesTest {
+
+  private static final TableRow row1 = new TableRow()
+        .set("ActionGeo_CountryCode", "VM").set("SQLDATE", "20141212")
+        .set("Actor1Name", "BANGKOK").set("SOURCEURL", "http://cnn.com");
+  private static final TableRow row2 = new TableRow()
+        .set("ActionGeo_CountryCode", "VM").set("SQLDATE", "20141212")
+        .set("Actor1Name", "LAOS").set("SOURCEURL", "http://www.chicagotribune.com");
+  private static final TableRow row3 = new TableRow()
+        .set("ActionGeo_CountryCode", "BE").set("SQLDATE", "20141213")
+        .set("Actor1Name", "AFGHANISTAN").set("SOURCEURL", "http://cnn.com");
+  static final TableRow[] EVENTS = new TableRow[] {
+    row1, row2, row3
+  };
+  static final List<TableRow> EVENT_ARRAY = Arrays.asList(EVENTS);
+
+  private static final KV<String, String> kv1 = KV.of("VM",
+      "Date: 20141212, Actor1: LAOS, url: http://www.chicagotribune.com");
+  private static final KV<String, String> kv2 = KV.of("BE",
+      "Date: 20141213, Actor1: AFGHANISTAN, url: http://cnn.com");
+  private static final KV<String, String> kv3 = KV.of("BE", "Belgium");
+  private static final KV<String, String> kv4 = KV.of("VM", "Vietnam");
+
+  private static final TableRow cc1 = new TableRow()
+        .set("FIPSCC", "VM").set("HumanName", "Vietnam");
+  private static final TableRow cc2 = new TableRow()
+        .set("FIPSCC", "BE").set("HumanName", "Belgium");
+  static final TableRow[] CCS = new TableRow[] {
+    cc1, cc2
+  };
+  static final List<TableRow> CC_ARRAY = Arrays.asList(CCS);
+
+  static final String[] JOINED_EVENTS = new String[] {
+      "Country code: VM, Country name: Vietnam, Event info: Date: 20141212, Actor1: LAOS, "
+          + "url: http://www.chicagotribune.com",
+      "Country code: VM, Country name: Vietnam, Event info: Date: 20141212, Actor1: BANGKOK, "
+          + "url: http://cnn.com",
+      "Country code: BE, Country name: Belgium, Event info: Date: 20141213, Actor1: AFGHANISTAN, "
+          + "url: http://cnn.com"
+    };
+
+  @Test
+  public void testExtractEventDataFn() {
+    DoFnTester<TableRow, KV<String, String>> extractEventDataFn =
+        DoFnTester.of(new ExtractEventDataFn());
+    List<KV<String, String>> results = extractEventDataFn.processBatch(EVENTS);
+    Assert.assertThat(results, CoreMatchers.hasItem(kv1));
+    Assert.assertThat(results, CoreMatchers.hasItem(kv2));
+  }
+
+  @Test
+  public void testExtractCountryInfoFn() {
+    DoFnTester<TableRow, KV<String, String>> extractCountryInfoFn =
+        DoFnTester.of(new ExtractCountryInfoFn());
+    List<KV<String, String>> results = extractCountryInfoFn.processBatch(CCS);
+    Assert.assertThat(results, CoreMatchers.hasItem(kv3));
+    Assert.assertThat(results, CoreMatchers.hasItem(kv4));
+  }
+
+
+  @Test
+  @Category(RunnableOnService.class)
+  public void testJoin() throws java.lang.Exception {
+    Pipeline p = TestPipeline.create();
+    PCollection<TableRow> input1 = p.apply("CreateEvent", Create.of(EVENT_ARRAY));
+    PCollection<TableRow> input2 = p.apply("CreateCC", Create.of(CC_ARRAY));
+
+    PCollection<String> output = JoinExamples.joinEvents(input1, input2);
+    DataflowAssert.that(output).containsInAnyOrder(JOINED_EVENTS);
+    p.run();
+  }
+}

[15/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java
deleted file mode 100644
index d51fc7e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/WindowFn.java
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms.windowing;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.common.collect.Ordering;
-
-import org.joda.time.Instant;
-
-import java.io.Serializable;
-import java.util.Collection;
-
-/**
- * The argument to the {@link Window} transform used to assign elements into
- * windows and to determine how windows are merged.  See {@link Window} for more
- * information on how {@code WindowFn}s are used and for a library of
- * predefined {@code WindowFn}s.
- *
- * <p>Users will generally want to use the predefined
- * {@code WindowFn}s, but it is also possible to create new
- * subclasses.
- *
- * <p>To create a custom {@code WindowFn}, inherit from this class and override all required
- * methods.  If no merging is required, inherit from {@link NonMergingWindowFn}
- * instead.  If no merging is required and each element is assigned to a single window, inherit from
- * {@code PartitioningWindowFn}.  Inheriting from the most specific subclass will enable more
- * optimizations in the runner.
- *
- * @param <T> type of elements being windowed
- * @param <W> {@link BoundedWindow} subclass used to represent the
- *            windows used by this {@code WindowFn}
- */
-public abstract class WindowFn<T, W extends BoundedWindow>
-    implements Serializable {
-  /**
-   * Information available when running {@link #assignWindows}.
-   */
-  public abstract class AssignContext {
-    /**
-     * Returns the current element.
-     */
-    public abstract T element();
-
-    /**
-     * Returns the timestamp of the current element.
-     */
-    public abstract Instant timestamp();
-
-    /**
-     * Returns the windows the current element was in, prior to this
-     * {@code WindowFn} being called.
-     */
-    public abstract Collection<? extends BoundedWindow> windows();
-  }
-
-  /**
-   * Given a timestamp and element, returns the set of windows into which it
-   * should be placed.
-   */
-  public abstract Collection<W> assignWindows(AssignContext c) throws Exception;
-
-  /**
-   * Information available when running {@link #mergeWindows}.
-   */
-  public abstract class MergeContext {
-    /**
-     * Returns the current set of windows.
-     */
-    public abstract Collection<W> windows();
-
-    /**
-     * Signals to the framework that the windows in {@code toBeMerged} should
-     * be merged together to form {@code mergeResult}.
-     *
-     * <p>{@code toBeMerged} should be a subset of {@link #windows}
-     * and disjoint from the {@code toBeMerged} set of previous calls
-     * to {@code merge}.
-     *
-     * <p>{@code mergeResult} must either not be in {@link #windows} or be in
-     * {@code toBeMerged}.
-     *
-     * @throws IllegalArgumentException if any elements of toBeMerged are not
-     * in windows(), or have already been merged
-     */
-    public abstract void merge(Collection<W> toBeMerged, W mergeResult)
-        throws Exception;
-  }
-
-  /**
-   * Does whatever merging of windows is necessary.
-   *
-   * <p>See {@link MergeOverlappingIntervalWindows#mergeWindows} for an
-   * example of how to override this method.
-   */
-  public abstract void mergeWindows(MergeContext c) throws Exception;
-
-  /**
-   * Returns whether this performs the same merging as the given
-   * {@code WindowFn}.
-   */
-  public abstract boolean isCompatible(WindowFn<?, ?> other);
-
-  /**
-   * Returns the {@link Coder} used for serializing the windows used
-   * by this windowFn.
-   */
-  public abstract Coder<W> windowCoder();
-
-  /**
-   * Returns the window of the side input corresponding to the given window of
-   * the main input.
-   *
-   * <p>Authors of custom {@code WindowFn}s should override this.
-   */
-  public abstract W getSideInputWindow(final BoundedWindow window);
-
-  /**
-   * @deprecated Implement {@link #getOutputTimeFn} to return one of the appropriate
-   * {@link OutputTimeFns}, or a custom {@link OutputTimeFn} extending
-   * {@link OutputTimeFn.Defaults}.
-   */
-  @Deprecated
-  @Experimental(Kind.OUTPUT_TIME)
-  public Instant getOutputTime(Instant inputTimestamp, W window) {
-    return getOutputTimeFn().assignOutputTime(inputTimestamp, window);
-  }
-
-  /**
-   * Provides a default implementation for {@link WindowingStrategy#getOutputTimeFn()}.
-   * See the full specification there.
-   *
-   * <p>If this {@link WindowFn} doesn't produce overlapping windows, this need not (and probably
-   * should not) override any of the default implementations in {@link OutputTimeFn.Defaults}.
-   *
-   * <p>If this {@link WindowFn} does produce overlapping windows that can be predicted here, it is
-   * suggested that the result in later overlapping windows is past the end of earlier windows so
-   * that the later windows don't prevent the watermark from progressing past the end of the earlier
-   * window.
-   *
-   * <p>For example, a timestamp in a sliding window should be moved past the beginning of the next
-   * sliding window. See {@link SlidingWindows#getOutputTimeFn}.
-   */
-  @Experimental(Kind.OUTPUT_TIME)
-  public OutputTimeFn<? super W> getOutputTimeFn() {
-    return new OutputAtEarliestAssignedTimestamp<>(this);
-  }
-
-  /**
-   * Returns true if this {@code WindowFn} never needs to merge any windows.
-   */
-  public boolean isNonMerging() {
-    return false;
-  }
-
-  /**
-   * Returns true if this {@code WindowFn} assigns each element to a single window.
-   */
-  public boolean assignsToSingleWindow() {
-    return false;
-  }
-
-  /**
-   * A compatibility adapter that will return the assigned timestamps according to the
-   * {@link WindowFn}, which was the prior policy. Specifying the assigned output timestamps
-   * on the {@link WindowFn} is now deprecated.
-   */
-  private static class OutputAtEarliestAssignedTimestamp<W extends BoundedWindow>
-      extends OutputTimeFn.Defaults<W> {
-
-    private final WindowFn<?, W> windowFn;
-
-    public OutputAtEarliestAssignedTimestamp(WindowFn<?, W> windowFn) {
-      this.windowFn = windowFn;
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return the result of {@link WindowFn#getOutputTime windowFn.getOutputTime()}.
-     */
-    @Override
-    @SuppressWarnings("deprecation") // this is an adapter for the deprecated behavior
-    public Instant assignOutputTime(Instant timestamp, W window) {
-      return windowFn.getOutputTime(timestamp, window);
-    }
-
-    @Override
-    public Instant combine(Instant outputTime, Instant otherOutputTime) {
-      return Ordering.natural().min(outputTime, otherOutputTime);
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * @return {@code true}. When the {@link OutputTimeFn} is not overridden by {@link WindowFn}
-     *         or {@link WindowingStrategy}, the minimum output timestamp is taken, which depends
-     *         only on the minimum input timestamp by monotonicity of {@link #assignOutputTime}.
-     */
-    @Override
-    public boolean dependsOnlyOnEarliestInputTimestamp() {
-      return true;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java
deleted file mode 100644
index 65ccf71..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/windowing/package-info.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-/**
- * Defines the {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window} transform
- * for dividing the elements in a PCollection into windows, and the
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.Trigger} for controlling when those
- * elements are output.
- *
- * <p>{@code Window} logically divides up or groups the elements of a
- * {@link com.google.cloud.dataflow.sdk.values.PCollection} into finite windows according to a
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}.
- * The output of {@code Window} contains the same elements as input, but they
- * have been logically assigned to windows. The next
- * {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey}s, including one
- * within composite transforms, will group by the combination of keys and
- * windows.
- *
- * <p>Windowing a {@code PCollection} allows chunks of it to be processed
- * individually, before the entire {@code PCollection} is available.  This is
- * especially important for {@code PCollection}s with unbounded size, since the full
- * {@code PCollection} is never available at once.
- *
- * <p>For {@code PCollection}s with a bounded size, by default, all data is implicitly in a
- * single window, and this replicates conventional batch mode. However, windowing can still be a
- * convenient way to express time-sliced algorithms over bounded {@code PCollection}s.
- *
- * <p>As elements are assigned to a window, they are placed into a pane. When the trigger fires
- * all of the elements in the current pane are output.
- *
- * <p>The {@link com.google.cloud.dataflow.sdk.transforms.windowing.DefaultTrigger} will output a
- * window when the system watermark passes the end of the window.  See
- * {@link com.google.cloud.dataflow.sdk.transforms.windowing.AfterWatermark} for details on the
- * watermark.
- */
-package com.google.cloud.dataflow.sdk.transforms.windowing;

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ActiveWindowSet.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ActiveWindowSet.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ActiveWindowSet.java
deleted file mode 100644
index 69350cb..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ActiveWindowSet.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-
-import java.util.Collection;
-import java.util.Set;
-
-import javax.annotation.Nullable;
-
-/**
- * Track which active windows have their state associated with merged-away windows.
- *
- * When windows are merged we must track which state previously associated with the merged windows
- * must now be associated with the result window. Some of that state may be combined eagerly when
- * the windows are merged. The rest is combined lazily when the final state is actually
- * required when emitting a pane. We keep track of this using an {@link ActiveWindowSet}.
- *
- * <p>An {@link ActiveWindowSet} considers a window to be in one of the following states:
- *
- * <ol>
- *   <li><b>NEW</b>: The initial state for a window on an incoming element; we do not yet know
- *       if it should be merged into an ACTIVE window, or whether it is already present as an
- *       ACTIVE window, since we have not yet called
- *       {@link WindowFn#mergeWindows}.</li>
- *   <li><b>ACTIVE</b>: A window that has state associated with it and has not itself been merged
- *       away. The window may have one or more <i>state address</i> windows under which its
- *       non-empty state is stored. A state value for an ACTIVE window must be derived by reading
- *       the state in all of its state address windows.</li>
- *   <li><b>EPHEMERAL</b>: A NEW window that has been merged into an ACTIVE window before any state
- *       has been associated with that window. Thus the window is neither ACTIVE nor MERGED. These
- *       windows are not persistently represented since if they reappear the merge function should
- *       again redirect them to an ACTIVE window. EPHEMERAL windows are an optimization for
- *       the common case of in-order events and {@link Sessions session windows} by never associating
- *       state with windows that are created and immediately merged away.</li>
- *   <li><b>MERGED</b>: An ACTIVE window has been merged into another ACTIVE window after it had
- *       state associated with it. The window will thus appear as a state address window for exactly
- *       one ACTIVE window.</li>
- *   <li><b>EXPIRED</b>: The window has expired and may have been garbage collected. No new elements
- *       (even late elements) will ever be assigned to that window. These windows are not explicitly
- *       represented anywhere; it is expected that the user of {@link ActiveWindowSet} will store
- *       no state associated with the window.</li>
- * </ol>
- *
- * <p>
- *
- * <p>If no windows will ever be merged we can use the trivial implementation {@link
- * NonMergingActiveWindowSet}. Otherwise, the actual implementation of this data structure is in
- * {@link MergingActiveWindowSet}.
- *
- * @param <W> the type of window being managed
- */
-public interface ActiveWindowSet<W extends BoundedWindow> {
-  /**
-   * Callback for {@link #merge}.
-   */
-  public interface MergeCallback<W extends BoundedWindow> {
-    /**
-     * Called when windows are about to be merged, but before any {@link #onMerge} callback
-     * has been made.
-     */
-    void prefetchOnMerge(Collection<W> toBeMerged, Collection<W> activeToBeMerged, W mergeResult)
-        throws Exception;
-
-    /**
-     * Called when windows are about to be merged, after all {@link #prefetchOnMerge} calls
-     * have been made, but before the active window set has been updated to reflect the merge.
-     *
-     * @param toBeMerged the windows about to be merged.
-     * @param activeToBeMerged the subset of {@code toBeMerged} corresponding to windows which
-     * are currently ACTIVE (and about to be merged). The remaining windows have been deemed
-     * EPHEMERAL, and thus have no state associated with them.
-     * @param mergeResult the result window, either a member of {@code toBeMerged} or new.
-     */
-    void onMerge(Collection<W> toBeMerged, Collection<W> activeToBeMerged, W mergeResult)
-        throws Exception;
-  }
-
-  /**
-   * Remove EPHEMERAL windows since we only need to know about them while processing new elements.
-   */
-  void removeEphemeralWindows();
-
-  /**
-   * Save any state changes needed.
-   */
-  void persist();
-
-  /**
-   * Return the ACTIVE window into which {@code window} has been merged.
-   * Return {@code window} itself if it is ACTIVE. Return null if {@code window} has not
-   * yet been seen.
-   */
-  @Nullable
-  W representative(W window);
-
-  /**
-   * Return (a view of) the set of currently ACTIVE windows.
-   */
-  Set<W> getActiveWindows();
-
-  /**
-   * Return {@code true} if {@code window} is ACTIVE.
-   */
-  boolean isActive(W window);
-
-  /**
-   * If {@code window} is not already known to be ACTIVE, MERGED or EPHEMERAL then add it
-   * as NEW. All NEW windows will be accounted for as ACTIVE, MERGED or EPHEMERAL by a call
-   * to {@link #merge}.
-   */
-  void addNew(W window);
-
-  /**
-   * If {@code window} is not already known to be ACTIVE, MERGED or EPHEMERAL then add it
-   * as ACTIVE.
-   */
-  void addActive(W window);
-
-  /**
-   * Remove {@code window} from the set.
-   */
-  void remove(W window);
-
-  /**
-   * Invoke {@link WindowFn#mergeWindows} on the {@code WindowFn} associated with this window set,
-   * merging as many of the active windows as possible. {@code mergeCallback} will be invoked for
-   * each group of windows that are merged. After this no NEW windows will remain, all merge
-   * result windows will be ACTIVE, and all windows which have been merged away will not be ACTIVE.
-   */
-  void merge(MergeCallback<W> mergeCallback) throws Exception;
-
-  /**
-   * Signal that all state in {@link #readStateAddresses} for {@code window} has been merged into
-   * the {@link #writeStateAddress} for {@code window}.
-   */
-  void merged(W window);
-
-  /**
-   * Return the state address windows for ACTIVE {@code window} from which all state associated
-   * should be read and merged.
-   */
-  Set<W> readStateAddresses(W window);
-
-  /**
-   * Return the state address window of ACTIVE {@code window} into which all new state should be
-   * written. Always one of the results of {@link #readStateAddresses}.
-   */
-  W writeStateAddress(W window);
-
-  /**
-   * Return the state address window into which all new state should be written after
-   * ACTIVE windows {@code toBeMerged} have been merged into {@code mergeResult}.
-   */
-  W mergedWriteStateAddress(Collection<W> toBeMerged, W mergeResult);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ApiSurface.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ApiSurface.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ApiSurface.java
deleted file mode 100644
index 7a9c877..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ApiSurface.java
+++ /dev/null
@@ -1,642 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.common.base.Joiner;
-import com.google.common.base.Supplier;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Multimap;
-import com.google.common.collect.Multimaps;
-import com.google.common.collect.Sets;
-import com.google.common.reflect.ClassPath;
-import com.google.common.reflect.ClassPath.ClassInfo;
-import com.google.common.reflect.Invokable;
-import com.google.common.reflect.Parameter;
-import com.google.common.reflect.TypeToken;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.lang.annotation.Annotation;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Field;
-import java.lang.reflect.GenericArrayType;
-import java.lang.reflect.Method;
-import java.lang.reflect.Modifier;
-import java.lang.reflect.ParameterizedType;
-import java.lang.reflect.Type;
-import java.lang.reflect.TypeVariable;
-import java.lang.reflect.WildcardType;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Set;
-import java.util.regex.Pattern;
-
-/**
- * Represents the API surface of a package prefix. Used for accessing public classes,
- * methods, and the types they reference, to control what dependencies are re-exported.
- *
- * <p>For the purposes of calculating the public API surface, exposure includes any public
- * or protected occurrence of:
- *
- * <ul>
- * <li>superclasses
- * <li>interfaces implemented
- * <li>actual type arguments to generic types
- * <li>array component types
- * <li>method return types
- * <li>method parameter types
- * <li>type variable bounds
- * <li>wildcard bounds
- * </ul>
- *
- * <p>Exposure is a transitive property. The resulting map excludes primitives
- * and array classes themselves.
- *
- * <p>It is prudent (though not required) to prune prefixes like "java" via the builder
- * method {@link #pruningPrefix} to halt the traversal so it does not uselessly catalog references
- * that are not interesting.
- */
-@SuppressWarnings("rawtypes")
-public class ApiSurface {
-  private static Logger logger = LoggerFactory.getLogger(ApiSurface.class);
-
-  /**
-   * Returns an empty {@link ApiSurface}.
-   */
-  public static ApiSurface empty() {
-    logger.debug("Returning an empty ApiSurface");
-    return new ApiSurface(Collections.<Class<?>>emptySet(), Collections.<Pattern>emptySet());
-  }
-
-  /**
-   * Returns an {@link ApiSurface} object representing the given package and all subpackages.
-   */
-  public static ApiSurface ofPackage(String packageName) throws IOException {
-    return ApiSurface.empty().includingPackage(packageName);
-  }
-
-  /**
-   * Returns an {@link ApiSurface} object representing just the surface of the given class.
-   */
-  public static ApiSurface ofClass(Class<?> clazz) {
-    return ApiSurface.empty().includingClass(clazz);
-  }
-
-  /**
-   * Returns an {@link ApiSurface} like this one, but also including the named
-   * package and all of its subpackages.
-   */
-  public ApiSurface includingPackage(String packageName) throws IOException {
-    ClassPath classPath = ClassPath.from(ClassLoader.getSystemClassLoader());
-
-    Set<Class<?>> newRootClasses = Sets.newHashSet();
-    for (ClassInfo classInfo : classPath.getTopLevelClassesRecursive(packageName)) {
-      Class clazz = classInfo.load();
-      if (exposed(clazz.getModifiers())) {
-        newRootClasses.add(clazz);
-      }
-    }
-    logger.debug("Including package {} and subpackages: {}", packageName, newRootClasses);
-    newRootClasses.addAll(rootClasses);
-
-    return new ApiSurface(newRootClasses, patternsToPrune);
-  }
-
-  /**
-   * Returns an {@link ApiSurface} like this one, but also including the given class.
-   */
-  public ApiSurface includingClass(Class<?> clazz) {
-    Set<Class<?>> newRootClasses = Sets.newHashSet();
-    logger.debug("Including class {}", clazz);
-    newRootClasses.add(clazz);
-    newRootClasses.addAll(rootClasses);
-    return new ApiSurface(newRootClasses, patternsToPrune);
-  }
-
-  /**
-   * Returns an {@link ApiSurface} like this one, but pruning transitive
-   * references from classes whose full name (including package) begins with the provided prefix.
-   */
-  public ApiSurface pruningPrefix(String prefix) {
-    return pruningPattern(Pattern.compile(Pattern.quote(prefix) + ".*"));
-  }
-
-  /**
-   * Returns an {@link ApiSurface} like this one, but pruning references from the named
-   * class.
-   */
-  public ApiSurface pruningClassName(String className) {
-    return pruningPattern(Pattern.compile(Pattern.quote(className)));
-  }
-
-  /**
-   * Returns an {@link ApiSurface} like this one, but pruning references from the
-   * provided class.
-   */
-  public ApiSurface pruningClass(Class<?> clazz) {
-    return pruningClassName(clazz.getName());
-  }
-
-  /**
-   * Returns an {@link ApiSurface} like this one, but pruning transitive
-   * references from classes whose full name (including package) matches the provided pattern.
-   */
-  public ApiSurface pruningPattern(Pattern pattern) {
-    Set<Pattern> newPatterns = Sets.newHashSet();
-    newPatterns.addAll(patternsToPrune);
-    newPatterns.add(pattern);
-    return new ApiSurface(rootClasses, newPatterns);
-  }
-
-  /**
-   * See {@link #pruningPattern(Pattern)}.
-   */
-  public ApiSurface pruningPattern(String patternString) {
-    return pruningPattern(Pattern.compile(patternString));
-  }
-
-  /**
-   * Returns all public classes originally belonging to the package
-   * in the {@link ApiSurface}.
-   */
-  public Set<Class<?>> getRootClasses() {
-    return rootClasses;
-  }
-
-  /**
-   * Returns exposed types in this set, including arrays and primitives as
-   * specified.
-   */
-  public Set<Class<?>> getExposedClasses() {
-    return getExposedToExposers().keySet();
-  }
-
-  /**
-   * Returns a path from an exposed class to a root class. There may be many, but this
-   * gives only one.
-   *
-   * <p>If there are only cycles, with no path back to a root class, throws
-   * IllegalArgumentException.
-   */
-  public List<Class<?>> getAnyExposurePath(Class<?> exposedClass) {
-    Set<Class<?>> excluded = Sets.newHashSet();
-    excluded.add(exposedClass);
-    List<Class<?>> path = getAnyExposurePath(exposedClass, excluded);
-    if (path == null) {
-      throw new IllegalArgumentException(
-          "Class " + exposedClass + " has no path back to any root class."
-          + " It should never have been considered exposed.");
-    } else {
-      return path;
-    }
-  }
-
-  /**
-   * Returns a path from an exposed class to a root class. There may be many, but this
-   * gives only one. It will not return a path that crosses the excluded classes.
-   *
-   * <p>If there are only cycles or paths through the excluded classes, returns null.
-   *
-   * <p>If the class is not actually in the exposure map, throws IllegalArgumentException
-   */
-  private List<Class<?>> getAnyExposurePath(Class<?> exposedClass, Set<Class<?>> excluded) {
-    List<Class<?>> exposurePath = Lists.newArrayList();
-    exposurePath.add(exposedClass);
-
-    Collection<Class<?>> exposers = getExposedToExposers().get(exposedClass);
-    if (exposers.isEmpty()) {
-      throw new IllegalArgumentException("Class " + exposedClass + " is not exposed.");
-    }
-
-    for (Class<?> exposer : exposers) {
-      if (excluded.contains(exposer)) {
-        continue;
-      }
-
-      // A null exposer means this is already a root class.
-      if (exposer == null) {
-        return exposurePath;
-      }
-
-      List<Class<?>> restOfPath = getAnyExposurePath(
-          exposer,
-          Sets.union(excluded, Sets.newHashSet(exposer)));
-
-      if (restOfPath != null) {
-        exposurePath.addAll(restOfPath);
-        return exposurePath;
-      }
-    }
-    return null;
-  }
-
-  ////////////////////////////////////////////////////////////////////
-
-  // Fields initialized upon construction
-  private final Set<Class<?>> rootClasses;
-  private final Set<Pattern> patternsToPrune;
-
-  // Fields computed on-demand
-  private Multimap<Class<?>, Class<?>> exposedToExposers = null;
-  private Pattern prunedPattern = null;
-  private Set<Type> visited = null;
-
-  private ApiSurface(Set<Class<?>> rootClasses, Set<Pattern> patternsToPrune) {
-    this.rootClasses = rootClasses;
-    this.patternsToPrune = patternsToPrune;
-  }
-
-  /**
-   * A map from exposed types to the places where they are exposed, in the sense of being a part
-   * of a public-facing API surface.
-   *
-   * <p>This map is the adjacency list representation of a directed graph, where an edge from type
-   * {@code T1} to type {@code T2} indicates that {@code T2} directly exposes {@code T1} in its API
-   * surface.
-   *
-   * <p>The traversal methods in this class are designed to avoid repeatedly processing types, since
-   * there will almost always be cyclic references.
-   */
-  private Multimap<Class<?>, Class<?>> getExposedToExposers() {
-    if (exposedToExposers == null) {
-      constructExposedToExposers();
-    }
-    return exposedToExposers;
-  }
-
-  /**
-   * See {@link #getExposedToExposers}.
-   */
-  private void constructExposedToExposers() {
-    visited = Sets.newHashSet();
-    exposedToExposers = Multimaps.newSetMultimap(
-        Maps.<Class<?>, Collection<Class<?>>>newHashMap(),
-        new Supplier<Set<Class<?>>>() {
-          @Override
-          public Set<Class<?>> get() {
-            return Sets.newHashSet();
-          }
-        });
-
-    for (Class<?> clazz : rootClasses) {
-      addExposedTypes(clazz, null);
-    }
-  }
-
-  /**
-   * A combined {@code Pattern} that implements all the pruning specified.
-   */
-  private Pattern getPrunedPattern() {
-    if (prunedPattern == null) {
-      constructPrunedPattern();
-    }
-    return prunedPattern;
-  }
-
-  /**
-   * See {@link #getPrunedPattern}.
-   */
-  private void constructPrunedPattern() {
-    Set<String> prunedPatternStrings = Sets.newHashSet();
-    for (Pattern patternToPrune : patternsToPrune) {
-      prunedPatternStrings.add(patternToPrune.pattern());
-    }
-    prunedPattern = Pattern.compile("(" + Joiner.on(")|(").join(prunedPatternStrings) + ")");
-  }
-
-  /**
-   * Whether a type and all that it references should be pruned from the graph.
-   */
-  private boolean pruned(Type type) {
-    return pruned(TypeToken.of(type).getRawType());
-  }
-
-  /**
-   * Whether a class and all that it references should be pruned from the graph.
-   */
-  private boolean pruned(Class<?> clazz) {
-    return clazz.isPrimitive()
-        || clazz.isArray()
-        || getPrunedPattern().matcher(clazz.getName()).matches();
-  }
-
-  /**
-   * Whether a type has already been sufficiently processed.
-   */
-  private boolean done(Type type) {
-    return visited.contains(type);
-  }
-
-  private void recordExposure(Class<?> exposed, Class<?> cause) {
-    exposedToExposers.put(exposed, cause);
-  }
-
-  private void recordExposure(Type exposed, Class<?> cause) {
-    exposedToExposers.put(TypeToken.of(exposed).getRawType(), cause);
-  }
-
-  private void visit(Type type) {
-    visited.add(type);
-  }
-
-  /**
-   * See {@link #addExposedTypes(Type, Class)}.
-   */
-  private void addExposedTypes(TypeToken type, Class<?> cause) {
-    logger.debug(
-        "Adding exposed types from {}, which is the type in type token {}", type.getType(), type);
-    addExposedTypes(type.getType(), cause);
-  }
-
-  /**
-   * Adds any references learned by following a link from {@code cause} to {@code type}.
-   * This will dispatch according to the concrete {@code Type} implementation. See the
-   * other overloads of {@code addExposedTypes} for their details.
-   */
-  private void addExposedTypes(Type type, Class<?> cause) {
-    if (type instanceof TypeVariable) {
-      logger.debug("Adding exposed types from {}, which is a type variable", type);
-      addExposedTypes((TypeVariable) type, cause);
-    } else if (type instanceof WildcardType) {
-      logger.debug("Adding exposed types from {}, which is a wildcard type", type);
-      addExposedTypes((WildcardType) type, cause);
-    } else if (type instanceof GenericArrayType) {
-      logger.debug("Adding exposed types from {}, which is a generic array type", type);
-      addExposedTypes((GenericArrayType) type, cause);
-    } else if (type instanceof ParameterizedType) {
-      logger.debug("Adding exposed types from {}, which is a parameterized type", type);
-      addExposedTypes((ParameterizedType) type, cause);
-    } else if (type instanceof Class) {
-      logger.debug("Adding exposed types from {}, which is a class", type);
-      addExposedTypes((Class) type, cause);
-    } else {
-      throw new IllegalArgumentException("Unknown implementation of Type");
-    }
-  }
-
-  /**
-   * Adds any types exposed to this set. These will
-   * come from the (possibly absent) bounds on the
-   * type variable.
-   */
-  private void addExposedTypes(TypeVariable type, Class<?> cause) {
-    if (done(type)) {
-      return;
-    }
-    visit(type);
-    for (Type bound : type.getBounds()) {
-      logger.debug("Adding exposed types from {}, which is a type bound on {}", bound, type);
-      addExposedTypes(bound, cause);
-    }
-  }
-
-  /**
-   * Adds any types exposed to this set. These will come from the (possibly absent) bounds on the
-   * wildcard.
-   */
-  private void addExposedTypes(WildcardType type, Class<?> cause) {
-    visit(type);
-    for (Type lowerBound : type.getLowerBounds()) {
-      logger.debug(
-          "Adding exposed types from {}, which is a type lower bound on wildcard type {}",
-          lowerBound,
-          type);
-      addExposedTypes(lowerBound, cause);
-    }
-    for (Type upperBound : type.getUpperBounds()) {
-      logger.debug(
-          "Adding exposed types from {}, which is a type upper bound on wildcard type {}",
-          upperBound,
-          type);
-      addExposedTypes(upperBound, cause);
-    }
-  }
-
-  /**
-   * Adds any types exposed from the given array type. The array type itself is not added. The
-   * cause of the exposure of the underlying type is considered whatever type exposed the array
-   * type.
-   */
-  private void addExposedTypes(GenericArrayType type, Class<?> cause) {
-    if (done(type)) {
-      return;
-    }
-    visit(type);
-    logger.debug(
-        "Adding exposed types from {}, which is the component type on generic array type {}",
-        type.getGenericComponentType(),
-        type);
-    addExposedTypes(type.getGenericComponentType(), cause);
-  }
-
-  /**
-   * Adds any types exposed to this set. Even if the
-   * root type is to be pruned, the actual type arguments
-   * are processed.
-   */
-  private void addExposedTypes(ParameterizedType type, Class<?> cause) {
-    // Even if the type is already done, this link to it may be new
-    boolean alreadyDone = done(type);
-    if (!pruned(type)) {
-      visit(type);
-      recordExposure(type, cause);
-    }
-    if (alreadyDone) {
-      return;
-    }
-
-    // For a parameterized type, pruning does not take place
-    // here, only for the raw class.
-    // The type parameters themselves may not be pruned,
-    // for example with List<MyApiType> probably the
-    // standard List is pruned, but MyApiType is not.
-    logger.debug(
-        "Adding exposed types from {}, which is the raw type on parameterized type {}",
-        type.getRawType(),
-        type);
-    addExposedTypes(type.getRawType(), cause);
-    for (Type typeArg : type.getActualTypeArguments()) {
-      logger.debug(
-          "Adding exposed types from {}, which is a type argument on parameterized type {}",
-          typeArg,
-          type);
-      addExposedTypes(typeArg, cause);
-    }
-  }
-
-  /**
-   * Adds a class and all of the types it exposes. The cause
-   * of the class being exposed is given, and the cause
-   * of everything within the class is that class itself.
-   */
-  private void addExposedTypes(Class<?> clazz, Class<?> cause) {
-    if (pruned(clazz)) {
-      return;
-    }
-    // Even if `clazz` has been visited, the link from `cause` may be new
-    boolean alreadyDone = done(clazz);
-    visit(clazz);
-    recordExposure(clazz, cause);
-    if (alreadyDone || pruned(clazz)) {
-      return;
-    }
-
-    TypeToken<?> token = TypeToken.of(clazz);
-    for (TypeToken<?> superType : token.getTypes()) {
-      if (!superType.equals(token)) {
-        logger.debug(
-            "Adding exposed types from {}, which is a super type token on {}", superType, clazz);
-        addExposedTypes(superType, clazz);
-      }
-    }
-    for (Class innerClass : clazz.getDeclaredClasses()) {
-      if (exposed(innerClass.getModifiers())) {
-        logger.debug(
-            "Adding exposed types from {}, which is an exposed inner class of {}",
-            innerClass,
-            clazz);
-        addExposedTypes(innerClass, clazz);
-      }
-    }
-    for (Field field : clazz.getDeclaredFields()) {
-      if (exposed(field.getModifiers())) {
-        logger.debug("Adding exposed types from {}, which is an exposed field on {}", field, clazz);
-        addExposedTypes(field, clazz);
-      }
-    }
-    for (Invokable invokable : getExposedInvokables(token)) {
-      logger.debug(
-          "Adding exposed types from {}, which is an exposed invokable on {}", invokable, clazz);
-      addExposedTypes(invokable, clazz);
-    }
-  }
-
-  private void addExposedTypes(Invokable<?, ?> invokable, Class<?> cause) {
-    addExposedTypes(invokable.getReturnType(), cause);
-    for (Annotation annotation : invokable.getAnnotations()) {
-      logger.debug(
-          "Adding exposed types from {}, which is an annotation on invokable {}",
-          annotation,
-          invokable);
-     addExposedTypes(annotation.annotationType(), cause);
-    }
-    for (Parameter parameter : invokable.getParameters()) {
-      logger.debug(
-          "Adding exposed types from {}, which is a parameter on invokable {}",
-          parameter,
-          invokable);
-      addExposedTypes(parameter, cause);
-    }
-    for (TypeToken<?> exceptionType : invokable.getExceptionTypes()) {
-      logger.debug(
-          "Adding exposed types from {}, which is an exception type on invokable {}",
-          exceptionType,
-          invokable);
-      addExposedTypes(exceptionType, cause);
-    }
-  }
-
-  private void addExposedTypes(Parameter parameter, Class<?> cause) {
-    logger.debug(
-        "Adding exposed types from {}, which is the type of parameter {}",
-        parameter.getType(),
-        parameter);
-    addExposedTypes(parameter.getType(), cause);
-    for (Annotation annotation : parameter.getAnnotations()) {
-      logger.debug(
-          "Adding exposed types from {}, which is an annotation on parameter {}",
-          annotation,
-          parameter);
-      addExposedTypes(annotation.annotationType(), cause);
-    }
-  }
-
-  private void addExposedTypes(Field field, Class<?> cause) {
-    addExposedTypes(field.getGenericType(), cause);
-    for (Annotation annotation : field.getDeclaredAnnotations()) {
-      logger.debug(
-          "Adding exposed types from {}, which is an annotation on field {}", annotation, field);
-      addExposedTypes(annotation.annotationType(), cause);
-    }
-  }
-
-  /**
-   * Returns an {@link Invokable} for each public method or constructor of a type.
-   */
-  private Set<Invokable> getExposedInvokables(TypeToken<?> type) {
-    Set<Invokable> invokables = Sets.newHashSet();
-
-    for (Constructor constructor : type.getRawType().getConstructors()) {
-      if (0 != (constructor.getModifiers() & (Modifier.PUBLIC | Modifier.PROTECTED))) {
-        invokables.add(type.constructor(constructor));
-      }
-    }
-
-    for (Method method : type.getRawType().getMethods()) {
-      if (0 != (method.getModifiers() & (Modifier.PUBLIC | Modifier.PROTECTED))) {
-        invokables.add(type.method(method));
-      }
-    }
-
-    return invokables;
-  }
-
-  /**
-   * Returns true if the given modifier bitmap indicates exposure (public or protected access).
-   */
-  private boolean exposed(int modifiers) {
-    return 0 != (modifiers & (Modifier.PUBLIC | Modifier.PROTECTED));
-  }
-
-
-  ////////////////////////////////////////////////////////////////////////////
-
-  public static ApiSurface getSdkApiSurface() throws IOException {
-    return ApiSurface.ofPackage("com.google.cloud.dataflow")
-        .pruningPattern("com[.]google[.]cloud[.]dataflow.*Test")
-        .pruningPattern("com[.]google[.]cloud[.]dataflow.*Benchmark")
-        .pruningPrefix("com.google.cloud.dataflow.integration")
-        .pruningPrefix("java")
-        .pruningPrefix("com.google.api")
-        .pruningPrefix("com.google.auth")
-        .pruningPrefix("com.google.bigtable.v1")
-        .pruningPrefix("com.google.cloud.bigtable.config")
-        .pruningPrefix("com.google.cloud.bigtable.grpc.Bigtable*Name")
-        .pruningPrefix("com.google.protobuf")
-        .pruningPrefix("org.joda.time")
-        .pruningPrefix("org.apache.avro")
-        .pruningPrefix("org.junit")
-        .pruningPrefix("com.fasterxml.jackson.annotation");
-  }
-
-  public static void main(String[] args) throws Exception {
-    List<String> names = Lists.newArrayList();
-    for (Class clazz : getSdkApiSurface().getExposedClasses()) {
-      names.add(clazz.getName());
-    }
-    List<String> sortedNames = Lists.newArrayList(names);
-    Collections.sort(sortedNames);
-
-    for (String name : sortedNames) {
-      System.out.println(name);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java
deleted file mode 100644
index c7fe4b4..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppEngineEnvironment.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import java.lang.reflect.InvocationTargetException;
-
-/** Stores whether we are running within AppEngine or not. */
-public class AppEngineEnvironment {
-  /**
-   * True if running inside of AppEngine, false otherwise.
-   */
-  @Deprecated
-  public static final boolean IS_APP_ENGINE = isAppEngine();
-
-  /**
-   * Attempts to detect whether we are inside of AppEngine.
-   *
-   * <p>Purposely copied and left private from private <a href="https://code.google.com/p/
-   * guava-libraries/source/browse/guava/src/com/google/common/util/concurrent/
-   * MoreExecutors.java#785">code.google.common.util.concurrent.MoreExecutors#isAppEngine</a>.
-   *
-   * @return true if we are inside of AppEngine, false otherwise.
-   */
-  static boolean isAppEngine() {
-    if (System.getProperty("com.google.appengine.runtime.environment") == null) {
-      return false;
-    }
-    try {
-      // If the current environment is null, we're not inside AppEngine.
-      return Class.forName("com.google.apphosting.api.ApiProxy")
-          .getMethod("getCurrentEnvironment")
-          .invoke(null) != null;
-    } catch (ClassNotFoundException e) {
-      // If ApiProxy doesn't exist, we're not on AppEngine at all.
-      return false;
-    } catch (InvocationTargetException e) {
-      // If ApiProxy throws an exception, we're not in a proper AppEngine environment.
-      return false;
-    } catch (IllegalAccessException e) {
-      // If the method isn't accessible, we're not on a supported version of AppEngine;
-      return false;
-    } catch (NoSuchMethodException e) {
-      // If the method doesn't exist, we're not on a supported version of AppEngine;
-      return false;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppliedCombineFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppliedCombineFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppliedCombineFn.java
deleted file mode 100644
index 512d72d..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AppliedCombineFn.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.transforms.CombineFnBase.PerKeyCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.KeyedCombineFnWithContext;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.common.annotations.VisibleForTesting;
-
-import java.io.Serializable;
-
-/**
- * A {@link KeyedCombineFnWithContext} with a fixed accumulator coder. This is created from a
- * specific application of the {@link KeyedCombineFnWithContext}.
- *
- *  <p>Because the {@code AccumT} may reference {@code InputT}, the specific {@code Coder<AccumT>}
- *  may depend on the {@code Coder<InputT>}.
- *
- * @param <K> type of keys
- * @param <InputT> type of input values
- * @param <AccumT> type of mutable accumulator values
- * @param <OutputT> type of output values
- */
-public class AppliedCombineFn<K, InputT, AccumT, OutputT> implements Serializable {
-
-  private final PerKeyCombineFn<K, InputT, AccumT, OutputT> fn;
-  private final Coder<AccumT> accumulatorCoder;
-
-  private final Iterable<PCollectionView<?>> sideInputViews;
-  private final KvCoder<K, InputT> kvCoder;
-  private final WindowingStrategy<?, ?> windowingStrategy;
-
-  private AppliedCombineFn(PerKeyCombineFn<K, InputT, AccumT, OutputT> fn,
-      Coder<AccumT> accumulatorCoder, Iterable<PCollectionView<?>> sideInputViews,
-      KvCoder<K, InputT> kvCoder, WindowingStrategy<?, ?> windowingStrategy) {
-    this.fn = fn;
-    this.accumulatorCoder = accumulatorCoder;
-    this.sideInputViews = sideInputViews;
-    this.kvCoder = kvCoder;
-    this.windowingStrategy = windowingStrategy;
-  }
-
-  public static <K, InputT, AccumT, OutputT> AppliedCombineFn<K, InputT, AccumT, OutputT>
-      withAccumulatorCoder(
-          PerKeyCombineFn<? super K, ? super InputT, AccumT, OutputT> fn,
-          Coder<AccumT> accumCoder) {
-    return withAccumulatorCoder(fn, accumCoder, null, null, null);
-  }
-
-  public static <K, InputT, AccumT, OutputT> AppliedCombineFn<K, InputT, AccumT, OutputT>
-      withAccumulatorCoder(
-          PerKeyCombineFn<? super K, ? super InputT, AccumT, OutputT> fn,
-          Coder<AccumT> accumCoder, Iterable<PCollectionView<?>> sideInputViews,
-          KvCoder<K, InputT> kvCoder, WindowingStrategy<?, ?> windowingStrategy) {
-    // Casting down the K and InputT is safe because they're only used as inputs.
-    @SuppressWarnings("unchecked")
-    PerKeyCombineFn<K, InputT, AccumT, OutputT> clonedFn =
-        (PerKeyCombineFn<K, InputT, AccumT, OutputT>) SerializableUtils.clone(fn);
-    return create(clonedFn, accumCoder, sideInputViews, kvCoder, windowingStrategy);
-  }
-
-  @VisibleForTesting
-  public static <K, InputT, AccumT, OutputT> AppliedCombineFn<K, InputT, AccumT, OutputT>
-      withInputCoder(PerKeyCombineFn<? super K, ? super InputT, AccumT, OutputT> fn,
-          CoderRegistry registry, KvCoder<K, InputT> kvCoder) {
-    return withInputCoder(fn, registry, kvCoder, null, null);
-  }
-
-  public static <K, InputT, AccumT, OutputT> AppliedCombineFn<K, InputT, AccumT, OutputT>
-      withInputCoder(PerKeyCombineFn<? super K, ? super InputT, AccumT, OutputT> fn,
-          CoderRegistry registry, KvCoder<K, InputT> kvCoder,
-          Iterable<PCollectionView<?>> sideInputViews, WindowingStrategy<?, ?> windowingStrategy) {
-    // Casting down the K and InputT is safe because they're only used as inputs.
-    @SuppressWarnings("unchecked")
-    PerKeyCombineFn<K, InputT, AccumT, OutputT> clonedFn =
-        (PerKeyCombineFn<K, InputT, AccumT, OutputT>) SerializableUtils.clone(fn);
-    try {
-      Coder<AccumT> accumulatorCoder = clonedFn.getAccumulatorCoder(
-          registry, kvCoder.getKeyCoder(), kvCoder.getValueCoder());
-      return create(clonedFn, accumulatorCoder, sideInputViews, kvCoder, windowingStrategy);
-    } catch (CannotProvideCoderException e) {
-      throw new IllegalStateException("Could not determine coder for accumulator", e);
-    }
-  }
-
-  private static <K, InputT, AccumT, OutputT> AppliedCombineFn<K, InputT, AccumT, OutputT> create(
-      PerKeyCombineFn<K, InputT, AccumT, OutputT> fn,
-      Coder<AccumT> accumulatorCoder, Iterable<PCollectionView<?>> sideInputViews,
-      KvCoder<K, InputT> kvCoder, WindowingStrategy<?, ?> windowingStrategy) {
-    return new AppliedCombineFn<>(
-        fn, accumulatorCoder, sideInputViews, kvCoder, windowingStrategy);
-  }
-
-  public PerKeyCombineFn<K, InputT, AccumT, OutputT> getFn() {
-    return fn;
-  }
-
-  public Iterable<PCollectionView<?>> getSideInputViews() {
-    return sideInputViews;
-  }
-
-  public Coder<AccumT> getAccumulatorCoder() {
-    return accumulatorCoder;
-  }
-
-  public KvCoder<K, InputT> getKvCoder() {
-    return kvCoder;
-  }
-
-  public WindowingStrategy<?, ?> getWindowingStrategy() {
-    return windowingStrategy;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java
deleted file mode 100644
index ca59c53..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AssignWindowsDoFn.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-
-import org.joda.time.Instant;
-
-import java.util.Collection;
-
-/**
- * {@link DoFn} that tags elements of a PCollection with windows, according
- * to the provided {@link WindowFn}.
- * @param <T> Type of elements being windowed
- * @param <W> Window type
- */
-@SystemDoFnInternal
-public class AssignWindowsDoFn<T, W extends BoundedWindow> extends DoFn<T, T> {
-  private WindowFn<? super T, W> fn;
-
-  public AssignWindowsDoFn(WindowFn<? super T, W> fn) {
-    this.fn = fn;
-  }
-
-  @Override
-  @SuppressWarnings("unchecked")
-  public void processElement(final ProcessContext c) throws Exception {
-    Collection<W> windows =
-        ((WindowFn<T, W>) fn).assignWindows(
-            ((WindowFn<T, W>) fn).new AssignContext() {
-                @Override
-                public T element() {
-                  return c.element();
-                }
-
-                @Override
-                public Instant timestamp() {
-                  return c.timestamp();
-                }
-
-                @Override
-                public Collection<? extends BoundedWindow> windows() {
-                  return c.windowingInternals().windows();
-                }
-              });
-
-    c.windowingInternals()
-        .outputWindowedValue(c.element(), c.timestamp(), windows, PaneInfo.NO_FIRING);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptAndTimeBoundedExponentialBackOff.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptAndTimeBoundedExponentialBackOff.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptAndTimeBoundedExponentialBackOff.java
deleted file mode 100644
index e94d414..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptAndTimeBoundedExponentialBackOff.java
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.util.BackOff;
-import com.google.api.client.util.NanoClock;
-import com.google.common.base.Preconditions;
-
-import java.util.concurrent.TimeUnit;
-
-/**
- * Extension of {@link AttemptBoundedExponentialBackOff} that bounds the total time that the backoff
- * is happening as well as the amount of retries. Acts exactly as an AttemptBoundedExponentialBackOff
- * unless the time interval has expired since the object was created. At this point, it will always
- * return BackOff.STOP. Calling reset() resets both the timer and the number of retry attempts,
- * unless a custom ResetPolicy (ResetPolicy.ATTEMPTS or ResetPolicy.TIMER) is passed to the
- * constructor.
- *
- * <p>Implementation is not thread-safe.
- */
-public class AttemptAndTimeBoundedExponentialBackOff extends AttemptBoundedExponentialBackOff {
-  private long endTimeMillis;
-  private long maximumTotalWaitTimeMillis;
-  private ResetPolicy resetPolicy;
-  private final NanoClock nanoClock;
-  // NanoClock.SYSTEM has a max elapsed time of 292 years or 2^63 ns.  Here, we choose 2^53 ns as
-  // a smaller but still huge limit.
-  private static final long MAX_ELAPSED_TIME_MILLIS = 1L << 53;
-
-  /**
-   * A ResetPolicy controls the behavior of this BackOff when reset() is called.  By default, both
-   * the number of attempts and the time bound for the BackOff are reset, but an alternative
-   * ResetPolicy may be set to only reset one of these two.
-   */
-  public static enum ResetPolicy {
-    ALL,
-    ATTEMPTS,
-    TIMER
-  }
-
-  /**
-   * Constructs an instance of AttemptAndTimeBoundedExponentialBackOff.
-   *
-   * @param maximumNumberOfAttempts The maximum number of attempts it will make.
-   * @param initialIntervalMillis The original interval to wait between attempts in milliseconds.
-   * @param maximumTotalWaitTimeMillis The maximum total time that this object will
-   *    allow more attempts in milliseconds.
-   */
-  public AttemptAndTimeBoundedExponentialBackOff(
-      int maximumNumberOfAttempts, long initialIntervalMillis, long maximumTotalWaitTimeMillis) {
-    this(
-        maximumNumberOfAttempts,
-        initialIntervalMillis,
-        maximumTotalWaitTimeMillis,
-        ResetPolicy.ALL,
-        NanoClock.SYSTEM);
-  }
-
-  /**
-   * Constructs an instance of AttemptAndTimeBoundedExponentialBackOff.
-   *
-   * @param maximumNumberOfAttempts The maximum number of attempts it will make.
-   * @param initialIntervalMillis The original interval to wait between attempts in milliseconds.
-   * @param maximumTotalWaitTimeMillis The maximum total time that this object will
-   *    allow more attempts in milliseconds.
-   * @param resetPolicy The ResetPolicy specifying the properties of this BackOff that are subject
-   *    to being reset.
-   */
-  public AttemptAndTimeBoundedExponentialBackOff(
-      int maximumNumberOfAttempts,
-      long initialIntervalMillis,
-      long maximumTotalWaitTimeMillis,
-      ResetPolicy resetPolicy) {
-    this(
-        maximumNumberOfAttempts,
-        initialIntervalMillis,
-        maximumTotalWaitTimeMillis,
-        resetPolicy,
-        NanoClock.SYSTEM);
-  }
-
-  /**
-   * Constructs an instance of AttemptAndTimeBoundedExponentialBackOff.
-   *
-   * @param maximumNumberOfAttempts The maximum number of attempts it will make.
-   * @param initialIntervalMillis The original interval to wait between attempts in milliseconds.
-   * @param maximumTotalWaitTimeMillis The maximum total time that this object will
-   *    allow more attempts in milliseconds.
-   * @param resetPolicy The ResetPolicy specifying the properties of this BackOff that are subject
-   *    to being reset.
-   * @param nanoClock clock used to measure the time that has passed.
-   */
-  public AttemptAndTimeBoundedExponentialBackOff(
-      int maximumNumberOfAttempts,
-      long initialIntervalMillis,
-      long maximumTotalWaitTimeMillis,
-      ResetPolicy resetPolicy,
-      NanoClock nanoClock) {
-    super(maximumNumberOfAttempts, initialIntervalMillis);
-    Preconditions.checkArgument(
-        maximumTotalWaitTimeMillis > 0, "Maximum total wait time must be greater than zero.");
-    Preconditions.checkArgument(
-        maximumTotalWaitTimeMillis < MAX_ELAPSED_TIME_MILLIS,
-        "Maximum total wait time must be less than " + MAX_ELAPSED_TIME_MILLIS + " milliseconds");
-    Preconditions.checkArgument(resetPolicy != null, "resetPolicy may not be null");
-    Preconditions.checkArgument(nanoClock != null, "nanoClock may not be null");
-    this.maximumTotalWaitTimeMillis = maximumTotalWaitTimeMillis;
-    this.resetPolicy = resetPolicy;
-    this.nanoClock = nanoClock;
-    // Set the end time for this BackOff.  Note that we cannot simply call reset() here since the
-    // resetPolicy may not be set to reset the time bound.
-    endTimeMillis = getTimeMillis() + maximumTotalWaitTimeMillis;
-  }
-
-  @Override
-  public void reset() {
-    // reset() is called in the constructor of the parent class before resetPolicy and nanoClock are
-    // set.  In this case, we call the parent class's reset() method and return.
-    if (resetPolicy == null) {
-      super.reset();
-      return;
-    }
-    // Reset the number of attempts.
-    if (resetPolicy == ResetPolicy.ALL || resetPolicy == ResetPolicy.ATTEMPTS) {
-      super.reset();
-    }
-    // Reset the time bound.
-    if (resetPolicy == ResetPolicy.ALL || resetPolicy == ResetPolicy.TIMER) {
-      endTimeMillis = getTimeMillis() + maximumTotalWaitTimeMillis;
-    }
-  }
-
-  public void setEndtimeMillis(long endTimeMillis) {
-    this.endTimeMillis = endTimeMillis;
-  }
-
-  @Override
-  public long nextBackOffMillis() {
-    if (atMaxAttempts()) {
-      return BackOff.STOP;
-    }
-    long backoff = Math.min(super.nextBackOffMillis(), endTimeMillis - getTimeMillis());
-    return (backoff > 0 ? backoff : BackOff.STOP);
-  }
-
-  private long getTimeMillis() {
-    return TimeUnit.NANOSECONDS.toMillis(nanoClock.nanoTime());
-  }
-
-  @Override
-  public boolean atMaxAttempts() {
-    return super.atMaxAttempts() || getTimeMillis() >= endTimeMillis;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java
deleted file mode 100644
index 613316e..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AttemptBoundedExponentialBackOff.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.util.BackOff;
-import com.google.common.base.Preconditions;
-
-/**
- * Implementation of {@link BackOff} that increases the back off period for each retry attempt
- * using a randomization function that grows exponentially.
- *
- * <p>Example: The initial interval is .5 seconds and the maximum number of retries is 10.
- * For 10 tries the sequence will be (values in seconds):
- *
- * <pre>
- * retry#      retry_interval     randomized_interval
- * 1             0.5                [0.25,   0.75]
- * 2             0.75               [0.375,  1.125]
- * 3             1.125              [0.562,  1.687]
- * 4             1.687              [0.8435, 2.53]
- * 5             2.53               [1.265,  3.795]
- * 6             3.795              [1.897,  5.692]
- * 7             5.692              [2.846,  8.538]
- * 8             8.538              [4.269, 12.807]
- * 9            12.807              [6.403, 19.210]
- * 10           {@link BackOff#STOP}
- * </pre>
- *
- * <p>Implementation is not thread-safe.
- */
-public class AttemptBoundedExponentialBackOff implements BackOff {
-  public static final double DEFAULT_MULTIPLIER = 1.5;
-  public static final double DEFAULT_RANDOMIZATION_FACTOR = 0.5;
-  private final int maximumNumberOfAttempts;
-  private final long initialIntervalMillis;
-  private int currentAttempt;
-
-  public AttemptBoundedExponentialBackOff(int maximumNumberOfAttempts, long initialIntervalMillis) {
-    Preconditions.checkArgument(maximumNumberOfAttempts > 0,
-        "Maximum number of attempts must be greater than zero.");
-    Preconditions.checkArgument(initialIntervalMillis > 0,
-        "Initial interval must be greater than zero.");
-    this.maximumNumberOfAttempts = maximumNumberOfAttempts;
-    this.initialIntervalMillis = initialIntervalMillis;
-    reset();
-  }
-
-  @Override
-  public void reset() {
-    currentAttempt = 1;
-  }
-
-  @Override
-  public long nextBackOffMillis() {
-    if (currentAttempt >= maximumNumberOfAttempts) {
-      return BackOff.STOP;
-    }
-    double currentIntervalMillis = initialIntervalMillis
-        * Math.pow(DEFAULT_MULTIPLIER, currentAttempt - 1);
-    double randomOffset = (Math.random() * 2 - 1)
-        * DEFAULT_RANDOMIZATION_FACTOR * currentIntervalMillis;
-    currentAttempt += 1;
-    return Math.round(currentIntervalMillis + randomOffset);
-  }
-
-  public boolean atMaxAttempts() {
-    return currentAttempt >= maximumNumberOfAttempts;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AvroUtils.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AvroUtils.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AvroUtils.java
deleted file mode 100644
index c3a4861..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/AvroUtils.java
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.common.base.MoreObjects.firstNonNull;
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Verify.verify;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-
-import org.apache.avro.Schema;
-import org.apache.avro.Schema.Field;
-import org.apache.avro.Schema.Type;
-import org.apache.avro.file.DataFileConstants;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.io.BinaryDecoder;
-import org.apache.avro.io.DecoderFactory;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.ByteBuffer;
-import java.nio.channels.Channels;
-import java.util.Arrays;
-import java.util.List;
-
-import javax.annotation.Nullable;
-
-/**
- * A set of utilities for working with Avro files.
- *
- * <p>These utilities are based on the <a
- * href="https://avro.apache.org/docs/1.7.7/spec.html">Avro 1.7.7</a> specification.
- */
-public class AvroUtils {
-
-  /**
-   * Avro file metadata.
-   */
-  public static class AvroMetadata {
-    private byte[] syncMarker;
-    private String codec;
-    private String schemaString;
-
-    AvroMetadata(byte[] syncMarker, String codec, String schemaString) {
-      this.syncMarker = syncMarker;
-      this.codec = codec;
-      this.schemaString = schemaString;
-    }
-
-    /**
-     * The JSON-encoded <a href="https://avro.apache.org/docs/1.7.7/spec.html#schemas">schema</a>
-     * string for the file.
-     */
-    public String getSchemaString() {
-      return schemaString;
-    }
-
-    /**
-     * The <a href="https://avro.apache.org/docs/1.7.7/spec.html#Required+Codecs">codec</a> of the
-     * file.
-     */
-    public String getCodec() {
-      return codec;
-    }
-
-    /**
-     * The 16-byte sync marker for the file.  See the documentation for
-     * <a href="https://avro.apache.org/docs/1.7.7/spec.html#Object+Container+Files">Object
-     * Container File</a> for more information.
-     */
-    public byte[] getSyncMarker() {
-      return syncMarker;
-    }
-  }
-
-  /**
-   * Reads the {@link AvroMetadata} from the header of an Avro file.
-   *
-   * <p>This method parses the header of an Avro
-   * <a href="https://avro.apache.org/docs/1.7.7/spec.html#Object+Container+Files">
-   * Object Container File</a>.
-   *
-   * @throws IOException if the file is an invalid format.
-   */
-  public static AvroMetadata readMetadataFromFile(String fileName) throws IOException {
-    String codec = null;
-    String schemaString = null;
-    byte[] syncMarker;
-    try (InputStream stream =
-        Channels.newInputStream(IOChannelUtils.getFactory(fileName).open(fileName))) {
-      BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(stream, null);
-
-      // The header of an object container file begins with a four-byte magic number, followed
-      // by the file metadata (including the schema and codec), encoded as a map. Finally, the
-      // header ends with the file's 16-byte sync marker.
-      // See https://avro.apache.org/docs/1.7.7/spec.html#Object+Container+Files for details on
-      // the encoding of container files.
-
-      // Read the magic number.
-      byte[] magic = new byte[DataFileConstants.MAGIC.length];
-      decoder.readFixed(magic);
-      if (!Arrays.equals(magic, DataFileConstants.MAGIC)) {
-        throw new IOException("Missing Avro file signature: " + fileName);
-      }
-
-      // Read the metadata to find the codec and schema.
-      ByteBuffer valueBuffer = ByteBuffer.allocate(512);
-      long numRecords = decoder.readMapStart();
-      while (numRecords > 0) {
-        for (long recordIndex = 0; recordIndex < numRecords; recordIndex++) {
-          String key = decoder.readString();
-          // readBytes() clears the buffer and returns a buffer where:
-          // - position is the start of the bytes read
-          // - limit is the end of the bytes read
-          valueBuffer = decoder.readBytes(valueBuffer);
-          byte[] bytes = new byte[valueBuffer.remaining()];
-          valueBuffer.get(bytes);
-          if (key.equals(DataFileConstants.CODEC)) {
-            codec = new String(bytes, "UTF-8");
-          } else if (key.equals(DataFileConstants.SCHEMA)) {
-            schemaString = new String(bytes, "UTF-8");
-          }
-        }
-        numRecords = decoder.mapNext();
-      }
-      if (codec == null) {
-        codec = DataFileConstants.NULL_CODEC;
-      }
-
-      // Finally, read the sync marker.
-      syncMarker = new byte[DataFileConstants.SYNC_SIZE];
-      decoder.readFixed(syncMarker);
-    }
-    return new AvroMetadata(syncMarker, codec, schemaString);
-  }
-
-  /**
-   * Formats BigQuery seconds-since-epoch into String matching JSON export. Thread-safe and
-   * immutable.
-   */
-  private static final DateTimeFormatter DATE_AND_SECONDS_FORMATTER =
-      DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").withZoneUTC();
-  // Package private for BigQueryTableRowIterator to use.
-  static String formatTimestamp(String timestamp) {
-    // timestamp is in "seconds since epoch" format, with scientific notation.
-    // e.g., "1.45206229112345E9" to mean "2016-01-06 06:38:11.123456 UTC".
-    // Separate into seconds and microseconds.
-    double timestampDoubleMicros = Double.parseDouble(timestamp) * 1000000;
-    long timestampMicros = (long) timestampDoubleMicros;
-    long seconds = timestampMicros / 1000000;
-    int micros = (int) (timestampMicros % 1000000);
-    String dayAndTime = DATE_AND_SECONDS_FORMATTER.print(seconds * 1000);
-
-    // No sub-second component.
-    if (micros == 0) {
-      return String.format("%s UTC", dayAndTime);
-    }
-
-    // Sub-second component.
-    int digits = 6;
-    int subsecond = micros;
-    while (subsecond % 10 == 0) {
-      digits--;
-      subsecond /= 10;
-    }
-    String formatString = String.format("%%0%dd", digits);
-    String fractionalSeconds = String.format(formatString, subsecond);
-    return String.format("%s.%s UTC", dayAndTime, fractionalSeconds);
-  }
-
-  /**
-   * Utility function to convert from an Avro {@link GenericRecord} to a BigQuery {@link TableRow}.
-   *
-   * See <a href="https://cloud.google.com/bigquery/exporting-data-from-bigquery#config">
-   * "Avro format"</a> for more information.
-   */
-  public static TableRow convertGenericRecordToTableRow(GenericRecord record, TableSchema schema) {
-    return convertGenericRecordToTableRow(record, schema.getFields());
-  }
-
-  private static TableRow convertGenericRecordToTableRow(
-      GenericRecord record, List<TableFieldSchema> fields) {
-    TableRow row = new TableRow();
-    for (TableFieldSchema subSchema : fields) {
-      // Per https://cloud.google.com/bigquery/docs/reference/v2/tables#schema, the name field
-      // is required, so it may not be null.
-      Field field = record.getSchema().getField(subSchema.getName());
-      Object convertedValue =
-          getTypedCellValue(field.schema(), subSchema, record.get(field.name()));
-      if (convertedValue != null) {
-        // To match the JSON files exported by BigQuery, do not include null values in the output.
-        row.set(field.name(), convertedValue);
-      }
-    }
-    return row;
-  }
-
-  @Nullable
-  private static Object getTypedCellValue(Schema schema, TableFieldSchema fieldSchema, Object v) {
-    // Per https://cloud.google.com/bigquery/docs/reference/v2/tables#schema, the mode field
-    // is optional (and so it may be null), but defaults to "NULLABLE".
-    String mode = firstNonNull(fieldSchema.getMode(), "NULLABLE");
-    switch (mode) {
-      case "REQUIRED":
-        return convertRequiredField(schema.getType(), fieldSchema, v);
-      case "REPEATED":
-        return convertRepeatedField(schema, fieldSchema, v);
-      case "NULLABLE":
-        return convertNullableField(schema, fieldSchema, v);
-      default:
-        throw new UnsupportedOperationException(
-            "Parsing a field with BigQuery field schema mode " + fieldSchema.getMode());
-    }
-  }
-
-  private static List<Object> convertRepeatedField(
-      Schema schema, TableFieldSchema fieldSchema, Object v) {
-    Type arrayType = schema.getType();
-    verify(
-        arrayType == Type.ARRAY,
-        "BigQuery REPEATED field %s should be Avro ARRAY, not %s",
-        fieldSchema.getName(),
-        arrayType);
-    // REPEATED fields are represented as Avro arrays.
-    if (v == null) {
-      // Handle the case of an empty repeated field.
-      return ImmutableList.of();
-    }
-    @SuppressWarnings("unchecked")
-    List<Object> elements = (List<Object>) v;
-    ImmutableList.Builder<Object> values = ImmutableList.builder();
-    Type elementType = schema.getElementType().getType();
-    for (Object element : elements) {
-      values.add(convertRequiredField(elementType, fieldSchema, element));
-    }
-    return values.build();
-  }
-
-  private static Object convertRequiredField(
-      Type avroType, TableFieldSchema fieldSchema, Object v) {
-    // REQUIRED fields are represented as the corresponding Avro types. For example, a BigQuery
-    // INTEGER type maps to an Avro LONG type.
-    checkNotNull(v, "REQUIRED field %s should not be null", fieldSchema.getName());
-    ImmutableMap<String, Type> fieldMap =
-        ImmutableMap.<String, Type>builder()
-            .put("STRING", Type.STRING)
-            .put("INTEGER", Type.LONG)
-            .put("FLOAT", Type.DOUBLE)
-            .put("BOOLEAN", Type.BOOLEAN)
-            .put("TIMESTAMP", Type.LONG)
-            .put("RECORD", Type.RECORD)
-            .build();
-    // Per https://cloud.google.com/bigquery/docs/reference/v2/tables#schema, the type field
-    // is required, so it may not be null.
-    String bqType = fieldSchema.getType();
-    Type expectedAvroType = fieldMap.get(bqType);
-    verify(
-        avroType == expectedAvroType,
-        "Expected Avro schema type %s, not %s, for BigQuery %s field %s",
-        expectedAvroType,
-        avroType,
-        bqType,
-        fieldSchema.getName());
-    switch (fieldSchema.getType()) {
-      case "STRING":
-        // Avro will use a CharSequence to represent String objects, but it may not always use
-        // java.lang.String; for example, it may prefer org.apache.avro.util.Utf8.
-        verify(v instanceof CharSequence, "Expected CharSequence (String), got %s", v.getClass());
-        return v.toString();
-      case "INTEGER":
-        verify(v instanceof Long, "Expected Long, got %s", v.getClass());
-        return ((Long) v).toString();
-      case "FLOAT":
-        verify(v instanceof Double, "Expected Double, got %s", v.getClass());
-        return v;
-      case "BOOLEAN":
-        verify(v instanceof Boolean, "Expected Boolean, got %s", v.getClass());
-        return v;
-      case "TIMESTAMP":
-        // TIMESTAMP data types are represented as Avro LONG types. They are converted back to
-        // Strings with variable-precision (up to six digits) to match the JSON files export
-        // by BigQuery.
-        verify(v instanceof Long, "Expected Long, got %s", v.getClass());
-        Double doubleValue = ((Long) v) / 1000000.0;
-        return formatTimestamp(doubleValue.toString());
-      case "RECORD":
-        verify(v instanceof GenericRecord, "Expected GenericRecord, got %s", v.getClass());
-        return convertGenericRecordToTableRow((GenericRecord) v, fieldSchema.getFields());
-      default:
-        throw new UnsupportedOperationException(
-            String.format(
-                "Unexpected BigQuery field schema type %s for field named %s",
-                fieldSchema.getType(),
-                fieldSchema.getName()));
-    }
-  }
-
-  @Nullable
-  private static Object convertNullableField(
-      Schema avroSchema, TableFieldSchema fieldSchema, Object v) {
-    // NULLABLE fields are represented as an Avro Union of the corresponding type and "null".
-    verify(
-        avroSchema.getType() == Type.UNION,
-        "Expected Avro schema type UNION, not %s, for BigQuery NULLABLE field %s",
-        avroSchema.getType(),
-        fieldSchema.getName());
-    List<Schema> unionTypes = avroSchema.getTypes();
-    verify(
-        unionTypes.size() == 2,
-        "BigQuery NULLABLE field %s should be an Avro UNION of NULL and another type, not %s",
-        fieldSchema.getName(),
-        unionTypes);
-
-    if (v == null) {
-      return null;
-    }
-
-    Type firstType = unionTypes.get(0).getType();
-    if (!firstType.equals(Type.NULL)) {
-      return convertRequiredField(firstType, fieldSchema, v);
-    }
-    return convertRequiredField(unionTypes.get(1).getType(), fieldSchema, v);
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java
deleted file mode 100644
index 6a0ccf3..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/BaseExecutionContext.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.util.common.worker.StateSampler;
-import com.google.cloud.dataflow.sdk.util.state.StateInternals;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Base class for implementations of {@link ExecutionContext}.
- *
- * <p>A concrete subclass should implement {@link #createStepContext} to create the appropriate
- * {@link StepContext} implementation. Any {@code StepContext} created will
- * be cached for the lifetime of this {@link ExecutionContext}.
- *
- * <p>BaseExecutionContext is generic to allow implementing subclasses to return a concrete subclass
- * of {@link StepContext} from {@link #getOrCreateStepContext(String, String, StateSampler)} and
- * {@link #getAllStepContexts()} without forcing each subclass to override the method, e.g.
- * <pre>
- * @Override
- * StreamingModeExecutionContext.StepContext getOrCreateStepContext(...) {
- *   return (StreamingModeExecutionContext.StepContext) super.getOrCreateStepContext(...);
- * }
- * </pre>
- *
- * <p>When a subclass of {@code BaseExecutionContext} has been downcast, the return types of
- * {@link #createStepContext(String, String, StateSampler)},
- * {@link #getOrCreateStepContext(String, String, StateSampler}, and {@link #getAllStepContexts()}
- * will be appropriately specialized.
- */
-public abstract class BaseExecutionContext<T extends ExecutionContext.StepContext>
-    implements ExecutionContext {
-
-  private Map<String, T> cachedStepContexts = new HashMap<>();
-
-  /**
-   * Implementations should override this to create the specific type
-   * of {@link StepContext} they need.
-   */
-  protected abstract T createStepContext(
-      String stepName, String transformName, StateSampler stateSampler);
-
-
-  /**
-   * Returns the {@link StepContext} associated with the given step.
-   */
-  @Override
-  public T getOrCreateStepContext(
-      String stepName, String transformName, StateSampler stateSampler) {
-    T context = cachedStepContexts.get(stepName);
-    if (context == null) {
-      context = createStepContext(stepName, transformName, stateSampler);
-      cachedStepContexts.put(stepName, context);
-    }
-    return context;
-  }
-
-  /**
-   * Returns a collection view of all of the {@link StepContext}s.
-   */
-  @Override
-  public Collection<? extends T> getAllStepContexts() {
-    return Collections.unmodifiableCollection(cachedStepContexts.values());
-  }
-
-  /**
-   * Hook for subclasses to implement that will be called whenever
-   * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#output}
-   * is called.
-   */
-  @Override
-  public void noteOutput(WindowedValue<?> output) {}
-
-  /**
-   * Hook for subclasses to implement that will be called whenever
-   * {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context#sideOutput}
-   * is called.
-   */
-  @Override
-  public void noteSideOutput(TupleTag<?> tag, WindowedValue<?> output) {}
-
-  /**
-   * Base class for implementations of {@link ExecutionContext.StepContext}.
-   *
-   * <p>To complete a concrete subclass, implement {@link #timerInternals} and
-   * {@link #stateInternals}.
-   */
-  public abstract static class StepContext implements ExecutionContext.StepContext {
-    private final ExecutionContext executionContext;
-    private final String stepName;
-    private final String transformName;
-
-    public StepContext(ExecutionContext executionContext, String stepName, String transformName) {
-      this.executionContext = executionContext;
-      this.stepName = stepName;
-      this.transformName = transformName;
-    }
-
-    @Override
-    public String getStepName() {
-      return stepName;
-    }
-
-    @Override
-    public String getTransformName() {
-      return transformName;
-    }
-
-    @Override
-    public void noteOutput(WindowedValue<?> output) {
-      executionContext.noteOutput(output);
-    }
-
-    @Override
-    public void noteSideOutput(TupleTag<?> tag, WindowedValue<?> output) {
-      executionContext.noteSideOutput(tag, output);
-    }
-
-    @Override
-    public <T, W extends BoundedWindow> void writePCollectionViewData(
-        TupleTag<?> tag,
-        Iterable<WindowedValue<T>> data, Coder<Iterable<WindowedValue<T>>> dataCoder,
-        W window, Coder<W> windowCoder) throws IOException {
-      throw new UnsupportedOperationException("Not implemented.");
-    }
-
-    @Override
-    public abstract StateInternals<?> stateInternals();
-
-    @Override
-    public abstract TimerInternals timerInternals();
-  }
-}

[23/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java
deleted file mode 100644
index a74e5bf..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Create.java
+++ /dev/null
@@ -1,426 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.TimestampedValue;
-import com.google.cloud.dataflow.sdk.values.TimestampedValue.TimestampedValueCoder;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.base.Function;
-import com.google.common.base.Optional;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Iterables;
-
-import org.joda.time.Instant;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-
-/**
- * {@code Create<T>} takes a collection of elements of type {@code T}
- * known when the pipeline is constructed and returns a
- * {@code PCollection<T>} containing the elements.
- *
- * <p>Example of use:
- * <pre> {@code
- * Pipeline p = ...;
- *
- * PCollection<Integer> pc = p.apply(Create.of(3, 4, 5).withCoder(BigEndianIntegerCoder.of()));
- *
- * Map<String, Integer> map = ...;
- * PCollection<KV<String, Integer>> pt =
- *     p.apply(Create.of(map)
- *      .withCoder(KvCoder.of(StringUtf8Coder.of(),
- *                            BigEndianIntegerCoder.of())));
- * } </pre>
- *
- * <p>{@code Create} can automatically determine the {@code Coder} to use
- * if all elements have the same run-time class, and a default coder is registered for that
- * class. See {@link CoderRegistry} for details on how defaults are determined.
- *
- * <p>If a coder can not be inferred, {@link Create.Values#withCoder} must be called
- * explicitly to set the encoding of the resulting
- * {@code PCollection}.
- *
- * <p>A good use for {@code Create} is when a {@code PCollection}
- * needs to be created without dependencies on files or other external
- * entities.  This is especially useful during testing.
- *
- * <p>Caveat: {@code Create} only supports small in-memory datasets,
- * particularly when submitting jobs to the Google Cloud Dataflow
- * service.
- *
- * @param <T> the type of the elements of the resulting {@code PCollection}
- */
-public class Create<T> {
-  /**
-   * Returns a new {@code Create.Values} transform that produces a
-   * {@link PCollection} containing elements of the provided
-   * {@code Iterable}.
-   *
-   * <p>The argument should not be modified after this is called.
-   *
-   * <p>The elements of the output {@link PCollection} will have a timestamp of negative infinity,
-   * see {@link Create#timestamped} for a way of creating a {@code PCollection} with timestamped
-   * elements.
-   *
-   * <p>By default, {@code Create.Values} can automatically determine the {@code Coder} to use
-   * if all elements have the same non-parameterized run-time class, and a default coder is
-   * registered for that class. See {@link CoderRegistry} for details on how defaults are
-   * determined.
-   * Otherwise, use {@link Create.Values#withCoder} to set the coder explicitly.
-   */
-  public static <T> Values<T> of(Iterable<T> elems) {
-    return new Values<>(elems, Optional.<Coder<T>>absent());
-  }
-
-  /**
-   * Returns a new {@code Create.Values} transform that produces a
-   * {@link PCollection} containing the specified elements.
-   *
-   * <p>The elements will have a timestamp of negative infinity, see
-   * {@link Create#timestamped} for a way of creating a {@code PCollection}
-   * with timestamped elements.
-   *
-   * <p>The arguments should not be modified after this is called.
-   *
-   * <p>By default, {@code Create.Values} can automatically determine the {@code Coder} to use
-   * if all elements have the same non-parameterized run-time class, and a default coder is
-   * registered for that class. See {@link CoderRegistry} for details on how defaults are
-   * determined.
-   * Otherwise, use {@link Create.Values#withCoder} to set the coder explicitly.
-   */
-  @SafeVarargs
-  public static <T> Values<T> of(T... elems) {
-    return of(Arrays.asList(elems));
-  }
-
-  /**
-   * Returns a new {@code Create.Values} transform that produces a
-   * {@link PCollection} of {@link KV}s corresponding to the keys and
-   * values of the specified {@code Map}.
-   *
-   * <p>The elements will have a timestamp of negative infinity, see
-   * {@link Create#timestamped} for a way of creating a {@code PCollection}
-   * with timestamped elements.
-   *
-   * <p>By default, {@code Create.Values} can automatically determine the {@code Coder} to use
-   * if all elements have the same non-parameterized run-time class, and a default coder is
-   * registered for that class. See {@link CoderRegistry} for details on how defaults are
-   * determined.
-   * Otherwise, use {@link Create.Values#withCoder} to set the coder explicitly.
-   */
-  public static <K, V> Values<KV<K, V>> of(Map<K, V> elems) {
-    List<KV<K, V>> kvs = new ArrayList<>(elems.size());
-    for (Map.Entry<K, V> entry : elems.entrySet()) {
-      kvs.add(KV.of(entry.getKey(), entry.getValue()));
-    }
-    return of(kvs);
-  }
-
-  /**
-   * Returns a new {@link Create.TimestampedValues} transform that produces a
-   * {@link PCollection} containing the elements of the provided {@code Iterable}
-   * with the specified timestamps.
-   *
-   * <p>The argument should not be modified after this is called.
-   *
-   * <p>By default, {@code Create.TimestampedValues} can automatically determine the {@code Coder}
-   * to use if all elements have the same non-parameterized run-time class, and a default coder is
-   * registered for that class. See {@link CoderRegistry} for details on how defaults are
-   * determined.
-   * Otherwise, use {@link Create.TimestampedValues#withCoder} to set the coder explicitly.
-   */
-  public static <T> TimestampedValues<T> timestamped(Iterable<TimestampedValue<T>> elems) {
-    return new TimestampedValues<>(elems, Optional.<Coder<T>>absent());
-  }
-
-  /**
-   * Returns a new {@link Create.TimestampedValues} transform that produces a {@link PCollection}
-   * containing the specified elements with the specified timestamps.
-   *
-   * <p>The arguments should not be modified after this is called.
-   */
-  @SafeVarargs
-  public static <T> TimestampedValues<T> timestamped(
-      @SuppressWarnings("unchecked") TimestampedValue<T>... elems) {
-    return timestamped(Arrays.asList(elems));
-  }
-
-  /**
-   * Returns a new root transform that produces a {@link PCollection} containing
-   * the specified elements with the specified timestamps.
-   *
-   * <p>Values and timestamps are paired positionally: the i-th value receives the i-th
-   * timestamp, which is interpreted as milliseconds since the epoch
-   * (see {@code Instant(long)}).
-   *
-   * <p>The arguments should not be modified after this is called.
-   *
-   * <p>By default, {@code Create.TimestampedValues} can automatically determine the {@code Coder}
-   * to use if all elements have the same non-parameterized run-time class, and a default coder
-   * is registered for that class. See {@link CoderRegistry} for details on how defaults are
-   * determined.
-   * Otherwise, use {@link Create.TimestampedValues#withCoder} to set the coder explicitly.
-   *
-   * @param values the elements of the resulting {@code PCollection}
-   * @param timestamps the timestamp, in milliseconds, for each element of {@code values}
-   * @throws IllegalArgumentException if there are a different number of values
-   * and timestamps, or if a timestamp is {@code null}
-   */
-  public static <T> TimestampedValues<T> timestamped(
-      Iterable<T> values, Iterable<Long> timestamps) {
-    List<TimestampedValue<T>> elems = new ArrayList<>();
-    Iterator<T> valueIter = values.iterator();
-    Iterator<Long> timestampIter = timestamps.iterator();
-    while (valueIter.hasNext() && timestampIter.hasNext()) {
-      T value = valueIter.next();
-      Long timestamp = timestampIter.next();
-      Preconditions.checkArgument(timestamp != null, "Timestamps must not be null.");
-      // Unbox explicitly so that the Instant(long) constructor is selected. Passing the
-      // boxed Long resolves to Instant(Object), which converts a null to the current
-      // time instead of failing.
-      long millis = timestamp;
-      elems.add(TimestampedValue.of(value, new Instant(millis)));
-    }
-    Preconditions.checkArgument(
-        !valueIter.hasNext() && !timestampIter.hasNext(),
-        "Expect sizes of values and timestamps are same.");
-    return timestamped(elems);
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@code PTransform} that creates a {@code PCollection} from a set of in-memory objects.
-   *
-   * <p>Both the elements and the optional coder are held in {@code transient} fields.
-   */
-  public static class Values<T> extends PTransform<PInput, PCollection<T>> {
-    /**
-     * Returns a {@link Create.Values} PTransform like this one that uses the given
-     * {@code Coder<T>} to decode each of the objects into a
-     * value of type {@code T}.
-     *
-     * <p>By default, {@code Create.Values} can automatically determine the {@code Coder} to use
-     * if all elements have the same non-parameterized run-time class, and a default coder is
-     * registered for that class. See {@link CoderRegistry} for details on how defaults are
-     * determined.
-     *
-     * <p>Note that for {@link Create.Values} with no elements, the {@link VoidCoder} is used.
-     */
-    public Values<T> withCoder(Coder<T> coder) {
-      return new Values<>(elems, Optional.of(coder));
-    }
-
-    /**
-     * Returns the elements this transform was constructed with (the same {@code Iterable}
-     * instance, not a copy).
-     */
-    public Iterable<T> getElements() {
-      return elems;
-    }
-
-    /**
-     * Creates the bounded, globally-windowed output {@code PCollection} and sets its coder
-     * to the one returned by {@link #getDefaultOutputCoder}.
-     *
-     * @throws IllegalArgumentException if no coder was specified and none can be inferred
-     */
-    @Override
-    public PCollection<T> apply(PInput input) {
-      try {
-        Coder<T> coder = getDefaultOutputCoder(input);
-        return PCollection
-            .<T>createPrimitiveOutputInternal(
-                input.getPipeline(),
-                WindowingStrategy.globalDefault(),
-                IsBounded.BOUNDED)
-            .setCoder(coder);
-      } catch (CannotProvideCoderException e) {
-        throw new IllegalArgumentException("Unable to infer a coder and no Coder was specified. "
-            + "Please set a coder by invoking Create.withCoder() explicitly.", e);
-      }
-    }
-
-    /**
-     * Returns the explicitly configured coder if there is one; otherwise tries to infer a
-     * coder, first from the common run-time class of the non-null elements and then from
-     * the element values themselves.
-     *
-     * @throws CannotProvideCoderException if the elements are not all of the same class, if
-     *     the per-element coders disagree, or if no coder could be inferred at all
-     */
-    @Override
-    public Coder<T> getDefaultOutputCoder(PInput input) throws CannotProvideCoderException {
-      if (coder.isPresent()) {
-        return coder.get();
-      }
-      // First try to deduce a coder using the types of the elements.
-      // Void.class doubles as the "no non-null element seen yet" marker.
-      Class<?> elementClazz = Void.class;
-      for (T elem : elems) {
-        if (elem == null) {
-          continue;
-        }
-        Class<?> clazz = elem.getClass();
-        if (elementClazz.equals(Void.class)) {
-          elementClazz = clazz;
-        } else if (!elementClazz.equals(clazz)) {
-          // Elements are not the same type, require a user-specified coder.
-          throw new CannotProvideCoderException(
-              "Cannot provide coder for Create: The elements are not all of the same class.");
-        }
-      }
-
-      // The class-based lookup is only attempted for non-parameterized classes.
-      if (elementClazz.getTypeParameters().length == 0) {
-        try {
-          @SuppressWarnings("unchecked") // elementClazz is a wildcard type
-          Coder<T> classCoder = (Coder<T>) input.getPipeline().getCoderRegistry()
-              .getDefaultCoder(TypeDescriptor.of(elementClazz));
-          return classCoder;
-        } catch (CannotProvideCoderException exc) {
-          // let the next stage try
-        }
-      }
-
-      // If that fails, try to deduce a coder using the elements themselves
-      Optional<Coder<T>> valueCoder = Optional.absent();
-      for (T elem : elems) {
-        Coder<T> c = input.getPipeline().getCoderRegistry().getDefaultCoder(elem);
-        if (!valueCoder.isPresent()) {
-          valueCoder = Optional.of(c);
-        } else if (!Objects.equals(c, valueCoder.get())) {
-          throw new CannotProvideCoderException(
-              "Cannot provide coder for elements of " + Create.class.getSimpleName() + ":"
-              + " For their common class, no coder could be provided."
-              + " Based on their values, they do not all default to the same Coder.");
-        }
-      }
-
-      // Still absent only if the loop above never ran, i.e. there were no elements.
-      if (!valueCoder.isPresent()) {
-        throw new CannotProvideCoderException("Unable to infer a coder. Please register "
-            + "a coder for " + elementClazz.getName()
-            + " or set one explicitly by invoking Create.withCoder().");
-      }
-      return valueCoder.get();
-    }
-
-    /////////////////////////////////////////////////////////////////////////////
-
-    /** The elements of the resulting PCollection. */
-    private final transient Iterable<T> elems;
-
-    /** The coder used to encode the values to and from a binary representation. */
-    private final transient Optional<Coder<T>> coder;
-
-    /**
-     * Constructs a {@code Create.Values} transform that produces a
-     * {@link PCollection} containing the specified elements.
-     *
-     * <p>The arguments should not be modified after this is called.
-     */
-    private Values(Iterable<T> elems, Optional<Coder<T>> coder) {
-      this.elems = elems;
-      this.coder = coder;
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@code PTransform} that creates a {@code PCollection} whose elements have
-   * associated timestamps.
-   */
-  public static class TimestampedValues<T> extends Values<T> {
-    /**
-     * Returns a {@link Create.TimestampedValues} PTransform like this one that uses the given
-     * {@code Coder<T>} to decode each of the objects into a
-     * value of type {@code T}.
-     *
-     * <p>By default, {@code Create.TimestampedValues} can automatically determine the
-     * {@code Coder} to use if all elements have the same non-parameterized run-time class,
-     * and a default coder is registered for that class. See {@link CoderRegistry} for details
-     * on how defaults are determined.
-     *
-     * <p>Note that for {@link Create.TimestampedValues} with no elements, the {@link VoidCoder}
-     * is used.
-     */
-    @Override
-    public TimestampedValues<T> withCoder(Coder<T> coder) {
-      Optional<Coder<T>> explicitCoder = Optional.of(coder);
-      return new TimestampedValues<>(elems, explicitCoder);
-    }
-
-    /**
-     * Expands into a {@code Create} of the timestamped elements followed by a {@code ParDo}
-     * that re-emits each value at its own timestamp.
-     *
-     * @throws IllegalArgumentException if no coder was specified and none can be inferred
-     */
-    @Override
-    public PCollection<T> apply(PInput input) {
-      try {
-        Coder<T> valueCoder = getDefaultOutputCoder(input);
-        Coder<TimestampedValue<T>> timestampedCoder = TimestampedValueCoder.of(valueCoder);
-
-        // Materialize the (value, timestamp) pairs first...
-        PCollection<TimestampedValue<T>> withTimestamps = Pipeline.applyTransform(
-            input, Create.of(elems).withCoder(timestampedCoder));
-
-        // ...then unwrap them, moving each timestamp onto the output element.
-        PCollection<T> result = withTimestamps.apply(ParDo.of(new ConvertTimestamps<T>()));
-        result.setCoder(valueCoder);
-        return result;
-      } catch (CannotProvideCoderException e) {
-        throw new IllegalArgumentException("Unable to infer a coder and no Coder was specified. "
-            + "Please set a coder by invoking CreateTimestamped.withCoder() explicitly.", e);
-      }
-    }
-
-    /////////////////////////////////////////////////////////////////////////////
-
-    /** The timestamped elements of the resulting PCollection. */
-    private final transient Iterable<TimestampedValue<T>> elems;
-
-    /**
-     * Stores the timestamped elements and passes a view of just their values (timestamps
-     * stripped) to the {@code Values} superclass.
-     */
-    private TimestampedValues(Iterable<TimestampedValue<T>> elems,
-        Optional<Coder<T>> coder) {
-      super(
-          Iterables.transform(elems, new Function<TimestampedValue<T>, T>() {
-            @Override
-            public T apply(TimestampedValue<T> timestamped) {
-              return timestamped.getValue();
-            }
-          }), coder);
-      this.elems = elems;
-    }
-
-    /** Emits the value of each {@code TimestampedValue} at that value's timestamp. */
-    private static class ConvertTimestamps<T> extends DoFn<TimestampedValue<T>, T> {
-      @Override
-      public void processElement(ProcessContext c) {
-        TimestampedValue<T> timestamped = c.element();
-        c.outputWithTimestamp(timestamped.getValue(), timestamped.getTimestamp());
-      }
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  // Registers the evaluator below as part of initializing the Create class.
-  static {
-    registerDefaultTransformEvaluator();
-  }
-
-  /**
-   * Registers with {@link DirectPipelineRunner} an evaluator for {@link Create.Values}
-   * that forwards to {@link #evaluateHelper}.
-   */
-  // Raw types: the registration is keyed on the raw Create.Values class literal, and the
-  // generic evaluateHelper re-binds the element type.
-  @SuppressWarnings({"rawtypes", "unchecked"})
-  private static void registerDefaultTransformEvaluator() {
-    DirectPipelineRunner.registerDefaultTransformEvaluator(
-        Create.Values.class,
-        new DirectPipelineRunner.TransformEvaluator<Create.Values>() {
-          @Override
-          public void evaluate(
-              Create.Values transform,
-              DirectPipelineRunner.EvaluationContext context) {
-            evaluateHelper(transform, context);
-          }
-        });
-  }
-
-  private static <T> void evaluateHelper(
-      Create.Values<T> transform,
-      DirectPipelineRunner.EvaluationContext context) {
-    // Copy the Iterable of elems into a List, pre-sizing the list when the
-    // element count is available up front.
-    List<T> encodableElems = (transform.elems instanceof Collection)
-        ? new ArrayList<T>(((Collection<T>) transform.elems).size())
-        : new ArrayList<T>();
-    for (T candidate : transform.elems) {
-      // Each element goes through ensureElementEncodable; its result is what gets stored.
-      T checked = context.ensureElementEncodable(context.getOutput(transform), candidate);
-      encodableElems.add(checked);
-    }
-    context.setPCollection(context.getOutput(transform), encodableElems);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java
deleted file mode 100644
index 5ba9992..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFn.java
+++ /dev/null
@@ -1,563 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.transforms.display.DisplayData;
-import com.google.cloud.dataflow.sdk.transforms.display.HasDisplayData;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.util.WindowingInternals;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.base.MoreObjects;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.io.Serializable;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Objects;
-import java.util.UUID;
-
-/**
- * The argument to {@link ParDo} providing the code to use to process
- * elements of the input
- * {@link com.google.cloud.dataflow.sdk.values.PCollection}.
- *
- * <p>See {@link ParDo} for more explanation, examples of use, and
- * discussion of constraints on {@code DoFn}s, including their
- * serializability, lack of access to global shared mutable state,
- * requirements for failure tolerance, and benefits of optimization.
- *
- * <p>{@code DoFn}s can be tested in the context of a particular
- * {@code Pipeline} by running that {@code Pipeline} on sample input
- * and then checking its output.  Unit testing of a {@code DoFn},
- * separately from any {@code ParDo} transform or {@code Pipeline},
- * can be done via the {@link DoFnTester} harness.
- *
- * <p>{@link DoFnWithContext} (currently experimental) offers an alternative
- * mechanism for accessing {@link ProcessContext#window()} without the need
- * to implement {@link RequiresWindowAccess}.
- *
- * <p>See also {@link #processElement} for details on implementing the transformation
- * from {@code InputT} to {@code OutputT}.
- *
- * @param <InputT> the type of the (main) input elements
- * @param <OutputT> the type of the (main) output elements
- */
-public abstract class DoFn<InputT, OutputT> implements Serializable, HasDisplayData {
-
-  /**
-   * Information accessible to all methods in this {@code DoFn}.
-   * Used primarily to output elements.
-   */
-  public abstract class Context {
-
-    /**
-     * Returns the {@code PipelineOptions} specified with the
-     * {@link com.google.cloud.dataflow.sdk.runners.PipelineRunner}
-     * invoking this {@code DoFn}.  The {@code PipelineOptions} will
-     * be the default running via {@link DoFnTester}.
-     */
-    public abstract PipelineOptions getPipelineOptions();
-
-    /**
-     * Adds the given element to the main output {@code PCollection}.
-     *
-     * <p>Once passed to {@code output} the element should be considered
-     * immutable and not be modified in any way. It may be cached or retained
-     * by the Dataflow runtime or later steps in the pipeline, or used in
-     * other unspecified ways.
-     *
-     * <p>If invoked from {@link DoFn#processElement processElement}, the output
-     * element will have the same timestamp and be in the same windows
-     * as the input element passed to {@link DoFn#processElement processElement}.
-     *
-     * <p>If invoked from {@link #startBundle startBundle} or {@link #finishBundle finishBundle},
-     * this will attempt to use the
-     * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-     * of the input {@code PCollection} to determine what windows the element
-     * should be in, throwing an exception if the {@code WindowFn} attempts
-     * to access any information about the input element. The output element
-     * will have a timestamp of negative infinity.
-     */
-    public abstract void output(OutputT output);
-
-    /**
-     * Adds the given element to the main output {@code PCollection},
-     * with the given timestamp.
-     *
-     * <p>Once passed to {@code outputWithTimestamp} the element should not be
-     * modified in any way.
-     *
-     * <p>If invoked from {@link DoFn#processElement processElement}, the timestamp
-     * must not be older than the input element's timestamp minus
-     * {@link DoFn#getAllowedTimestampSkew getAllowedTimestampSkew}.  The output element will
-     * be in the same windows as the input element.
-     *
-     * <p>If invoked from {@link #startBundle startBundle} or {@link #finishBundle finishBundle},
-     * this will attempt to use the
-     * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-     * of the input {@code PCollection} to determine what windows the element
-     * should be in, throwing an exception if the {@code WindowFn} attempts
-     * to access any information about the input element except for the
-     * timestamp.
-     */
-    public abstract void outputWithTimestamp(OutputT output, Instant timestamp);
-
-    /**
-     * Adds the given element to the side output {@code PCollection} with the
-     * given tag.
-     *
-     * <p>Once passed to {@code sideOutput} the element should not be modified
-     * in any way.
-     *
-     * <p>The caller of {@code ParDo} uses {@link ParDo#withOutputTags withOutputTags} to
-     * specify the tags of side outputs that it consumes. Non-consumed side
-     * outputs, e.g., outputs for monitoring purposes only, don't necessarily
-     * need to be specified.
-     *
-     * <p>The output element will have the same timestamp and be in the same
-     * windows as the input element passed to {@link DoFn#processElement processElement}.
-     *
-     * <p>If invoked from {@link #startBundle startBundle} or {@link #finishBundle finishBundle},
-     * this will attempt to use the
-     * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-     * of the input {@code PCollection} to determine what windows the element
-     * should be in, throwing an exception if the {@code WindowFn} attempts
-     * to access any information about the input element. The output element
-     * will have a timestamp of negative infinity.
-     *
-     * @see ParDo#withOutputTags
-     */
-    public abstract <T> void sideOutput(TupleTag<T> tag, T output);
-
-    /**
-     * Adds the given element to the specified side output {@code PCollection},
-     * with the given timestamp.
-     *
-     * <p>Once passed to {@code sideOutputWithTimestamp} the element should not be
-     * modified in any way.
-     *
-     * <p>If invoked from {@link DoFn#processElement processElement}, the timestamp
-     * must not be older than the input element's timestamp minus
-     * {@link DoFn#getAllowedTimestampSkew getAllowedTimestampSkew}.  The output element will
-     * be in the same windows as the input element.
-     *
-     * <p>If invoked from {@link #startBundle startBundle} or {@link #finishBundle finishBundle},
-     * this will attempt to use the
-     * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-     * of the input {@code PCollection} to determine what windows the element
-     * should be in, throwing an exception if the {@code WindowFn} attempts
-     * to access any information about the input element except for the
-     * timestamp.
-     *
-     * @see ParDo#withOutputTags
-     */
-    public abstract <T> void sideOutputWithTimestamp(
-        TupleTag<T> tag, T output, Instant timestamp);
-
-    /**
-     * Creates an {@link Aggregator} in the {@link DoFn} context with the
-     * specified name and aggregation logic specified by {@link CombineFn}.
-     *
-     * <p>For internal use only.
-     *
-     * @param name the name of the aggregator
-     * @param combiner the {@link CombineFn} to use in the aggregator
-     * @return an aggregator for the provided name and {@link CombineFn} in this
-     *         context
-     */
-    @Experimental(Kind.AGGREGATOR)
-    protected abstract <AggInputT, AggOutputT> Aggregator<AggInputT, AggOutputT>
-        createAggregatorInternal(String name, CombineFn<AggInputT, ?, AggOutputT> combiner);
-
-    /**
-     * Gives every {@link Aggregator} created by the enclosing {@link DoFn} a delegate
-     * obtained from this context, making the aggregators usable within this context,
-     * and then marks the set of aggregators as final.
-     *
-     * <p>This method should be called by runners before {@link DoFn#startBundle}
-     * is executed.
-     */
-    @Experimental(Kind.AGGREGATOR)
-    protected final void setupDelegateAggregators() {
-      for (DelegatingAggregator<?, ?> registered : aggregators.values()) {
-        setupDelegateAggregator(registered);
-      }
-
-      // From here on, createAggregator fails its state check.
-      aggregatorsAreFinal = true;
-    }
-
-    private final <AggInputT, AggOutputT> void setupDelegateAggregator(
-        DelegatingAggregator<AggInputT, AggOutputT> aggregator) {
-      // Generic helper so the wildcard-typed aggregators iterated by the caller get
-      // concrete type variables for the createAggregatorInternal call.
-      aggregator.setDelegate(
-          createAggregatorInternal(aggregator.getName(), aggregator.getCombineFn()));
-    }
-  }
-
-  /**
-   * Information accessible when running {@link DoFn#processElement}.
-   */
-  public abstract class ProcessContext extends Context {
-
-    /**
-     * Returns the input element to be processed.
-     *
-     * <p>The element should be considered immutable. The Dataflow runtime will not mutate the
-     * element, so it is safe to cache, etc. The element should not be mutated by any of the
-     * {@link DoFn} methods, because it may be cached elsewhere, retained by the Dataflow runtime,
-     * or used in other unspecified ways.
-     */
-    public abstract InputT element();
-
-    /**
-     * Returns the value of the side input for the window corresponding to the
-     * window of the main input element.
-     *
-     * <p>See
-     * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn#getSideInputWindow}
-     * for how this corresponding window is determined.
-     *
-     * @throws IllegalArgumentException if this is not a side input
-     * @see ParDo#withSideInputs
-     */
-    public abstract <T> T sideInput(PCollectionView<T> view);
-
-    /**
-     * Returns the timestamp of the input element.
-     *
-     * <p>See {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window}
-     * for more information.
-     */
-    public abstract Instant timestamp();
-
-    /**
-     * Returns the window into which the input element has been assigned.
-     *
-     * <p>See {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window}
-     * for more information.
-     *
-     * @throws UnsupportedOperationException if this {@link DoFn} does
-     * not implement {@link RequiresWindowAccess}.
-     */
-    public abstract BoundedWindow window();
-
-    /**
-     * Returns information about the pane within this window into which the
-     * input element has been assigned.
-     *
-     * <p>Generally all data is in a single, uninteresting pane unless custom
-     * triggering and/or late data has been explicitly requested.
-     * See {@link com.google.cloud.dataflow.sdk.transforms.windowing.Window}
-     * for more information.
-     */
-    public abstract PaneInfo pane();
-
-    /**
-     * Returns the process context to use for implementing windowing.
-     */
-    @Experimental
-    public abstract WindowingInternals<InputT, OutputT> windowingInternals();
-  }
-
-  /**
-   * Returns the allowed timestamp skew duration, which is the maximum
-   * duration that timestamps can be shifted backward in
-   * {@link DoFn.Context#outputWithTimestamp}.
-   *
-   * <p>The default value is {@code Duration.ZERO}, in which case
-   * timestamps can only be shifted forward into the future.  For infinite
-   * skew, return {@code Duration.millis(Long.MAX_VALUE)}.
-   *
-   * <p>Note that producing an element whose timestamp is less than the
-   * current timestamp may result in late data, i.e. returning a non-zero
-   * value here does not impact watermark calculations used for firing
-   * windows.
-   *
-   * @deprecated does not interact well with the watermark.
-   */
-  @Deprecated
-  public Duration getAllowedTimestampSkew() {
-    return Duration.ZERO;
-  }
-
-  /**
-   * Interface for signaling that a {@link DoFn} needs to access the window the
-   * element is being processed in, via {@link DoFn.ProcessContext#window}.
-   */
-  @Experimental
-  public interface RequiresWindowAccess {}
-
-  /**
-   * Creates a {@code DoFn} with an initially empty set of aggregators.
-   */
-  public DoFn() {
-    this(new HashMap<String, DelegatingAggregator<?, ?>>());
-  }
-
-  /**
-   * Creates a {@code DoFn} that keeps its aggregators, keyed by name, in the given map.
-   *
-   * <p>The map is stored as-is rather than copied, so aggregators created later through
-   * {@code createAggregator} are added to the caller's map.
-   */
-  DoFn(Map<String, DelegatingAggregator<?, ?>> aggregators) {
-    this.aggregators = aggregators;
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  private final Map<String, DelegatingAggregator<?, ?>> aggregators;
-
-  /**
-   * Protects aggregators from being created after initialization.
-   */
-  private boolean aggregatorsAreFinal;
-
-  /**
-   * Prepares this {@code DoFn} instance for processing a batch of elements.
-   *
-   * <p>By default, does nothing.
-   */
-  public void startBundle(Context c) throws Exception {
-  }
-
-  /**
-   * Processes one input element.
-   *
-   * <p>The current element of the input {@code PCollection} is returned by
-   * {@link ProcessContext#element() c.element()}. It should be considered immutable. The Dataflow
-   * runtime will not mutate the element, so it is safe to cache, etc. The element should not be
-   * mutated by any of the {@link DoFn} methods, because it may be cached elsewhere, retained by the
-   * Dataflow runtime, or used in other unspecified ways.
-   *
-   * <p>A value is added to the main output {@code PCollection} by {@link ProcessContext#output}.
-   * Once passed to {@code output} the element should be considered immutable and not be modified in
-   * any way. It may be cached elsewhere, retained by the Dataflow runtime, or used in other
-   * unspecified ways.
-   *
-   * @see ProcessContext
-   */
-  public abstract void processElement(ProcessContext c) throws Exception;
-
-  /**
-   * Finishes processing this batch of elements.
-   *
-   * <p>By default, does nothing.
-   */
-  public void finishBundle(Context c) throws Exception {
-  }
-
-  /**
-   * {@inheritDoc}
-   *
-   * <p>By default, does not register any display data. Implementors may override this method
-   * to provide their own display metadata.
-   */
-  @Override
-  public void populateDisplayData(DisplayData.Builder builder) {
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Returns a {@link TypeDescriptor} capturing what is known statically
-   * about the input type of this {@code DoFn} instance's most-derived
-   * class.
-   *
-   * <p>See {@link #getOutputTypeDescriptor} for more discussion.
-   */
-  protected TypeDescriptor<InputT> getInputTypeDescriptor() {
-    return new TypeDescriptor<InputT>(getClass()) {};
-  }
-
-  /**
-   * Returns a {@link TypeDescriptor} capturing what is known statically
-   * about the output type of this {@code DoFn} instance's
-   * most-derived class.
-   *
-   * <p>In the normal case of a concrete {@code DoFn} subclass with
-   * no generic type parameters of its own (including anonymous inner
-   * classes), this will be a complete non-generic type, which is good
-   * for choosing a default output {@code Coder<OutputT>} for the output
-   * {@code PCollection<OutputT>}.
-   */
-  protected TypeDescriptor<OutputT> getOutputTypeDescriptor() {
-    return new TypeDescriptor<OutputT>(getClass()) {};
-  }
-
-  /**
-   * Returns an {@link Aggregator} with aggregation logic specified by the
-   * {@link CombineFn} argument. The name provided must be unique across
-   * {@link Aggregator}s created within the DoFn. Aggregators can only be created
-   * during pipeline construction.
-   *
-   * @param name the name of the aggregator
-   * @param combiner the {@link CombineFn} to use in the aggregator
-   * @return an aggregator for the provided name and combiner in the scope of
-   *         this DoFn
-   * @throws NullPointerException if the name or combiner is null
-   * @throws IllegalArgumentException if the given name collides with another
-   *         aggregator in this scope
-   * @throws IllegalStateException if called during pipeline processing.
-   */
-  protected final <AggInputT, AggOutputT> Aggregator<AggInputT, AggOutputT>
-      createAggregator(String name, CombineFn<? super AggInputT, ?, AggOutputT> combiner) {
-    // Argument validation comes first; the construction-time check comes after it.
-    checkNotNull(name, "name cannot be null");
-    checkNotNull(combiner, "combiner cannot be null");
-    final boolean nameIsTaken = aggregators.containsKey(name);
-    checkArgument(!nameIsTaken,
-        "Cannot create aggregator with name %s."
-        + " An Aggregator with that name already exists within this scope.",
-        name);
-
-    checkState(!aggregatorsAreFinal, "Cannot create an aggregator during DoFn processing."
-        + " Aggregators should be registered during pipeline construction.");
-
-    // Register a delegating placeholder; its delegate is supplied separately via setDelegate.
-    DelegatingAggregator<AggInputT, AggOutputT> created =
-        new DelegatingAggregator<>(name, combiner);
-    aggregators.put(name, created);
-    return created;
-  }
-
-  /**
-   * Returns an {@link Aggregator} with the aggregation logic specified by the
-   * {@link SerializableFunction} argument. The name provided must be unique
-   * across {@link Aggregator}s created within the DoFn. Aggregators can only be
-   * created during pipeline construction.
-   *
-   * @param name the name of the aggregator
-   * @param combiner the {@link SerializableFunction} to use in the aggregator
-   * @return an aggregator for the provided name and combiner in the scope of
-   *         this DoFn
-   * @throws NullPointerException if the name or combiner is null
-   * @throws IllegalArgumentException if the given name collides with another
-   *         aggregator in this scope
-   * @throws IllegalStateException if called during pipeline processing.
-   */
-  protected final <AggInputT> Aggregator<AggInputT, AggInputT> createAggregator(String name,
-      SerializableFunction<Iterable<AggInputT>, AggInputT> combiner) {
-    checkNotNull(combiner, "combiner cannot be null.");
-    return createAggregator(name, Combine.IterableCombineFn.of(combiner));
-  }
-
-  /**
-   * Returns the {@link Aggregator Aggregators} created by this {@code DoFn}.
-   */
-  Collection<Aggregator<?, ?>> getAggregators() {
-    return Collections.<Aggregator<?, ?>>unmodifiableCollection(aggregators.values());
-  }
-
-  /**
-   * An {@link Aggregator} that forwards {@code addValue} calls to another aggregator,
-   * its delegate, which is installed via {@link #setDelegate}.
-   *
-   * @param <AggInputT> the type of input element
-   * @param <AggOutputT> the type of output element
-   */
-  static class DelegatingAggregator<AggInputT, AggOutputT> implements
-      Aggregator<AggInputT, AggOutputT>, Serializable {
-    /** Random identifier assigned at construction; participates in equality. */
-    private final UUID id;
-
-    private final String name;
-
-    private final CombineFn<AggInputT, ?, AggOutputT> combineFn;
-
-    /** The aggregator that receives values; {@code null} until {@link #setDelegate}. */
-    private Aggregator<AggInputT, ?> delegate;
-
-    /**
-     * Creates an aggregator with the given name and combining logic and no delegate.
-     *
-     * @throws NullPointerException if the name or combiner is null
-     */
-    public DelegatingAggregator(String name,
-        CombineFn<? super AggInputT, ?, AggOutputT> combiner) {
-      this.id = UUID.randomUUID();
-      this.name = checkNotNull(name, "name cannot be null");
-      // Safe contravariant cast
-      @SuppressWarnings("unchecked")
-      CombineFn<AggInputT, ?, AggOutputT> specificCombiner =
-          (CombineFn<AggInputT, ?, AggOutputT>) checkNotNull(combiner, "combineFn cannot be null");
-      this.combineFn = specificCombiner;
-    }
-
-    /**
-     * Forwards the value to the delegate.
-     *
-     * @throws IllegalStateException if no delegate has been set
-     */
-    @Override
-    public void addValue(AggInputT value) {
-      if (delegate == null) {
-        throw new IllegalStateException(
-            "addValue cannot be called on Aggregator outside of the execution of a DoFn.");
-      }
-      delegate.addValue(value);
-    }
-
-    @Override
-    public String getName() {
-      return name;
-    }
-
-    @Override
-    public CombineFn<AggInputT, ?, AggOutputT> getCombineFn() {
-      return combineFn;
-    }
-
-    /**
-     * Sets the current delegate of the Aggregator.
-     *
-     * @param delegate the delegate to set in this aggregator
-     */
-    public void setDelegate(Aggregator<AggInputT, ?> delegate) {
-      this.delegate = delegate;
-    }
-
-    @Override
-    public String toString() {
-      MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(getClass());
-      description.add("name", name);
-      description.add("combineFn", combineFn);
-      return description.toString();
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(id, name, combineFn.getClass());
-    }
-
-    /**
-     * Indicates whether some other object is "equal to" this one.
-     *
-     * <p>{@code DelegatingAggregator} instances are equal if they have the same name, their
-     * CombineFns are the same class, and they have identical IDs.
-     */
-    @Override
-    public boolean equals(Object o) {
-      if (this == o) {
-        return true;
-      }
-      // instanceof is false for null, so this also rejects a null argument.
-      if (!(o instanceof DelegatingAggregator)) {
-        return false;
-      }
-      DelegatingAggregator<?, ?> other = (DelegatingAggregator<?, ?>) o;
-      return Objects.equals(this.id, other.id)
-          && Objects.equals(this.name, other.name)
-          && Objects.equals(this.combineFn.getClass(), other.combineFn.getClass());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnReflector.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnReflector.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnReflector.java
deleted file mode 100644
index 1c46541..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnReflector.java
+++ /dev/null
@@ -1,668 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.transforms.DoFnWithContext.ExtraContextFactory;
-import com.google.cloud.dataflow.sdk.transforms.DoFnWithContext.FinishBundle;
-import com.google.cloud.dataflow.sdk.transforms.DoFnWithContext.ProcessElement;
-import com.google.cloud.dataflow.sdk.transforms.DoFnWithContext.StartBundle;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.util.UserCodeException;
-import com.google.cloud.dataflow.sdk.util.WindowingInternals;
-import com.google.cloud.dataflow.sdk.util.common.ReflectHelpers;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Function;
-import com.google.common.base.Throwables;
-import com.google.common.collect.FluentIterable;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.reflect.TypeParameter;
-import com.google.common.reflect.TypeToken;
-
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.lang.annotation.Annotation;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.lang.reflect.Modifier;
-import java.lang.reflect.ParameterizedType;
-import java.lang.reflect.Type;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * Utility implementing the necessary reflection for working with {@link DoFnWithContext}s.
- */
-public abstract class DoFnReflector {
-
-  private interface ExtraContextInfo {
-    /**
-     * Create an instance of this extra context type using the given factory.
-     */
-    <InputT, OutputT> Object createInstance(
-        DoFnWithContext.ExtraContextFactory<InputT, OutputT> factory);
-
-    /**
-     * Create the type token for the given type, filling in the generics.
-     */
-    <InputT, OutputT> TypeToken<?> tokenFor(TypeToken<InputT> in, TypeToken<OutputT> out);
-  }
-
-  private static final Map<Class<?>, ExtraContextInfo> EXTRA_CONTEXTS = Collections.emptyMap();
-  private static final Map<Class<?>, ExtraContextInfo> EXTRA_PROCESS_CONTEXTS =
-      ImmutableMap.<Class<?>, ExtraContextInfo>builder()
-      .putAll(EXTRA_CONTEXTS)
-      .put(BoundedWindow.class, new ExtraContextInfo() {
-        @Override
-        public <InputT, OutputT> Object
-            createInstance(ExtraContextFactory<InputT, OutputT> factory) {
-          return factory.window();
-        }
-
-        @Override
-        public <InputT, OutputT> TypeToken<?>
-            tokenFor(TypeToken<InputT> in, TypeToken<OutputT> out) {
-          return TypeToken.of(BoundedWindow.class);
-        }
-      })
-      .put(WindowingInternals.class, new ExtraContextInfo() {
-        @Override
-        public <InputT, OutputT> Object
-            createInstance(ExtraContextFactory<InputT, OutputT> factory) {
-          return factory.windowingInternals();
-        }
-
-        @Override
-        public <InputT, OutputT> TypeToken<?>
-            tokenFor(TypeToken<InputT> in, TypeToken<OutputT> out) {
-          return new TypeToken<WindowingInternals<InputT, OutputT>>() {
-            }
-          .where(new TypeParameter<InputT>() {}, in)
-          .where(new TypeParameter<OutputT>() {}, out);
-        }
-      })
-      .build();
-
-  /**
-   * @return true if the reflected {@link DoFnWithContext} uses a Single Window.
-   */
-  public abstract boolean usesSingleWindow();
-
-  /**
-   * Invoke the reflected {@link ProcessElement} method on the given instance.
-   *
-   * @param fn an instance of the {@link DoFnWithContext} to invoke {@link ProcessElement} on.
-   * @param c the {@link com.google.cloud.dataflow.sdk.transforms.DoFnWithContext.ProcessContext}
-   *     to pass to {@link ProcessElement}.
-   */
-  abstract <InputT, OutputT> void invokeProcessElement(
-      DoFnWithContext<InputT, OutputT> fn,
-      DoFnWithContext<InputT, OutputT>.ProcessContext c,
-      ExtraContextFactory<InputT, OutputT> extra);
-
-  /**
-   * Invoke the reflected {@link StartBundle} method on the given instance.
-   *
-   * @param fn an instance of the {@link DoFnWithContext} to invoke {@link StartBundle} on.
-   * @param c the {@link com.google.cloud.dataflow.sdk.transforms.DoFnWithContext.Context}
-   *     to pass to {@link StartBundle}.
-   */
-  <InputT, OutputT> void invokeStartBundle(
-     DoFnWithContext<InputT, OutputT> fn,
-     DoFnWithContext<InputT, OutputT>.Context c,
-     ExtraContextFactory<InputT, OutputT> extra) {
-    fn.prepareForProcessing();
-  }
-
-  /**
-   * Invoke the reflected {@link FinishBundle} method on the given instance.
-   *
-   * @param fn an instance of the {@link DoFnWithContext} to invoke {@link FinishBundle} on.
-   * @param c the {@link com.google.cloud.dataflow.sdk.transforms.DoFnWithContext.Context}
-   *     to pass to {@link FinishBundle}.
-   */
-  abstract <InputT, OutputT> void invokeFinishBundle(
-      DoFnWithContext<InputT, OutputT> fn,
-      DoFnWithContext<InputT, OutputT>.Context c,
-      ExtraContextFactory<InputT, OutputT> extra);
-
-  private static final Map<Class<?>, DoFnReflector> REFLECTOR_CACHE =
-      new LinkedHashMap<Class<?>, DoFnReflector>();
-
-  /**
-   * @return the {@link DoFnReflector} for the given {@link DoFnWithContext}.
-   */
-  public static DoFnReflector of(
-      @SuppressWarnings("rawtypes") Class<? extends DoFnWithContext> fn) {
-    DoFnReflector reflector = REFLECTOR_CACHE.get(fn);
-    if (reflector != null) {
-      return reflector;
-    }
-
-    reflector = new GenericDoFnReflector(fn);
-    REFLECTOR_CACHE.put(fn, reflector);
-    return reflector;
-  }
-
-  /**
-   * Create a {@link DoFn} that wraps the given {@link DoFnWithContext}.
-   */
-  public <InputT, OutputT> DoFn<InputT, OutputT> toDoFn(DoFnWithContext<InputT, OutputT> fn) {
-    if (usesSingleWindow()) {
-      return new WindowDoFnAdapter<InputT, OutputT>(this, fn);
-    } else {
-      return new SimpleDoFnAdapter<InputT, OutputT>(this, fn);
-    }
-  }
-
-  private static String formatType(TypeToken<?> t) {
-    return ReflectHelpers.TYPE_SIMPLE_DESCRIPTION.apply(t.getType());
-  }
-
-  private static String format(Method m) {
-    return ReflectHelpers.CLASS_AND_METHOD_FORMATTER.apply(m);
-  }
-
-  private static Collection<String> describeSupportedTypes(
-      Map<Class<?>, ExtraContextInfo> extraProcessContexts,
-      final TypeToken<?> in, final TypeToken<?> out) {
-    return FluentIterable
-        .from(extraProcessContexts.values())
-        .transform(new Function<ExtraContextInfo, String>() {
-          @Override
-          @Nullable
-          public String apply(@Nullable ExtraContextInfo input) {
-            if (input == null) {
-              return null;
-            } else {
-              return formatType(input.tokenFor(in, out));
-            }
-          }
-        })
-        .toSortedSet(String.CASE_INSENSITIVE_ORDER);
-  }
-
-  @VisibleForTesting
-  static <InputT, OutputT> ExtraContextInfo[] verifyProcessMethodArguments(Method m) {
-    return verifyMethodArguments(m,
-        EXTRA_PROCESS_CONTEXTS,
-        new TypeToken<DoFnWithContext<InputT, OutputT>.ProcessContext>() {
-          },
-        new TypeParameter<InputT>() {},
-        new TypeParameter<OutputT>() {});
-  }
-
-  @VisibleForTesting
-  static <InputT, OutputT> ExtraContextInfo[] verifyBundleMethodArguments(Method m) {
-    return verifyMethodArguments(m,
-        EXTRA_CONTEXTS,
-        new TypeToken<DoFnWithContext<InputT, OutputT>.Context>() {
-          },
-        new TypeParameter<InputT>() {},
-        new TypeParameter<OutputT>() {});
-  }
-
-  /**
-   * Verify the method arguments for a given {@link DoFnWithContext} method.
-   *
-   * <p>The requirements for a method to be valid are:
-   * <ol>
-   * <li>The method has at least one argument.
-   * <li>The first argument is of type firstContextArg.
-   * <li>The remaining arguments have raw types that appear in {@code contexts}
-   * <li>Any generics on the extra context arguments match what is expected. E.g.,
-   *     {@code WindowingInternals<InputT, OutputT>} either matches the
-   *     {@code InputT} and {@code OutputT} parameters of the
-   *     {@code DoFn<InputT, OutputT>.ProcessContext}, or it uses a wildcard, etc.
-   * </ol>
-   *
-   * @param m the method to verify
-   * @param contexts mapping from raw classes to the {@link ExtraContextInfo} used
-   *     to create new instances.
-   * @param firstContextArg the expected type of the first context argument
-   * @param iParam TypeParameter representing the input type
-   * @param oParam TypeParameter representing the output type
-   */
-  @VisibleForTesting static <InputT, OutputT> ExtraContextInfo[] verifyMethodArguments(Method m,
-      Map<Class<?>, ExtraContextInfo> contexts,
-      TypeToken<?> firstContextArg, TypeParameter<InputT> iParam, TypeParameter<OutputT> oParam) {
-
-    if (!void.class.equals(m.getReturnType())) {
-      throw new IllegalStateException(String.format(
-          "%s must have a void return type", format(m)));
-    }
-    if (m.isVarArgs()) {
-      throw new IllegalStateException(String.format(
-          "%s must not have var args", format(m)));
-    }
-
-    // The first parameter must be present, and must be the specified type
-    Type[] params = m.getGenericParameterTypes();
-    TypeToken<?> contextToken = null;
-    if (params.length > 0) {
-      contextToken = TypeToken.of(params[0]);
-    }
-    if (contextToken == null
-        || !contextToken.getRawType().equals(firstContextArg.getRawType())) {
-      throw new IllegalStateException(String.format(
-          "%s must take a %s as its first argument",
-          format(m), firstContextArg.getRawType().getSimpleName()));
-    }
-    ExtraContextInfo[] contextInfos = new ExtraContextInfo[params.length - 1];
-
-    // Fill in the generics in the allExtraContextArgs interface from the types in the
-    // Context or ProcessContext DoFn.
-    ParameterizedType pt = (ParameterizedType) contextToken.getType();
-    // We actually want the owner, since ProcessContext and Context are owned by DoFnWithContext.
-    pt = (ParameterizedType) pt.getOwnerType();
-    @SuppressWarnings("unchecked")
-    TypeToken<InputT> iActual = (TypeToken<InputT>) TypeToken.of(pt.getActualTypeArguments()[0]);
-    @SuppressWarnings("unchecked")
-    TypeToken<OutputT> oActual = (TypeToken<OutputT>) TypeToken.of(pt.getActualTypeArguments()[1]);
-
-    // Each remaining parameter must have a raw type registered in contexts, and its
-    // generics must be compatible with the type expected for that context.
-    for (int i = 1; i < params.length; i++) {
-      TypeToken<?> param = TypeToken.of(params[i]);
-
-      ExtraContextInfo info = contexts.get(param.getRawType());
-      if (info == null) {
-        throw new IllegalStateException(String.format(
-            "%s is not a valid context parameter for method %s. Should be one of %s",
-            formatType(param), format(m),
-            describeSupportedTypes(contexts, iActual, oActual)));
-      }
-
-      // If we get here, the class matches, but maybe the generics don't:
-      TypeToken<?> expected = info.tokenFor(iActual, oActual);
-      if (!expected.isSubtypeOf(param)) {
-        throw new IllegalStateException(String.format(
-            "Incompatible generics in context parameter %s for method %s. Should be %s",
-            formatType(param), format(m), formatType(info.tokenFor(iActual, oActual))));
-      }
-
-      // Register the (now validated) context info
-      contextInfos[i - 1] = info;
-    }
-    return contextInfos;
-  }
-
-  /**
-   * Implementation of {@link DoFnReflector} for an arbitrary {@link DoFnWithContext}.
-   */
-  private static class GenericDoFnReflector extends DoFnReflector {
-
-    private Method startBundle;
-    private Method processElement;
-    private Method finishBundle;
-    private ExtraContextInfo[] processElementArgs;
-    private ExtraContextInfo[] startBundleArgs;
-    private ExtraContextInfo[] finishBundleArgs;
-
-    private GenericDoFnReflector(Class<?> fn) {
-      // Locate the annotated methods
-      this.processElement = findAnnotatedMethod(ProcessElement.class, fn, true);
-      this.startBundle = findAnnotatedMethod(StartBundle.class, fn, false);
-      this.finishBundle = findAnnotatedMethod(FinishBundle.class, fn, false);
-
-      // Verify that their method arguments satisfy our conditions.
-      processElementArgs = verifyProcessMethodArguments(processElement);
-      if (startBundle != null) {
-        startBundleArgs = verifyBundleMethodArguments(startBundle);
-      }
-      if (finishBundle != null) {
-        finishBundleArgs = verifyBundleMethodArguments(finishBundle);
-      }
-    }
-
-    private static Collection<Method> declaredMethodsWithAnnotation(
-        Class<? extends Annotation> anno,
-        Class<?> startClass, Class<?> stopClass) {
-      Collection<Method> matches = new ArrayList<>();
-
-      Class<?> clazz = startClass;
-      LinkedHashSet<Class<?>> interfaces = new LinkedHashSet<>();
-
-      // First, find all declared methods on the startClass and parents (up to stopClass)
-      while (clazz != null && !clazz.equals(stopClass)) {
-        for (Method method : clazz.getDeclaredMethods()) {
-          if (method.isAnnotationPresent(anno)) {
-            matches.add(method);
-          }
-        }
-
-        Collections.addAll(interfaces, clazz.getInterfaces());
-
-        clazz = clazz.getSuperclass();
-      }
-
-      // Now, iterate over all the discovered interfaces
-      for (Method method : ReflectHelpers.getClosureOfMethodsOnInterfaces(interfaces)) {
-        if (method.isAnnotationPresent(anno)) {
-          matches.add(method);
-        }
-      }
-      return matches;
-    }
-
-    private static Method findAnnotatedMethod(
-        Class<? extends Annotation> anno, Class<?> fnClazz, boolean required) {
-      Collection<Method> matches = declaredMethodsWithAnnotation(
-          anno, fnClazz, DoFnWithContext.class);
-
-      if (matches.size() == 0) {
-        if (required == true) {
-          throw new IllegalStateException(String.format(
-              "No method annotated with @%s found in %s",
-              anno.getSimpleName(), fnClazz.getName()));
-        } else {
-          return null;
-        }
-      }
-
-      // If we have at least one match, then either it should be the only match
-      // or it should be an extension of the other matches (which came from parent
-      // classes).
-      Method first = matches.iterator().next();
-      for (Method other : matches) {
-        if (!first.getName().equals(other.getName())
-            || !Arrays.equals(first.getParameterTypes(), other.getParameterTypes())) {
-          throw new IllegalStateException(String.format(
-              "Found multiple methods annotated with @%s. [%s] and [%s]",
-              anno.getSimpleName(), format(first), format(other)));
-        }
-      }
-
-      // We need to be able to call it. We require it is public.
-      if ((first.getModifiers() & Modifier.PUBLIC) == 0) {
-        throw new IllegalStateException(format(first) + " must be public");
-      }
-
-      // And make sure it's not static.
-      if ((first.getModifiers() & Modifier.STATIC) != 0) {
-        throw new IllegalStateException(format(first) + " must not be static");
-      }
-
-      first.setAccessible(true);
-      return first;
-    }
-
-    @Override
-    public boolean usesSingleWindow() {
-      return usesContext(BoundedWindow.class);
-    }
-
-    private boolean usesContext(Class<?> context) {
-      for (Class<?> clazz : processElement.getParameterTypes()) {
-        if (clazz.equals(context)) {
-          return true;
-        }
-      }
-      return false;
-    }
-
-    @Override
-    <InputT, OutputT> void invokeProcessElement(
-        DoFnWithContext<InputT, OutputT> fn,
-        DoFnWithContext<InputT, OutputT>.ProcessContext c,
-        ExtraContextFactory<InputT, OutputT> extra) {
-      invoke(processElement, fn, c, extra, processElementArgs);
-    }
-
-    @Override
-    <InputT, OutputT> void invokeStartBundle(
-        DoFnWithContext<InputT, OutputT> fn,
-        DoFnWithContext<InputT, OutputT>.Context c,
-        ExtraContextFactory<InputT, OutputT> extra) {
-      super.invokeStartBundle(fn, c, extra);
-      if (startBundle != null) {
-        invoke(startBundle, fn, c, extra, startBundleArgs);
-      }
-    }
-
-    @Override
-    <InputT, OutputT> void invokeFinishBundle(
-        DoFnWithContext<InputT, OutputT> fn,
-        DoFnWithContext<InputT, OutputT>.Context c,
-        ExtraContextFactory<InputT, OutputT> extra) {
-      if (finishBundle != null) {
-        invoke(finishBundle, fn, c, extra, finishBundleArgs);
-      }
-    }
-
-    private <InputT, OutputT> void invoke(Method m,
-        DoFnWithContext<InputT, OutputT> on,
-        DoFnWithContext<InputT, OutputT>.Context contextArg,
-        ExtraContextFactory<InputT, OutputT> extraArgFactory,
-        ExtraContextInfo[] extraArgs) {
-
-      Class<?>[] parameterTypes = m.getParameterTypes();
-      Object[] args = new Object[parameterTypes.length];
-      args[0] = contextArg;
-      for (int i = 1; i < args.length; i++) {
-        args[i] = extraArgs[i - 1].createInstance(extraArgFactory);
-      }
-
-      try {
-        m.invoke(on, args);
-      } catch (InvocationTargetException e) {
-        // Exception in user code.
-        throw UserCodeException.wrap(e.getCause());
-      } catch (IllegalAccessException | IllegalArgumentException e) {
-        // Exception in our code.
-        throw Throwables.propagate(e);
-      }
-    }
-  }
-
-  private static class ContextAdapter<InputT, OutputT>
-      extends DoFnWithContext<InputT, OutputT>.Context
-      implements DoFnWithContext.ExtraContextFactory<InputT, OutputT> {
-
-    private DoFn<InputT, OutputT>.Context context;
-
-    private ContextAdapter(
-        DoFnWithContext<InputT, OutputT> fn, DoFn<InputT, OutputT>.Context context) {
-      fn.super();
-      this.context = context;
-    }
-
-    @Override
-    public PipelineOptions getPipelineOptions() {
-      return context.getPipelineOptions();
-    }
-
-    @Override
-    public void output(OutputT output) {
-      context.output(output);
-    }
-
-    @Override
-    public void outputWithTimestamp(OutputT output, Instant timestamp) {
-      context.outputWithTimestamp(output, timestamp);
-    }
-
-    @Override
-    public <T> void sideOutput(TupleTag<T> tag, T output) {
-      context.sideOutput(tag, output);
-    }
-
-    @Override
-    public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
-      context.sideOutputWithTimestamp(tag, output, timestamp);
-    }
-
-    @Override
-    public BoundedWindow window() {
-      // The DoFnWithContext doesn't allow us to ask for these outside ProcessElements, so this
-      // should be unreachable.
-      throw new UnsupportedOperationException("Can only get the window in ProcessElements");
-    }
-
-    @Override
-    public WindowingInternals<InputT, OutputT> windowingInternals() {
-      // The DoFnWithContext doesn't allow us to ask for these outside ProcessElements, so this
-      // should be unreachable.
-      throw new UnsupportedOperationException(
-          "Can only get the windowingInternals in ProcessElements");
-    }
-  }
-
-  private static class ProcessContextAdapter<InputT, OutputT>
-      extends DoFnWithContext<InputT, OutputT>.ProcessContext
-      implements DoFnWithContext.ExtraContextFactory<InputT, OutputT> {
-
-    private DoFn<InputT, OutputT>.ProcessContext context;
-
-    private ProcessContextAdapter(
-        DoFnWithContext<InputT, OutputT> fn,
-        DoFn<InputT, OutputT>.ProcessContext context) {
-      fn.super();
-      this.context = context;
-    }
-
-    @Override
-    public PipelineOptions getPipelineOptions() {
-      return context.getPipelineOptions();
-    }
-
-    @Override
-    public <T> T sideInput(PCollectionView<T> view) {
-      return context.sideInput(view);
-    }
-
-    @Override
-    public void output(OutputT output) {
-      context.output(output);
-    }
-
-    @Override
-    public void outputWithTimestamp(OutputT output, Instant timestamp) {
-      context.outputWithTimestamp(output, timestamp);
-    }
-
-    @Override
-    public <T> void sideOutput(TupleTag<T> tag, T output) {
-      context.sideOutput(tag, output);
-    }
-
-    @Override
-    public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
-      context.sideOutputWithTimestamp(tag, output, timestamp);
-    }
-
-    @Override
-    public InputT element() {
-      return context.element();
-    }
-
-    @Override
-    public Instant timestamp() {
-      return context.timestamp();
-    }
-
-    @Override
-    public PaneInfo pane() {
-      return context.pane();
-    }
-
-    @Override
-    public BoundedWindow window() {
-      return context.window();
-    }
-
-    @Override
-    public WindowingInternals<InputT, OutputT> windowingInternals() {
-      return context.windowingInternals();
-    }
-  }
-
-  public static Class<?> getDoFnClass(DoFn<?, ?> fn) {
-    if (fn instanceof SimpleDoFnAdapter) {
-      return ((SimpleDoFnAdapter<?, ?>) fn).fn.getClass();
-    } else {
-      return fn.getClass();
-    }
-  }
-
-  private static class SimpleDoFnAdapter<InputT, OutputT> extends DoFn<InputT, OutputT> {
-
-    private transient DoFnReflector reflector;
-    private DoFnWithContext<InputT, OutputT> fn;
-
-    private SimpleDoFnAdapter(DoFnReflector reflector, DoFnWithContext<InputT, OutputT> fn) {
-      super(fn.aggregators);
-      this.reflector = reflector;
-      this.fn = fn;
-    }
-
-    @Override
-    public void startBundle(DoFn<InputT, OutputT>.Context c) throws Exception {
-      ContextAdapter<InputT, OutputT> adapter = new ContextAdapter<>(fn, c);
-      reflector.invokeStartBundle(fn, (DoFnWithContext<InputT, OutputT>.Context) adapter, adapter);
-    }
-
-    @Override
-    public void finishBundle(DoFn<InputT, OutputT>.Context c) throws Exception {
-      ContextAdapter<InputT, OutputT> adapter = new ContextAdapter<>(fn, c);
-      reflector.invokeFinishBundle(fn, (DoFnWithContext<InputT, OutputT>.Context) adapter, adapter);
-    }
-
-    @Override
-    public void processElement(DoFn<InputT, OutputT>.ProcessContext c) throws Exception {
-      ProcessContextAdapter<InputT, OutputT> adapter = new ProcessContextAdapter<>(fn, c);
-      reflector.invokeProcessElement(
-          fn, (DoFnWithContext<InputT, OutputT>.ProcessContext) adapter, adapter);
-    }
-
-    @Override
-    protected TypeDescriptor<InputT> getInputTypeDescriptor() {
-      return fn.getInputTypeDescriptor();
-    }
-
-    @Override
-    protected TypeDescriptor<OutputT> getOutputTypeDescriptor() {
-      return fn.getOutputTypeDescriptor();
-    }
-
-    private void readObject(java.io.ObjectInputStream in)
-        throws IOException, ClassNotFoundException {
-      in.defaultReadObject();
-      reflector = DoFnReflector.of(fn.getClass());
-    }
-  }
-
-  private static class WindowDoFnAdapter<InputT, OutputT>
-  extends SimpleDoFnAdapter<InputT, OutputT> implements DoFn.RequiresWindowAccess {
-
-    private WindowDoFnAdapter(DoFnReflector reflector, DoFnWithContext<InputT, OutputT> fn) {
-      super(reflector, fn);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java
deleted file mode 100644
index 5447664..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/DoFnTester.java
+++ /dev/null
@@ -1,495 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.util.DirectModeExecutionContext;
-import com.google.cloud.dataflow.sdk.util.DirectSideInputReader;
-import com.google.cloud.dataflow.sdk.util.DoFnRunner;
-import com.google.cloud.dataflow.sdk.util.DoFnRunnerBase;
-import com.google.cloud.dataflow.sdk.util.DoFnRunners;
-import com.google.cloud.dataflow.sdk.util.PTuple;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.util.common.Counter;
-import com.google.cloud.dataflow.sdk.util.common.CounterSet;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TupleTagList;
-import com.google.common.base.Function;
-import com.google.common.base.Objects;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-
-import org.joda.time.Instant;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * A harness for unit-testing a {@link DoFn}.
- *
- * <p>For example:
- *
- * <pre> {@code
- * DoFn<InputT, OutputT> fn = ...;
- *
- * DoFnTester<InputT, OutputT> fnTester = DoFnTester.of(fn);
- *
- * // Set arguments shared across all batches:
- * fnTester.setSideInputs(...);      // If fn takes side inputs.
- * fnTester.setSideOutputTags(...);  // If fn writes to side outputs.
- *
- * // Process a batch containing a single input element:
- * Input testInput = ...;
- * List<OutputT> testOutputs = fnTester.processBatch(testInput);
- * Assert.assertThat(testOutputs,
- *                   JUnitMatchers.hasItems(...));
- *
- * // Process a bigger batch:
- * Assert.assertThat(fnTester.processBatch(i1, i2, ...),
- *                   JUnitMatchers.hasItems(...));
- * } </pre>
- *
- * @param <InputT> the type of the {@code DoFn}'s (main) input elements
- * @param <OutputT> the type of the {@code DoFn}'s (main) output elements
- */
-public class DoFnTester<InputT, OutputT> {
-  /**
-   * Returns a {@code DoFnTester} supporting unit-testing of the given
-   * {@link DoFn}.
-   */
-  @SuppressWarnings("unchecked")
-  public static <InputT, OutputT> DoFnTester<InputT, OutputT> of(DoFn<InputT, OutputT> fn) {
-    return new DoFnTester<InputT, OutputT>(fn);
-  }
-
-  /**
-   * Returns a {@code DoFnTester} supporting unit-testing of the given
-   * {@link DoFnWithContext}.
-   */
-  @SuppressWarnings("unchecked")
-  public static <InputT, OutputT> DoFnTester<InputT, OutputT>
-      of(DoFnWithContext<InputT, OutputT> fn) {
-    return new DoFnTester<InputT, OutputT>(DoFnReflector.of(fn.getClass()).toDoFn(fn));
-  }
-
-  /**
-   * Registers the tuple of values of the side input {@link PCollectionView}s to
-   * pass to the {@link DoFn} under test.
-   *
-   * <p>If needed, first creates a fresh instance of the {@link DoFn}
-   * under test.
-   *
-   * <p>If this isn't called, {@code DoFnTester} assumes the
-   * {@link DoFn} takes no side inputs.
-   */
-  public void setSideInputs(Map<PCollectionView<?>, Iterable<WindowedValue<?>>> sideInputs) {
-    this.sideInputs = sideInputs;
-    resetState();
-  }
-
-  /**
-   * Registers the values of a side input {@link PCollectionView} to
-   * pass to the {@link DoFn} under test.
-   *
-   * <p>If needed, first creates a fresh instance of the {@code DoFn}
-   * under test.
-   *
-   * <p>If this isn't called, {@code DoFnTester} assumes the
-   * {@code DoFn} takes no side inputs.
-   */
-  public void setSideInput(PCollectionView<?> sideInput, Iterable<WindowedValue<?>> value) {
-    sideInputs.put(sideInput, value);
-  }
-
-  /**
-   * Registers the values for a side input {@link PCollectionView} to
-   * pass to the {@link DoFn} under test. All values are placed
-   * in the global window.
-   */
-  public void setSideInputInGlobalWindow(
-      PCollectionView<?> sideInput,
-      Iterable<?> value) {
-    sideInputs.put(
-        sideInput,
-        Iterables.transform(value, new Function<Object, WindowedValue<?>>() {
-          @Override
-          public WindowedValue<?> apply(Object input) {
-            return WindowedValue.valueInGlobalWindow(input);
-          }
-        }));
-  }
-
-
-  /**
-   * Registers the list of {@code TupleTag}s that can be used by the
-   * {@code DoFn} under test to output to side output
-   * {@code PCollection}s.
-   *
-   * <p>If needed, first creates a fresh instance of the DoFn under test.
-   *
-   * <p>If this isn't called, {@code DoFnTester} assumes the
-   * {@code DoFn} doesn't emit to any side outputs.
-   */
-  public void setSideOutputTags(TupleTagList sideOutputTags) {
-    this.sideOutputTags = sideOutputTags.getAll();
-    resetState();
-  }
-
-  /**
-   * A convenience operation that first calls {@link #startBundle},
-   * then calls {@link #processElement} on each of the input elements, then
-   * calls {@link #finishBundle}, then returns the result of
-   * {@link #takeOutputElements}.
-   */
-  public List<OutputT> processBatch(Iterable <? extends InputT> inputElements) {
-    startBundle();
-    for (InputT inputElement : inputElements) {
-      processElement(inputElement);
-    }
-    finishBundle();
-    return takeOutputElements();
-  }
-
-  /**
-   * A convenience method for testing {@link DoFn DoFns} with bundles of elements.
-   * Logic proceeds as follows:
-   *
-   * <ol>
-   *   <li>Calls {@link #startBundle}.</li>
-   *   <li>Calls {@link #processElement} on each of the arguments.</li>
-   *   <li>Calls {@link #finishBundle}.</li>
-   *   <li>Returns the result of {@link #takeOutputElements}.</li>
-   * </ol>
-   */
-  @SafeVarargs
-  public final List<OutputT> processBatch(InputT... inputElements) {
-    return processBatch(Arrays.asList(inputElements));
-  }
-
-  /**
-   * Calls {@link DoFn#startBundle} on the {@code DoFn} under test.
-   *
-   * <p>If needed, first creates a fresh instance of the DoFn under test.
-   */
-  public void startBundle() {
-    resetState();
-    initializeState();
-    fnRunner.startBundle();
-    state = State.STARTED;
-  }
-
-  /**
-   * Calls {@link DoFn#processElement} on the {@code DoFn} under test, in a
-   * context where {@link DoFn.ProcessContext#element} returns the
-   * given element.
-   *
-   * <p>Will call {@link #startBundle} automatically, if it hasn't
-   * already been called.
-   *
-   * @throws IllegalStateException if the {@code DoFn} under test has already
-   * been finished
-   */
-  public void processElement(InputT element) {
-    if (state == State.FINISHED) {
-      throw new IllegalStateException("finishBundle() has already been called");
-    }
-    if (state == State.UNSTARTED) {
-      startBundle();
-    }
-    fnRunner.processElement(WindowedValue.valueInGlobalWindow(element));
-  }
-
-  /**
-   * Calls {@link DoFn#finishBundle} of the {@code DoFn} under test.
-   *
-   * <p>Will call {@link #startBundle} automatically, if it hasn't
-   * already been called.
-   *
-   * @throws IllegalStateException if the {@code DoFn} under test has already
-   * been finished
-   */
-  public void finishBundle() {
-    if (state == State.FINISHED) {
-      throw new IllegalStateException("finishBundle() has already been called");
-    }
-    if (state == State.UNSTARTED) {
-      startBundle();
-    }
-    fnRunner.finishBundle();
-    state = State.FINISHED;
-  }
-
-  /**
-   * Returns the elements output so far to the main output.  Does not
-   * clear them, so subsequent calls will continue to include these
-   * elements.
-   *
-   * @see #takeOutputElements
-   * @see #clearOutputElements
-   *
-   */
-  public List<OutputT> peekOutputElements() {
-    // TODO: Should we return an unmodifiable list?
-    return Lists.transform(
-        peekOutputElementsWithTimestamp(),
-        new Function<OutputElementWithTimestamp<OutputT>, OutputT>() {
-          @Override
-          @SuppressWarnings("unchecked")
-          public OutputT apply(OutputElementWithTimestamp<OutputT> input) {
-            return input.getValue();
-          }
-        });
-  }
-
-  /**
-   * Returns the elements output so far to the main output with associated timestamps.  Does not
-   * clear them, so subsequent calls will continue to include these
-   * elements.
-   *
-   * @see #takeOutputElementsWithTimestamp
-   * @see #clearOutputElements
-   */
-  @Experimental
-  public List<OutputElementWithTimestamp<OutputT>> peekOutputElementsWithTimestamp() {
-    // TODO: Should we return an unmodifiable list?
-    return Lists.transform(
-        outputManager.getOutput(mainOutputTag),
-        new Function<Object, OutputElementWithTimestamp<OutputT>>() {
-          @Override
-          @SuppressWarnings("unchecked")
-          public OutputElementWithTimestamp<OutputT> apply(Object input) {
-            return new OutputElementWithTimestamp<OutputT>(
-                ((WindowedValue<OutputT>) input).getValue(),
-                ((WindowedValue<OutputT>) input).getTimestamp());
-          }
-        });
-  }
-
-  /**
-   * Clears the record of the elements output so far to the main output.
-   *
-   * @see #peekOutputElements
-   */
-  public void clearOutputElements() {
-    peekOutputElements().clear();
-  }
-
-  /**
-   * Returns the elements output so far to the main output.
-   * Clears the list so these elements don't appear in future calls.
-   *
-   * @see #peekOutputElements
-   */
-  public List<OutputT> takeOutputElements() {
-    List<OutputT> resultElems = new ArrayList<>(peekOutputElements());
-    clearOutputElements();
-    return resultElems;
-  }
-
-  /**
-   * Returns the elements output so far to the main output with associated timestamps.
-   * Clears the list so these elements don't appear in future calls.
-   *
-   * @see #peekOutputElementsWithTimestamp
-   * @see #takeOutputElements
-   * @see #clearOutputElements
-   */
-  @Experimental
-  public List<OutputElementWithTimestamp<OutputT>> takeOutputElementsWithTimestamp() {
-    List<OutputElementWithTimestamp<OutputT>> resultElems =
-        new ArrayList<>(peekOutputElementsWithTimestamp());
-    clearOutputElements();
-    return resultElems;
-  }
-
-  /**
-   * Returns the elements output so far to the side output with the
-   * given tag.  Does not clear them, so subsequent calls will
-   * continue to include these elements.
-   *
-   * @see #takeSideOutputElements
-   * @see #clearSideOutputElements
-   */
-  public <T> List<T> peekSideOutputElements(TupleTag<T> tag) {
-    // TODO: Should we return an unmodifiable list?
-    return Lists.transform(
-        outputManager.getOutput(tag),
-        new Function<WindowedValue<T>, T>() {
-          @SuppressWarnings("unchecked")
-          @Override
-          public T apply(WindowedValue<T> input) {
-            return input.getValue();
-          }});
-  }
-
-  /**
-   * Clears the record of the elements output so far to the side
-   * output with the given tag.
-   *
-   * @see #peekSideOutputElements
-   */
-  public <T> void clearSideOutputElements(TupleTag<T> tag) {
-    peekSideOutputElements(tag).clear();
-  }
-
-  /**
-   * Returns the elements output so far to the side output with the given tag.
-   * Clears the list so these elements don't appear in future calls.
-   *
-   * @see #peekSideOutputElements
-   */
-  public <T> List<T> takeSideOutputElements(TupleTag<T> tag) {
-    List<T> resultElems = new ArrayList<>(peekSideOutputElements(tag));
-    clearSideOutputElements(tag);
-    return resultElems;
-  }
-
-  /**
-   * Returns the value of the provided {@link Aggregator}.
-   */
-  public <AggregateT> AggregateT getAggregatorValue(Aggregator<?, AggregateT> agg) {
-    @SuppressWarnings("unchecked")
-    Counter<AggregateT> counter =
-        (Counter<AggregateT>)
-            counterSet.getExistingCounter("user-" + STEP_NAME + "-" + agg.getName());
-    return counter.getAggregate();
-  }
-
-  /**
-   * Holder for an OutputElement along with its associated timestamp.
-   */
-  @Experimental
-  public static class OutputElementWithTimestamp<OutputT> {
-    private final OutputT value;
-    private final Instant timestamp;
-
-    OutputElementWithTimestamp(OutputT value, Instant timestamp) {
-      this.value = value;
-      this.timestamp = timestamp;
-    }
-
-    OutputT getValue() {
-      return value;
-    }
-
-    Instant getTimestamp() {
-      return timestamp;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (!(obj instanceof OutputElementWithTimestamp)) {
-        return false;
-      }
-      OutputElementWithTimestamp<?> other = (OutputElementWithTimestamp<?>) obj;
-      return Objects.equal(other.value, value) && Objects.equal(other.timestamp, timestamp);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hashCode(value, timestamp);
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /** The possible states of processing a DoFn. */
-  enum State {
-    UNSTARTED,
-    STARTED,
-    FINISHED
-  }
-
-  /** The name of the step of a DoFnTester. */
-  static final String STEP_NAME = "stepName";
-  /** The name of the enclosing DoFn PTransform for a DoFnTester. */
-  static final String TRANSFORM_NAME = "transformName";
-
-  final PipelineOptions options = PipelineOptionsFactory.create();
-
-  /** The original DoFn under test. */
-  final DoFn<InputT, OutputT> origFn;
-
-  /** The side input values to provide to the DoFn under test. */
-  private Map<PCollectionView<?>, Iterable<WindowedValue<?>>> sideInputs =
-      new HashMap<>();
-
-  /** The output tags used by the DoFn under test. */
-  TupleTag<OutputT> mainOutputTag = new TupleTag<>();
-  List<TupleTag<?>> sideOutputTags = new ArrayList<>();
-
-  /** A deserialized copy of the original DoFn under test, if started. */
-  DoFn<InputT, OutputT> fn;
-
-  /** The ListOutputManager to examine the outputs. */
-  DoFnRunnerBase.ListOutputManager outputManager;
-
-  /** The DoFnRunner if processing is in progress. */
-  DoFnRunner<InputT, OutputT> fnRunner;
-
-  /** Counters for user-defined Aggregators if processing is in progress. */
-  CounterSet counterSet;
-
-  /** The state of processing of the DoFn under test. */
-  State state;
-
-  DoFnTester(DoFn<InputT, OutputT> origFn) {
-    this.origFn = origFn;
-    resetState();
-  }
-
-  void resetState() {
-    fn = null;
-    outputManager = null;
-    fnRunner = null;
-    counterSet = null;
-    state = State.UNSTARTED;
-  }
-
-  @SuppressWarnings("unchecked")
-  void initializeState() {
-    fn = (DoFn<InputT, OutputT>)
-        SerializableUtils.deserializeFromByteArray(
-            SerializableUtils.serializeToByteArray(origFn),
-            origFn.toString());
-    counterSet = new CounterSet();
-    PTuple runnerSideInputs = PTuple.empty();
-    for (Map.Entry<PCollectionView<?>, Iterable<WindowedValue<?>>> entry
-        : sideInputs.entrySet()) {
-      runnerSideInputs = runnerSideInputs.and(entry.getKey().getTagInternal(), entry.getValue());
-    }
-    outputManager = new DoFnRunnerBase.ListOutputManager();
-    fnRunner = DoFnRunners.createDefault(
-        options,
-        fn,
-        DirectSideInputReader.of(runnerSideInputs),
-        outputManager,
-        mainOutputTag,
-        sideOutputTags,
-        DirectModeExecutionContext.create().getOrCreateStepContext(STEP_NAME, TRANSFORM_NAME, null),
-        counterSet.getAddCounterMutator(),
-        WindowingStrategy.globalDefault());
-  }
-}

[02/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/PipelineTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/PipelineTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/PipelineTest.java
deleted file mode 100644
index e311252..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/PipelineTest.java
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk;
-
-import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.instanceOf;
-import static org.hamcrest.Matchers.isA;
-import static org.hamcrest.Matchers.not;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
-
-import com.google.cloud.dataflow.sdk.Pipeline.PipelineExecutionException;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions;
-import com.google.cloud.dataflow.sdk.options.PipelineOptions.CheckEnabled;
-import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.ExpectedLogs;
-import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.Flatten;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.util.UserCodeException;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
-import com.google.cloud.dataflow.sdk.values.PInput;
-import com.google.cloud.dataflow.sdk.values.POutput;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.common.collect.ImmutableList;
-
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/**
- * Tests for Pipeline.
- */
-@RunWith(JUnit4.class)
-public class PipelineTest {
-
-  @Rule public ExpectedLogs logged = ExpectedLogs.none(Pipeline.class);
-  @Rule public ExpectedException thrown = ExpectedException.none();
-
-  static class PipelineWrapper extends Pipeline {
-    protected PipelineWrapper(PipelineRunner<?> runner) {
-      super(runner, PipelineOptionsFactory.create());
-    }
-  }
-
-  // Mock class that throws a user code exception during the call to
-  // Pipeline.run().
-  static class TestPipelineRunnerThrowingUserException
-      extends PipelineRunner<PipelineResult> {
-    @Override
-    public PipelineResult run(Pipeline pipeline) {
-      Throwable t = new IllegalStateException("user code exception");
-      throw UserCodeException.wrap(t);
-    }
-  }
-
-  // Mock class that throws an SDK or API client code exception during
-  // the call to Pipeline.run().
-  static class TestPipelineRunnerThrowingSDKException
-      extends PipelineRunner<PipelineResult> {
-    @Override
-    public PipelineResult run(Pipeline pipeline) {
-      throw new IllegalStateException("SDK exception");
-    }
-  }
-
-  @Test
-  public void testPipelineUserExceptionHandling() {
-    Pipeline p = new PipelineWrapper(
-        new TestPipelineRunnerThrowingUserException());
-
-    // Check pipeline runner correctly catches user errors.
-    thrown.expect(PipelineExecutionException.class);
-    thrown.expectCause(isA(IllegalStateException.class));
-    thrown.expectMessage("user code exception");
-    p.run();
-  }
-
-  @Test
-  public void testPipelineSDKExceptionHandling() {
-    Pipeline p = new PipelineWrapper(new TestPipelineRunnerThrowingSDKException());
-
-    // Check pipeline runner correctly catches SDK errors.
-    try {
-      p.run();
-      fail("Should have thrown an exception.");
-    } catch (RuntimeException exn) {
-      // Make sure the exception isn't a UserCodeException.
-      Assert.assertThat(exn, not(instanceOf(UserCodeException.class)));
-      // Assert that the message is correct.
-      Assert.assertThat(exn.getMessage(), containsString("SDK exception"));
-      // RuntimeException should be IllegalStateException.
-      Assert.assertThat(exn, instanceOf(IllegalStateException.class));
-    }
-  }
-
-  @Test
-  @Category(com.google.cloud.dataflow.sdk.testing.RunnableOnService.class)
-  public void testMultipleApply() {
-    PTransform<PCollection<? extends String>, PCollection<String>> myTransform =
-        addSuffix("+");
-
-    Pipeline p = TestPipeline.create();
-    PCollection<String> input = p.apply(Create.<String>of(ImmutableList.of("a", "b")));
-
-    PCollection<String> left = input.apply("Left1", myTransform).apply("Left2", myTransform);
-    PCollection<String> right = input.apply("Right", myTransform);
-
-    PCollection<String> both = PCollectionList.of(left).and(right)
-        .apply(Flatten.<String>pCollections());
-
-    DataflowAssert.that(both).containsInAnyOrder("a++", "b++", "a+", "b+");
-
-    p.run();
-  }
-
-  private static PTransform<PCollection<? extends String>, PCollection<String>> addSuffix(
-      final String suffix) {
-    return ParDo.of(new DoFn<String, String>() {
-      @Override
-      public void processElement(DoFn<String, String>.ProcessContext c) {
-        c.output(c.element() + suffix);
-      }
-    });
-  }
-
-  @Test
-  public void testToString() {
-    PipelineOptions options = PipelineOptionsFactory.as(PipelineOptions.class);
-    options.setRunner(DirectPipelineRunner.class);
-    Pipeline pipeline = Pipeline.create(options);
-    assertEquals("Pipeline#" + pipeline.hashCode(), pipeline.toString());
-  }
-
-  @Test
-  public void testStableUniqueNameOff() {
-    Pipeline p = TestPipeline.create();
-    p.getOptions().setStableUniqueNames(CheckEnabled.OFF);
-
-    p.apply(Create.of(5, 6, 7));
-    p.apply(Create.of(5, 6, 7));
-
-    logged.verifyNotLogged("does not have a stable unique name.");
-  }
-
-  @Test
-  public void testStableUniqueNameWarning() {
-    Pipeline p = TestPipeline.create();
-    p.getOptions().setStableUniqueNames(CheckEnabled.WARNING);
-
-    p.apply(Create.of(5, 6, 7));
-    p.apply(Create.of(5, 6, 7));
-
-    logged.verifyWarn("does not have a stable unique name.");
-  }
-
-  @Test
-  public void testStableUniqueNameError() {
-    Pipeline p = TestPipeline.create();
-    p.getOptions().setStableUniqueNames(CheckEnabled.ERROR);
-
-    p.apply(Create.of(5, 6, 7));
-
-    thrown.expectMessage("does not have a stable unique name.");
-    p.apply(Create.of(5, 6, 7));
-  }
-
-  /**
-   * Tests that Pipeline supports a pass-through identity function.
-   */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testIdentityTransform() throws Exception {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollection<Integer> output = pipeline
-        .apply(Create.<Integer>of(1, 2, 3, 4))
-        .apply("IdentityTransform", new IdentityTransform<PCollection<Integer>>());
-
-    DataflowAssert.that(output).containsInAnyOrder(1, 2, 3, 4);
-    pipeline.run();
-  }
-
-  private static class IdentityTransform<T extends PInput & POutput>
-      extends PTransform<T, T> {
-    @Override
-    public T apply(T input) {
-      return input;
-    }
-  }
-
-  /**
-   * Tests that Pipeline supports pulling an element out of a tuple as a transform.
-   */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testTupleProjectionTransform() throws Exception {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollection<Integer> input = pipeline
-        .apply(Create.<Integer>of(1, 2, 3, 4));
-
-    TupleTag<Integer> tag = new TupleTag<Integer>();
-    PCollectionTuple tuple = PCollectionTuple.of(tag, input);
-
-    PCollection<Integer> output = tuple
-        .apply("ProjectTag", new TupleProjectionTransform<Integer>(tag));
-
-    DataflowAssert.that(output).containsInAnyOrder(1, 2, 3, 4);
-    pipeline.run();
-  }
-
-  private static class TupleProjectionTransform<T>
-      extends PTransform<PCollectionTuple, PCollection<T>> {
-    private TupleTag<T> tag;
-
-    public TupleProjectionTransform(TupleTag<T> tag) {
-      this.tag = tag;
-    }
-
-    @Override
-    public PCollection<T> apply(PCollectionTuple input) {
-      return input.get(tag);
-    }
-  }
-
-  /**
-   * Tests that Pipeline supports putting an element into a tuple as a transform.
-   */
-  @Test
-  @Category(RunnableOnService.class)
-  public void testTupleInjectionTransform() throws Exception {
-    Pipeline pipeline = TestPipeline.create();
-
-    PCollection<Integer> input = pipeline
-        .apply(Create.<Integer>of(1, 2, 3, 4));
-
-    TupleTag<Integer> tag = new TupleTag<Integer>();
-
-    PCollectionTuple output = input
-        .apply("ProjectTag", new TupleInjectionTransform<Integer>(tag));
-
-    DataflowAssert.that(output.get(tag)).containsInAnyOrder(1, 2, 3, 4);
-    pipeline.run();
-  }
-
-  private static class TupleInjectionTransform<T>
-      extends PTransform<PCollection<T>, PCollectionTuple> {
-    private TupleTag<T> tag;
-
-    public TupleInjectionTransform(TupleTag<T> tag) {
-      this.tag = tag;
-    }
-
-    @Override
-    public PCollectionTuple apply(PCollection<T> input) {
-      return PCollectionTuple.of(tag, input);
-    }
-  }
-
-  /**
-   * Tests that an empty pipeline runs.
-   */
-  @Test
-  public void testEmptyPipeline() throws Exception {
-    Pipeline pipeline = TestPipeline.create();
-    pipeline.run();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/TestUtils.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/TestUtils.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/TestUtils.java
deleted file mode 100644
index 257ecbb..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/TestUtils.java
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk;
-
-import static org.junit.Assert.assertThat;
-
-import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
-import com.google.cloud.dataflow.sdk.values.KV;
-
-import org.hamcrest.CoreMatchers;
-import org.hamcrest.Description;
-import org.hamcrest.Matcher;
-import org.hamcrest.TypeSafeMatcher;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * Utilities for tests.
- */
-public class TestUtils {
-  // Do not instantiate.
-  private TestUtils() {}
-
-  public static final String[] NO_LINES_ARRAY = new String[] { };
-
-  public static final List<String> NO_LINES = Arrays.asList(NO_LINES_ARRAY);
-
-  public static final String[] LINES_ARRAY = new String[] {
-      "To be, or not to be: that is the question: ",
-      "Whether 'tis nobler in the mind to suffer ",
-      "The slings and arrows of outrageous fortune, ",
-      "Or to take arms against a sea of troubles, ",
-      "And by opposing end them? To die: to sleep; ",
-      "No more; and by a sleep to say we end ",
-      "The heart-ache and the thousand natural shocks ",
-      "That flesh is heir to, 'tis a consummation ",
-      "Devoutly to be wish'd. To die, to sleep; ",
-      "To sleep: perchance to dream: ay, there's the rub; ",
-      "For in that sleep of death what dreams may come ",
-      "When we have shuffled off this mortal coil, ",
-      "Must give us pause: there's the respect ",
-      "That makes calamity of so long life; ",
-      "For who would bear the whips and scorns of time, ",
-      "The oppressor's wrong, the proud man's contumely, ",
-      "The pangs of despised love, the law's delay, ",
-      "The insolence of office and the spurns ",
-      "That patient merit of the unworthy takes, ",
-      "When he himself might his quietus make ",
-      "With a bare bodkin? who would fardels bear, ",
-      "To grunt and sweat under a weary life, ",
-      "But that the dread of something after death, ",
-      "The undiscover'd country from whose bourn ",
-      "No traveller returns, puzzles the will ",
-      "And makes us rather bear those ills we have ",
-      "Than fly to others that we know not of? ",
-      "Thus conscience does make cowards of us all; ",
-      "And thus the native hue of resolution ",
-      "Is sicklied o'er with the pale cast of thought, ",
-      "And enterprises of great pith and moment ",
-      "With this regard their currents turn awry, ",
-      "And lose the name of action.--Soft you now! ",
-      "The fair Ophelia! Nymph, in thy orisons ",
-      "Be all my sins remember'd." };
-
-  public static final List<String> LINES = Arrays.asList(LINES_ARRAY);
-
-  public static final String[] LINES2_ARRAY = new String[] {
-    "hi", "there", "bob!" };
-
-  public static final List<String> LINES2 = Arrays.asList(LINES2_ARRAY);
-
-  public static final Integer[] NO_INTS_ARRAY = new Integer[] { };
-
-  public static final List<Integer> NO_INTS = Arrays.asList(NO_INTS_ARRAY);
-
-  public static final Integer[] INTS_ARRAY = new Integer[] {
-    3, 42, Integer.MAX_VALUE, 0, -1, Integer.MIN_VALUE, 666 };
-
-  public static final List<Integer> INTS = Arrays.asList(INTS_ARRAY);
-
-  /**
-   * Matcher for KVs.
-   */
-  public static class KvMatcher<K, V>
-      extends TypeSafeMatcher<KV<? extends K, ? extends V>> {
-    final Matcher<? super K> keyMatcher;
-    final Matcher<? super V> valueMatcher;
-
-    public static <K, V> KvMatcher<K, V> isKv(Matcher<K> keyMatcher,
-                                              Matcher<V> valueMatcher) {
-      return new KvMatcher<>(keyMatcher, valueMatcher);
-    }
-
-    public KvMatcher(Matcher<? super K> keyMatcher,
-                     Matcher<? super V> valueMatcher) {
-      this.keyMatcher = keyMatcher;
-      this.valueMatcher = valueMatcher;
-    }
-
-    @Override
-    public boolean matchesSafely(KV<? extends K, ? extends V> kv) {
-      return keyMatcher.matches(kv.getKey())
-          && valueMatcher.matches(kv.getValue());
-    }
-
-    @Override
-    public void describeTo(Description description) {
-      description
-          .appendText("a KV(").appendValue(keyMatcher)
-          .appendText(", ").appendValue(valueMatcher)
-          .appendText(")");
-    }
-  }
-
-  ////////////////////////////////////////////////////////////////////////////
-  // Utilities for testing CombineFns, ensuring they give correct results
-  // across various permutations and shardings of the input.
-
-  public static <InputT, AccumT, OutputT> void checkCombineFn(
-      CombineFn<InputT, AccumT, OutputT> fn, List<InputT> input, final OutputT expected) {
-    checkCombineFn(fn, input, CoreMatchers.is(expected));
-  }
-
-  public static <InputT, AccumT, OutputT> void checkCombineFn(
-      CombineFn<InputT, AccumT, OutputT> fn, List<InputT> input, Matcher<? super OutputT> matcher) {
-    checkCombineFnInternal(fn, input, matcher);
-    Collections.shuffle(input);
-    checkCombineFnInternal(fn, input, matcher);
-  }
-
-  private static <InputT, AccumT, OutputT> void checkCombineFnInternal(
-      CombineFn<InputT, AccumT, OutputT> fn, List<InputT> input, Matcher<? super OutputT> matcher) {
-    int size = input.size();
-    checkCombineFnShards(fn, Collections.singletonList(input), matcher);
-    checkCombineFnShards(fn, shardEvenly(input, 2), matcher);
-    if (size > 4) {
-      checkCombineFnShards(fn, shardEvenly(input, size / 2), matcher);
-      checkCombineFnShards(
-          fn, shardEvenly(input, (int) (size / Math.sqrt(size))), matcher);
-    }
-    checkCombineFnShards(fn, shardExponentially(input, 1.4), matcher);
-    checkCombineFnShards(fn, shardExponentially(input, 2), matcher);
-    checkCombineFnShards(fn, shardExponentially(input, Math.E), matcher);
-  }
-
-  public static <InputT, AccumT, OutputT> void checkCombineFnShards(
-      CombineFn<InputT, AccumT, OutputT> fn,
-      List<? extends Iterable<InputT>> shards,
-      Matcher<? super OutputT> matcher) {
-    checkCombineFnShardsInternal(fn, shards, matcher);
-    Collections.shuffle(shards);
-    checkCombineFnShardsInternal(fn, shards, matcher);
-  }
-
-  private static <InputT, AccumT, OutputT> void checkCombineFnShardsInternal(
-      CombineFn<InputT, AccumT, OutputT> fn,
-      Iterable<? extends Iterable<InputT>> shards,
-      Matcher<? super OutputT> matcher) {
-    List<AccumT> accumulators = new ArrayList<>();
-    int maybeCompact = 0;
-    for (Iterable<InputT> shard : shards) {
-      AccumT accumulator = fn.createAccumulator();
-      for (InputT elem : shard) {
-        accumulator = fn.addInput(accumulator, elem);
-      }
-      if (maybeCompact++ % 2 == 0) {
-        accumulator = fn.compact(accumulator);
-      }
-      accumulators.add(accumulator);
-    }
-    AccumT merged = fn.mergeAccumulators(accumulators);
-    assertThat(fn.extractOutput(merged), matcher);
-  }
-
-  private static <T> List<List<T>> shardEvenly(List<T> input, int numShards) {
-    List<List<T>> shards = new ArrayList<>(numShards);
-    for (int i = 0; i < numShards; i++) {
-      shards.add(input.subList(i * input.size() / numShards,
-                               (i + 1) * input.size() / numShards));
-    }
-    return shards;
-  }
-
-  private static <T> List<List<T>> shardExponentially(
-      List<T> input, double base) {
-    assert base > 1.0;
-    List<List<T>> shards = new ArrayList<>();
-    int end = input.size();
-    while (end > 0) {
-      int start = (int) (end / base);
-      shards.add(input.subList(start, end));
-      end = start;
-    }
-    return shards;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/WindowMatchers.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/WindowMatchers.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/WindowMatchers.java
deleted file mode 100644
index 9d7cfc8..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/WindowMatchers.java
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.util.WindowedValue;
-
-import org.hamcrest.Description;
-import org.hamcrest.Matcher;
-import org.hamcrest.Matchers;
-import org.hamcrest.TypeSafeMatcher;
-import org.joda.time.Instant;
-
-import java.util.Collection;
-import java.util.Objects;
-
-/**
- * Matchers that are useful for working with Windowing, Timestamps, etc.
- */
-public class WindowMatchers {
-
-  public static <T> Matcher<WindowedValue<? extends T>> isWindowedValue(
-      Matcher<? super T> valueMatcher, Matcher<? super Instant> timestampMatcher,
-      Matcher<? super Collection<? extends BoundedWindow>> windowsMatcher) {
-    return new WindowedValueMatcher<>(valueMatcher, timestampMatcher, windowsMatcher);
-  }
-
-  public static <T> Matcher<WindowedValue<? extends T>> isWindowedValue(
-      Matcher<? super T> valueMatcher, Matcher<? super Instant> timestampMatcher) {
-    return new WindowedValueMatcher<>(valueMatcher, timestampMatcher, Matchers.anything());
-  }
-
-  public static <T> Matcher<WindowedValue<? extends T>> isSingleWindowedValue(
-      T value, long timestamp, long windowStart, long windowEnd) {
-    return WindowMatchers.<T>isSingleWindowedValue(
-        Matchers.equalTo(value), timestamp, windowStart, windowEnd);
-  }
-
-  public static <T> Matcher<WindowedValue<? extends T>> isSingleWindowedValue(
-      Matcher<T> valueMatcher, long timestamp, long windowStart, long windowEnd) {
-    IntervalWindow intervalWindow =
-        new IntervalWindow(new Instant(windowStart), new Instant(windowEnd));
-    return WindowMatchers.<T>isSingleWindowedValue(
-        valueMatcher,
-        Matchers.describedAs("%0", Matchers.equalTo(new Instant(timestamp)), timestamp),
-        Matchers.<BoundedWindow>equalTo(intervalWindow));
-  }
-
-  public static <T> Matcher<WindowedValue<? extends T>> isSingleWindowedValue(
-      Matcher<? super T> valueMatcher, Matcher<? super Instant> timestampMatcher,
-      Matcher<? super BoundedWindow> windowMatcher) {
-    return new WindowedValueMatcher<T>(
-        valueMatcher, timestampMatcher, Matchers.contains(windowMatcher));
-  }
-
-  public static Matcher<IntervalWindow> intervalWindow(long start, long end) {
-    return Matchers.equalTo(new IntervalWindow(new Instant(start), new Instant(end)));
-  }
-
-  public static <T> Matcher<WindowedValue<? extends T>> valueWithPaneInfo(final PaneInfo paneInfo) {
-    return new TypeSafeMatcher<WindowedValue<? extends T>>() {
-      @Override
-      public void describeTo(Description description) {
-        description
-            .appendText("WindowedValue(paneInfo = ").appendValue(paneInfo).appendText(")");
-      }
-
-      @Override
-      protected boolean matchesSafely(WindowedValue<? extends T> item) {
-        return Objects.equals(item.getPane(), paneInfo);
-      }
-
-      @Override
-      protected void describeMismatchSafely(
-          WindowedValue<? extends T> item, Description mismatchDescription) {
-        mismatchDescription.appendValue(item.getPane());
-      }
-    };
-  }
-
-  @SuppressWarnings({"unchecked", "rawtypes"})
-  @SafeVarargs
-  public static final <W extends BoundedWindow> Matcher<Iterable<W>> ofWindows(
-      Matcher<W>... windows) {
-    return (Matcher) Matchers.<W>containsInAnyOrder(windows);
-  }
-
-  private WindowMatchers() {}
-
-  private static class WindowedValueMatcher<T> extends TypeSafeMatcher<WindowedValue<? extends T>> {
-
-    private Matcher<? super T> valueMatcher;
-    private Matcher<? super Instant> timestampMatcher;
-    private Matcher<? super Collection<? extends BoundedWindow>> windowsMatcher;
-
-    private WindowedValueMatcher(
-        Matcher<? super T> valueMatcher,
-        Matcher<? super Instant> timestampMatcher,
-        Matcher<? super Collection<? extends BoundedWindow>> windowsMatcher) {
-      this.valueMatcher = valueMatcher;
-      this.timestampMatcher = timestampMatcher;
-      this.windowsMatcher = windowsMatcher;
-    }
-
-    @Override
-    public void describeTo(Description description) {
-      description
-          .appendText("a WindowedValue(").appendValue(valueMatcher)
-          .appendText(", ").appendValue(timestampMatcher)
-          .appendText(", ").appendValue(windowsMatcher)
-          .appendText(")");
-    }
-
-    @Override
-    protected boolean matchesSafely(WindowedValue<? extends T> windowedValue) {
-      return valueMatcher.matches(windowedValue.getValue())
-          && timestampMatcher.matches(windowedValue.getTimestamp())
-          && windowsMatcher.matches(windowedValue.getWindows());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java
deleted file mode 100644
index db6e944..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/AvroCoderTest.java
+++ /dev/null
@@ -1,754 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.equalTo;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.coders.Coder.Context;
-import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;
-import com.google.cloud.dataflow.sdk.testing.CoderProperties;
-import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
-import com.google.cloud.dataflow.sdk.testing.TestPipeline;
-import com.google.cloud.dataflow.sdk.transforms.Create;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.util.CloudObject;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import org.apache.avro.AvroTypeException;
-import org.apache.avro.Schema;
-import org.apache.avro.SchemaBuilder;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.reflect.AvroName;
-import org.apache.avro.reflect.AvroSchema;
-import org.apache.avro.reflect.Nullable;
-import org.apache.avro.reflect.ReflectData;
-import org.apache.avro.reflect.Stringable;
-import org.apache.avro.reflect.Union;
-import org.apache.avro.specific.SpecificData;
-import org.apache.avro.util.Utf8;
-import org.hamcrest.Description;
-import org.hamcrest.Matcher;
-import org.hamcrest.Matchers;
-import org.hamcrest.TypeSafeMatcher;
-import org.junit.Assert;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-import java.util.SortedMap;
-import java.util.SortedSet;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-/** Tests for {@link AvroCoder}. */
-@RunWith(JUnit4.class)
-public class AvroCoderTest {
-
-  @DefaultCoder(AvroCoder.class)
-  private static class Pojo {
-    public String text;
-    public int count;
-
-    // Empty constructor required for Avro decoding.
-    @SuppressWarnings("unused")
-    public Pojo() {
-    }
-
-    public Pojo(String text, int count) {
-      this.text = text;
-      this.count = count;
-    }
-
-    // auto-generated
-    @Override
-    public boolean equals(Object o) {
-      if (this == o) {
-        return true;
-      }
-      if (o == null || getClass() != o.getClass()) {
-        return false;
-      }
-
-      Pojo pojo = (Pojo) o;
-
-      if (count != pojo.count) {
-        return false;
-      }
-      if (text != null
-          ? !text.equals(pojo.text)
-          : pojo.text != null) {
-        return false;
-      }
-
-      return true;
-    }
-
-    @Override
-    public int hashCode() {
-      return 0;
-    }
-
-    @Override
-    public String toString() {
-      return "Pojo{"
-          + "text='" + text + '\''
-          + ", count=" + count
-          + '}';
-    }
-  }
-
-  private static class GetTextFn extends DoFn<Pojo, String> {
-    @Override
-    public void processElement(ProcessContext c) {
-      c.output(c.element().text);
-    }
-  }
-
-  @Test
-  public void testAvroCoderEncoding() throws Exception {
-    AvroCoder<Pojo> coder = AvroCoder.of(Pojo.class);
-    CloudObject encoding = coder.asCloudObject();
-
-    Assert.assertThat(encoding.keySet(),
-        Matchers.containsInAnyOrder("@type", "type", "schema", "encoding_id"));
-  }
-
-  @Test
-  public void testPojoEncoding() throws Exception {
-    Pojo value = new Pojo("Hello", 42);
-    AvroCoder<Pojo> coder = AvroCoder.of(Pojo.class);
-
-    CoderProperties.coderDecodeEncodeEqual(coder, value);
-  }
-
-  @Test
-  public void testPojoEncodingId() throws Exception {
-    AvroCoder<Pojo> coder = AvroCoder.of(Pojo.class);
-    CoderProperties.coderHasEncodingId(coder, Pojo.class.getName());
-  }
-
-  @Test
-  public void testGenericRecordEncoding() throws Exception {
-    String schemaString =
-        "{\"namespace\": \"example.avro\",\n"
-      + " \"type\": \"record\",\n"
-      + " \"name\": \"User\",\n"
-      + " \"fields\": [\n"
-      + "     {\"name\": \"name\", \"type\": \"string\"},\n"
-      + "     {\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]},\n"
-      + "     {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n"
-      + " ]\n"
-      + "}";
-    Schema schema = (new Schema.Parser()).parse(schemaString);
-
-    GenericRecord before = new GenericData.Record(schema);
-    before.put("name", "Bob");
-    before.put("favorite_number", 256);
-    // Leave favorite_color null
-
-    AvroCoder<GenericRecord> coder = AvroCoder.of(GenericRecord.class, schema);
-
-    CoderProperties.coderDecodeEncodeEqual(coder, before);
-    Assert.assertEquals(schema, coder.getSchema());
-  }
-
-  @Test
-  public void testEncodingNotBuffered() throws Exception {
-    // This test ensures that the coder doesn't read ahead and buffer data.
-    // Reading ahead causes a problem if the stream consists of records of different
-    // types.
-    Pojo before = new Pojo("Hello", 42);
-
-    AvroCoder<Pojo> coder = AvroCoder.of(Pojo.class);
-    SerializableCoder<Integer> intCoder = SerializableCoder.of(Integer.class);
-
-    ByteArrayOutputStream outStream = new ByteArrayOutputStream();
-
-    Context context = Context.NESTED;
-    coder.encode(before, outStream, context);
-    intCoder.encode(10, outStream, context);
-
-    ByteArrayInputStream inStream = new ByteArrayInputStream(outStream.toByteArray());
-
-    Pojo after = coder.decode(inStream, context);
-    Assert.assertEquals(before, after);
-
-    Integer intAfter = intCoder.decode(inStream, context);
-    Assert.assertEquals(new Integer(10), intAfter);
-  }
-
-  @Test
-  public void testDefaultCoder() throws Exception {
-    Pipeline p = TestPipeline.create();
-
-    // Use MyRecord as input and output types without explicitly specifying
-    // a coder (this uses the default coders, which may not be AvroCoder).
-    PCollection<String> output =
-        p.apply(Create.of(new Pojo("hello", 1), new Pojo("world", 2)))
-            .apply(ParDo.of(new GetTextFn()));
-
-    DataflowAssert.that(output)
-        .containsInAnyOrder("hello", "world");
-    p.run();
-  }
-
-  @Test
-  public void testAvroCoderIsSerializable() throws Exception {
-    AvroCoder<Pojo> coder = AvroCoder.of(Pojo.class);
-
-    // Check that the coder is serializable using the regular JSON approach.
-    SerializableUtils.ensureSerializable(coder);
-  }
-
-  private final void assertDeterministic(AvroCoder<?> coder) {
-    try {
-      coder.verifyDeterministic();
-    } catch (NonDeterministicException e) {
-      fail("Expected " + coder + " to be deterministic, but got:\n" + e);
-    }
-  }
-
-  private final void assertNonDeterministic(AvroCoder<?> coder,
-      Matcher<String> reason1) {
-    try {
-      coder.verifyDeterministic();
-      fail("Expected " + coder + " to be non-deterministic.");
-    } catch (NonDeterministicException e) {
-      assertThat(e.getReasons(), Matchers.<String>iterableWithSize(1));
-      assertThat(e.getReasons(), Matchers.<String>contains(reason1));
-    }
-  }
-
-  @Test
-  public void testDeterministicInteger() {
-    assertDeterministic(AvroCoder.of(Integer.class));
-  }
-
-  @Test
-  public void testDeterministicInt() {
-    assertDeterministic(AvroCoder.of(int.class));
-  }
-
-  private static class SimpleDeterministicClass {
-    @SuppressWarnings("unused")
-    private Integer intField;
-    @SuppressWarnings("unused")
-    private char charField;
-    @SuppressWarnings("unused")
-    private Integer[] intArray;
-    @SuppressWarnings("unused")
-    private Utf8 utf8field;
-  }
-
-  @Test
-  public void testDeterministicSimple() {
-    assertDeterministic(AvroCoder.of(SimpleDeterministicClass.class));
-  }
-
-  private static class UnorderedMapClass {
-    @SuppressWarnings("unused")
-    private Map<String, String> mapField;
-  }
-
-  private Matcher<String> reason(final String prefix, final String messagePart) {
-    return new TypeSafeMatcher<String>(String.class) {
-      @Override
-      public void describeTo(Description description) {
-        description.appendText(String.format("Reason starting with '%s:' containing '%s'",
-            prefix, messagePart));
-      }
-
-      @Override
-      protected boolean matchesSafely(String item) {
-        return item.startsWith(prefix + ":") && item.contains(messagePart);
-      }
-    };
-  }
-
-  private Matcher<String> reasonClass(Class<?> clazz, String message) {
-    return reason(clazz.getName(), message);
-  }
-
-  private Matcher<String> reasonField(
-      Class<?> clazz, String field, String message) {
-    return reason(clazz.getName() + "#" + field, message);
-  }
-
-  @Test
-  public void testDeterministicUnorderedMap() {
-    assertNonDeterministic(AvroCoder.of(UnorderedMapClass.class),
-        reasonField(UnorderedMapClass.class, "mapField",
-            "java.util.Map<java.lang.String, java.lang.String> "
-                + "may not be deterministically ordered"));
-  }
-
-  private static class NonDeterministicArray {
-    @SuppressWarnings("unused")
-    private UnorderedMapClass[] arrayField;
-  }
-  @Test
-  public void testDeterministicNonDeterministicArray() {
-    assertNonDeterministic(AvroCoder.of(NonDeterministicArray.class),
-        reasonField(UnorderedMapClass.class, "mapField",
-            "java.util.Map<java.lang.String, java.lang.String>"
-                + " may not be deterministically ordered"));
-  }
-
-  private static class SubclassOfUnorderedMapClass extends UnorderedMapClass {}
-
-
-  @Test
-  public void testDeterministicNonDeterministicChild() {
-    // Super class has non deterministic fields.
-    assertNonDeterministic(AvroCoder.of(SubclassOfUnorderedMapClass.class),
-        reasonField(UnorderedMapClass.class, "mapField",
-            "may not be deterministically ordered"));
-  }
-
-  private static class SubclassHidingParent extends UnorderedMapClass {
-    @SuppressWarnings("unused")
-    @AvroName("mapField2") // AvroName is not enough
-    private int mapField;
-  }
-
-  @Test
-  public void testAvroProhibitsShadowing() {
-    // This test verifies that Avro won't serialize a class with two fields of
-    // the same name. This is important for our error reporting, and also how
-    // we lookup a field.
-    try {
-      ReflectData.get().getSchema(SubclassHidingParent.class);
-      fail("Expected AvroTypeException");
-    } catch (AvroTypeException e) {
-      assertThat(e.getMessage(), containsString("mapField"));
-      assertThat(e.getMessage(), containsString("two fields named"));
-    }
-  }
-
-  private static class FieldWithAvroName {
-    @AvroName("name")
-    @SuppressWarnings("unused")
-    private int someField;
-  }
-
-  @Test
-  public void testDeterministicWithAvroName() {
-    assertDeterministic(AvroCoder.of(FieldWithAvroName.class));
-  }
-
-  @Test
-  public void testDeterminismSortedMap() {
-    assertDeterministic(AvroCoder.of(StringSortedMapField.class));
-  }
-
-  private static class StringSortedMapField {
-    @SuppressWarnings("unused")
-    SortedMap<String, String> sortedMapField;
-  }
-
-  @Test
-  public void testDeterminismTreeMapValue() {
-    // The value is non-deterministic, so we should fail.
-    assertNonDeterministic(AvroCoder.of(TreeMapNonDetValue.class),
-        reasonField(UnorderedMapClass.class, "mapField",
-            "java.util.Map<java.lang.String, java.lang.String> "
-                + "may not be deterministically ordered"));
-  }
-
-  private static class TreeMapNonDetValue {
-    @SuppressWarnings("unused")
-    TreeMap<String, NonDeterministicArray> nonDeterministicField;
-  }
-
-  @Test
-  public void testDeterminismUnorderedMap() {
-    // LinkedHashMap is not deterministically ordered, so we should fail.
-    assertNonDeterministic(AvroCoder.of(LinkedHashMapField.class),
-        reasonField(LinkedHashMapField.class, "nonDeterministicMap",
-            "java.util.LinkedHashMap<java.lang.String, java.lang.String> "
-                + "may not be deterministically ordered"));
-  }
-
-  private static class LinkedHashMapField {
-    @SuppressWarnings("unused")
-    LinkedHashMap<String, String> nonDeterministicMap;
-  }
-
-  @Test
-  public void testDeterminismCollection() {
-    assertNonDeterministic(AvroCoder.of(StringCollection.class),
-        reasonField(StringCollection.class, "stringCollection",
-            "java.util.Collection<java.lang.String> may not be deterministically ordered"));
-  }
-
-  private static class StringCollection {
-    @SuppressWarnings("unused")
-    Collection<String> stringCollection;
-  }
-
-  @Test
-  public void testDeterminismList() {
-    assertDeterministic(AvroCoder.of(StringList.class));
-    assertDeterministic(AvroCoder.of(StringArrayList.class));
-  }
-
-  private static class StringList {
-    @SuppressWarnings("unused")
-    List<String> stringCollection;
-  }
-
-  private static class StringArrayList {
-    @SuppressWarnings("unused")
-    ArrayList<String> stringCollection;
-  }
-
-  @Test
-  public void testDeterminismSet() {
-    assertDeterministic(AvroCoder.of(StringSortedSet.class));
-    assertDeterministic(AvroCoder.of(StringTreeSet.class));
-    assertNonDeterministic(AvroCoder.of(StringHashSet.class),
-        reasonField(StringHashSet.class, "stringCollection",
-            "java.util.HashSet<java.lang.String> may not be deterministically ordered"));
-  }
-
-  private static class StringSortedSet{
-    @SuppressWarnings("unused")
-    SortedSet<String> stringCollection;
-  }
-
-  private static class StringTreeSet {
-    @SuppressWarnings("unused")
-    TreeSet<String> stringCollection;
-  }
-
-  private static class StringHashSet {
-    @SuppressWarnings("unused")
-    HashSet<String> stringCollection;
-  }
-
-  @Test
-  public void testDeterminismCollectionValue() {
-    assertNonDeterministic(AvroCoder.of(OrderedSetOfNonDetValues.class),
-        reasonField(UnorderedMapClass.class, "mapField",
-            "may not be deterministically ordered"));
-    assertNonDeterministic(AvroCoder.of(ListOfNonDetValues.class),
-        reasonField(UnorderedMapClass.class, "mapField",
-            "may not be deterministically ordered"));
-  }
-
-  private static class OrderedSetOfNonDetValues {
-    @SuppressWarnings("unused")
-    SortedSet<UnorderedMapClass> set;
-  }
-
-  private static class ListOfNonDetValues {
-    @SuppressWarnings("unused")
-    List<UnorderedMapClass> set;
-  }
-
-  @Test
-  public void testDeterminismUnion() {
-    assertDeterministic(AvroCoder.of(DeterministicUnionBase.class));
-    assertNonDeterministic(AvroCoder.of(NonDeterministicUnionBase.class),
-        reasonField(UnionCase3.class, "mapField", "may not be deterministically ordered"));
-  }
-
-  @Test
-  public void testDeterminismStringable() {
-    assertDeterministic(AvroCoder.of(String.class));
-    assertNonDeterministic(AvroCoder.of(StringableClass.class),
-        reasonClass(StringableClass.class, "may not have deterministic #toString()"));
-  }
-
-  @Stringable
-  private static class StringableClass {
-  }
-
-  @Test
-  public void testDeterminismCyclicClass() {
-    assertNonDeterministic(AvroCoder.of(Cyclic.class),
-        reasonField(Cyclic.class, "cyclicField", "appears recursively"));
-    assertNonDeterministic(AvroCoder.of(CyclicField.class),
-        reasonField(Cyclic.class, "cyclicField",
-    Cyclic.class.getName() + " appears recursively"));
-    assertNonDeterministic(AvroCoder.of(IndirectCycle1.class),
-        reasonField(IndirectCycle2.class, "field2",
-    IndirectCycle1.class.getName() +  " appears recursively"));
-  }
-
-  private static class Cyclic {
-    @SuppressWarnings("unused")
-    int intField;
-    @SuppressWarnings("unused")
-    Cyclic cyclicField;
-  }
-
-  private static class CyclicField {
-    @SuppressWarnings("unused")
-    Cyclic cyclicField2;
-  }
-
-  private static class IndirectCycle1 {
-    @SuppressWarnings("unused")
-    IndirectCycle2 field1;
-  }
-
-  private static class IndirectCycle2 {
-    @SuppressWarnings("unused")
-    IndirectCycle1 field2;
-  }
-
-  @Test
-  public void testDeterminismHasGenericRecord() {
-    assertDeterministic(AvroCoder.of(HasGenericRecord.class));
-  }
-
-  private static class HasGenericRecord {
-    @AvroSchema("{\"name\": \"bar\", \"type\": \"record\", \"fields\": ["
-        + "{\"name\": \"foo\", \"type\": \"int\"}]}")
-    GenericRecord genericRecord;
-  }
-
-  @Test
-  public void testDeterminismHasCustomSchema() {
-    assertNonDeterministic(AvroCoder.of(HasCustomSchema.class),
-        reasonField(HasCustomSchema.class, "withCustomSchema",
-            "Custom schemas are only supported for subtypes of IndexedRecord."));
-  }
-
-  private static class HasCustomSchema {
-    @AvroSchema("{\"name\": \"bar\", \"type\": \"record\", \"fields\": ["
-        + "{\"name\": \"foo\", \"type\": \"int\"}]}")
-    int withCustomSchema;
-  }
-
-  @Test
-  public void testAvroCoderTreeMapDeterminism()
-      throws Exception, NonDeterministicException {
-    TreeMapField size1 = new TreeMapField();
-    TreeMapField size2 = new TreeMapField();
-
-    // Different order for entries
-    size1.field.put("hello", "world");
-    size1.field.put("another", "entry");
-
-    size2.field.put("another", "entry");
-    size2.field.put("hello", "world");
-
-    AvroCoder<TreeMapField> coder = AvroCoder.of(TreeMapField.class);
-    coder.verifyDeterministic();
-
-    ByteArrayOutputStream outStream1 = new ByteArrayOutputStream();
-    ByteArrayOutputStream outStream2 = new ByteArrayOutputStream();
-
-    Context context = Context.NESTED;
-    coder.encode(size1, outStream1, context);
-    coder.encode(size2, outStream2, context);
-
-    assertTrue(Arrays.equals(
-        outStream1.toByteArray(), outStream2.toByteArray()));
-  }
-
-  private static class TreeMapField {
-    private TreeMap<String, String> field = new TreeMap<>();
-  }
-
-  @Union({ UnionCase1.class, UnionCase2.class })
-  private abstract static class DeterministicUnionBase {}
-
-  @Union({ UnionCase1.class, UnionCase2.class, UnionCase3.class })
-  private abstract static class NonDeterministicUnionBase {}
-  private static class UnionCase1 extends DeterministicUnionBase {}
-  private static class UnionCase2 extends DeterministicUnionBase {
-    @SuppressWarnings("unused")
-    String field;
-  }
-
-  private static class UnionCase3 extends NonDeterministicUnionBase {
-    @SuppressWarnings("unused")
-    private Map<String, String> mapField;
-  }
-
-  @Test
-  public void testAvroCoderSimpleSchemaDeterminism() {
-    assertDeterministic(AvroCoder.of(SchemaBuilder.record("someRecord").fields()
-        .endRecord()));
-    assertDeterministic(AvroCoder.of(SchemaBuilder.record("someRecord").fields()
-        .name("int").type().intType().noDefault()
-        .endRecord()));
-    assertDeterministic(AvroCoder.of(SchemaBuilder.record("someRecord").fields()
-        .name("string").type().stringType().noDefault()
-        .endRecord()));
-
-    assertNonDeterministic(AvroCoder.of(SchemaBuilder.record("someRecord").fields()
-        .name("map").type().map().values().stringType().noDefault()
-        .endRecord()),
-        reason("someRecord.map", "HashMap to represent MAPs"));
-
-    assertDeterministic(AvroCoder.of(SchemaBuilder.record("someRecord").fields()
-        .name("array").type().array().items().stringType().noDefault()
-        .endRecord()));
-
-    assertDeterministic(AvroCoder.of(SchemaBuilder.record("someRecord").fields()
-        .name("enum").type().enumeration("anEnum").symbols("s1", "s2").enumDefault("s1")
-        .endRecord()));
-
-    assertDeterministic(AvroCoder.of(SchemaBuilder.unionOf()
-        .intType().and()
-        .record("someRecord").fields().nullableString("someField", "").endRecord()
-        .endUnion()));
-  }
-
-  @Test
-  public void testAvroCoderStrings() {
-    // Custom Strings in Records
-    assertDeterministic(AvroCoder.of(SchemaBuilder.record("someRecord").fields()
-        .name("string").prop(SpecificData.CLASS_PROP, "java.lang.String")
-        .type().stringType().noDefault()
-        .endRecord()));
-    assertNonDeterministic(AvroCoder.of(SchemaBuilder.record("someRecord").fields()
-        .name("string").prop(SpecificData.CLASS_PROP, "unknownString")
-        .type().stringType().noDefault()
-        .endRecord()),
-        reason("someRecord.string", "unknownString is not known to be deterministic"));
-
-    // Custom Strings in Unions
-    assertNonDeterministic(AvroCoder.of(SchemaBuilder.unionOf()
-        .intType().and()
-        .record("someRecord").fields()
-        .name("someField").prop(SpecificData.CLASS_PROP, "unknownString")
-        .type().stringType().noDefault().endRecord()
-        .endUnion()),
-        reason("someRecord.someField", "unknownString is not known to be deterministic"));
-  }
-
-  @Test
-  public void testAvroCoderNestedRecords() {
-    // Nested Record
-    assertDeterministic(AvroCoder.of(SchemaBuilder.record("nestedRecord").fields()
-        .name("subRecord").type().record("subRecord").fields()
-            .name("innerField").type().stringType().noDefault()
-        .endRecord().noDefault()
-        .endRecord()));
-  }
-
-  @Test
-  public void testAvroCoderCyclicRecords() {
-    // Recursive record
-    assertNonDeterministic(AvroCoder.of(SchemaBuilder.record("cyclicRecord").fields()
-        .name("cycle").type("cyclicRecord").noDefault()
-        .endRecord()),
-        reason("cyclicRecord.cycle", "cyclicRecord appears recursively"));
-  }
-
-  private static class NullableField {
-    @SuppressWarnings("unused")
-    @Nullable private String nullable;
-  }
-
-  @Test
-  public void testNullableField() {
-    assertDeterministic(AvroCoder.of(NullableField.class));
-  }
-
-  private static class NullableNonDeterministicField {
-    @SuppressWarnings("unused")
-    @Nullable private NonDeterministicArray nullableNonDetArray;
-  }
-
-  private static class NullableCyclic {
-    @SuppressWarnings("unused")
-    @Nullable private NullableCyclic nullableNullableCyclicField;
-  }
-
-  private static class NullableCyclicField {
-    @SuppressWarnings("unused")
-    @Nullable private Cyclic nullableCyclicField;
-  }
-
-  @Test
-  public void testNullableNonDeterministicField() {
-    assertNonDeterministic(AvroCoder.of(NullableCyclic.class),
-        reasonField(NullableCyclic.class, "nullableNullableCyclicField",
-            NullableCyclic.class.getName() + " appears recursively"));
-    assertNonDeterministic(AvroCoder.of(NullableCyclicField.class),
-        reasonField(Cyclic.class, "cyclicField",
-            Cyclic.class.getName() + " appears recursively"));
-    assertNonDeterministic(AvroCoder.of(NullableNonDeterministicField.class),
-        reasonField(UnorderedMapClass.class, "mapField",
-            " may not be deterministically ordered"));
-  }
-
-  /**
-   * Tests that a parameterized class can have an automatically generated schema if the generic
-   * field is annotated with a union tag.
-   */
-  @Test
-  public void testGenericClassWithUnionAnnotation() throws Exception {
-    // Cast is safe as long as the same coder is used for encoding and decoding.
-    @SuppressWarnings({"unchecked", "rawtypes"})
-    AvroCoder<GenericWithAnnotation<String>> coder =
-        (AvroCoder) AvroCoder.of(GenericWithAnnotation.class);
-
-    assertThat(coder.getSchema().getField("onlySomeTypesAllowed").schema().getType(),
-        equalTo(Schema.Type.UNION));
-
-    CoderProperties.coderDecodeEncodeEqual(coder, new GenericWithAnnotation<>("hello"));
-  }
-
-  private static class GenericWithAnnotation<T> {
-    @AvroSchema("[\"string\", \"int\"]")
-    private T onlySomeTypesAllowed;
-
-    public GenericWithAnnotation(T value) {
-      onlySomeTypesAllowed = value;
-    }
-
-    // For deserialization only
-    @SuppressWarnings("unused")
-    protected GenericWithAnnotation() { }
-
-    @Override
-    public boolean equals(Object other) {
-      return other instanceof GenericWithAnnotation
-          && onlySomeTypesAllowed.equals(((GenericWithAnnotation<?>) other).onlySomeTypesAllowed);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(getClass(), onlySomeTypesAllowed);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoderTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoderTest.java
deleted file mode 100644
index d96c208..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianIntegerCoderTest.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.testing.CoderProperties;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Test case for {@link BigEndianIntegerCoder}.
- */
-@RunWith(JUnit4.class)
-public class BigEndianIntegerCoderTest {
-
-  private static final Coder<Integer> TEST_CODER = BigEndianIntegerCoder.of();
-
-  private static final List<Integer> TEST_VALUES = Arrays.asList(
-      -11, -3, -1, 0, 1, 5, 13, 29,
-      Integer.MAX_VALUE,
-      Integer.MIN_VALUE);
-
-  @Test
-  public void testDecodeEncodeEqual() throws Exception {
-    for (Integer value : TEST_VALUES) {
-      CoderProperties.coderDecodeEncodeEqual(TEST_CODER, value);
-    }
-  }
-
-  // This should never change. The definition of big endian encoding is fixed.
-  private static final String EXPECTED_ENCODING_ID = "";
-
-  @Test
-  public void testEncodingId() throws Exception {
-    CoderProperties.coderHasEncodingId(TEST_CODER, EXPECTED_ENCODING_ID);
-  }
-
-  /**
-   * Generated data to check that the wire format has not changed. To regenerate, see
-   * {@link com.google.cloud.dataflow.sdk.coders.PrintBase64Encodings}.
-   */
-  private static final List<String> TEST_ENCODINGS = Arrays.asList(
-      "____9Q",
-      "_____Q",
-      "_____w",
-      "AAAAAA",
-      "AAAAAQ",
-      "AAAABQ",
-      "AAAADQ",
-      "AAAAHQ",
-      "f____w",
-      "gAAAAA");
-
-  @Test
-  public void testWireFormatEncode() throws Exception {
-    CoderProperties.coderEncodesBase64(TEST_CODER, TEST_VALUES, TEST_ENCODINGS);
-  }
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  @Test
-  public void encodeNullThrowsCoderException() throws Exception {
-    thrown.expect(CoderException.class);
-    thrown.expectMessage("cannot encode a null Integer");
-
-    CoderUtils.encodeToBase64(TEST_CODER, null);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoderTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoderTest.java
deleted file mode 100644
index ea486c1..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/BigEndianLongCoderTest.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.testing.CoderProperties;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Test case for {@link BigEndianLongCoder}.
- */
-@RunWith(JUnit4.class)
-public class BigEndianLongCoderTest {
-
-  private static final Coder<Long> TEST_CODER = BigEndianLongCoder.of();
-
-  private static final List<Long> TEST_VALUES = Arrays.asList(
-      -11L, -3L, -1L, 0L, 1L, 5L, 13L, 29L,
-      Integer.MAX_VALUE + 131L,
-      Integer.MIN_VALUE - 29L,
-      Long.MAX_VALUE,
-      Long.MIN_VALUE);
-
-  @Test
-  public void testDecodeEncodeEqual() throws Exception {
-    for (Long value : TEST_VALUES) {
-      CoderProperties.coderDecodeEncodeEqual(TEST_CODER, value);
-    }
-  }
-
-  // This should never change. The definition of big endian is fixed.
-  private static final String EXPECTED_ENCODING_ID = "";
-
-  @Test
-  public void testEncodingId() throws Exception {
-    CoderProperties.coderHasEncodingId(TEST_CODER, EXPECTED_ENCODING_ID);
-  }
-
-  /**
-   * Generated data to check that the wire format has not changed. To regenerate, see
-   * {@link com.google.cloud.dataflow.sdk.coders.PrintBase64Encodings}.
-   */
-  private static final List<String> TEST_ENCODINGS = Arrays.asList(
-      "__________U",
-      "__________0",
-      "__________8",
-      "AAAAAAAAAAA",
-      "AAAAAAAAAAE",
-      "AAAAAAAAAAU",
-      "AAAAAAAAAA0",
-      "AAAAAAAAAB0",
-      "AAAAAIAAAII",
-      "_____3___-M",
-      "f_________8",
-      "gAAAAAAAAAA");
-
-  @Test
-  public void testWireFormatEncode() throws Exception {
-    CoderProperties.coderEncodesBase64(TEST_CODER, TEST_VALUES, TEST_ENCODINGS);
-  }
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  @Test
-  public void encodeNullThrowsCoderException() throws Exception {
-    thrown.expect(CoderException.class);
-    thrown.expectMessage("cannot encode a null Long");
-
-    CoderUtils.encodeToBase64(TEST_CODER, null);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java
deleted file mode 100644
index 989bc7f..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteArrayCoderTest.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.not;
-import static org.junit.Assert.assertThat;
-
-import com.google.cloud.dataflow.sdk.testing.CoderProperties;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.cloud.dataflow.sdk.util.common.CounterTestUtils;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Unit tests for {@link ByteArrayCoder}.
- */
-@RunWith(JUnit4.class)
-public class ByteArrayCoderTest {
-
-  private static final ByteArrayCoder TEST_CODER = ByteArrayCoder.of();
-
-  private static final List<byte[]> TEST_VALUES = Arrays.asList(
-    new byte[]{0xa, 0xb, 0xc},
-    new byte[]{0xd, 0x3},
-    new byte[]{0xd, 0xe},
-    new byte[]{});
-
-  @Test
-  public void testDecodeEncodeEquals() throws Exception {
-    for (byte[] value : TEST_VALUES) {
-      CoderProperties.coderDecodeEncodeEqual(TEST_CODER, value);
-    }
-  }
-
-  @Test
-  public void testRegisterByteSizeObserver() throws Exception {
-    CounterTestUtils.testByteCount(ByteArrayCoder.of(), Coder.Context.OUTER,
-                                   new byte[][]{{ 0xa, 0xb, 0xc }});
-
-    CounterTestUtils.testByteCount(ByteArrayCoder.of(), Coder.Context.NESTED,
-                                   new byte[][]{{ 0xa, 0xb, 0xc }, {}, {}, { 0xd, 0xe }, {}});
-  }
-
-  @Test
-  public void testStructuralValueConsistentWithEquals() throws Exception {
-    // We know that byte array coders are NOT compatible with equals
-    // (aka injective w.r.t. Object.equals)
-    for (byte[] value1 : TEST_VALUES) {
-      for (byte[] value2 : TEST_VALUES) {
-        CoderProperties.structuralValueConsistentWithEquals(TEST_CODER, value1, value2);
-      }
-    }
-  }
-
-  @Test
-  public void testEncodeThenMutate() throws Exception {
-    byte[] input = { 0x7, 0x3, 0xA, 0xf };
-    byte[] encoded = CoderUtils.encodeToByteArray(TEST_CODER, input);
-    input[1] = 0x9;
-    byte[] decoded = CoderUtils.decodeFromByteArray(TEST_CODER, encoded);
-
-    // now that I have mutated the input, the output should NOT match
-    assertThat(input, not(equalTo(decoded)));
-  }
-
-  @Test
-  public void testEncodeAndOwn() throws Exception {
-    for (byte[] value : TEST_VALUES) {
-      byte[] encodedSlow = CoderUtils.encodeToByteArray(TEST_CODER, value);
-      byte[] encodedFast = encodeToByteArrayAndOwn(TEST_CODER, value);
-      assertThat(encodedSlow, equalTo(encodedFast));
-    }
-  }
-
-  private static byte[] encodeToByteArrayAndOwn(ByteArrayCoder coder, byte[] value)
-      throws IOException {
-    return encodeToByteArrayAndOwn(coder, value, Coder.Context.OUTER);
-  }
-
-  private static byte[] encodeToByteArrayAndOwn(
-      ByteArrayCoder coder, byte[] value, Coder.Context context) throws IOException {
-    ByteArrayOutputStream os = new ByteArrayOutputStream();
-    coder.encodeAndOwn(value, os, context);
-    return os.toByteArray();
-  }
-
-  // If this changes, it implies the binary format has changed.
-  private static final String EXPECTED_ENCODING_ID = "";
-
-  @Test
-  public void testEncodingId() throws Exception {
-    CoderProperties.coderHasEncodingId(TEST_CODER, EXPECTED_ENCODING_ID);
-  }
-
-  /**
-   * Generated data to check that the wire format has not changed. To regenerate, see
-   * {@link com.google.cloud.dataflow.sdk.coders.PrintBase64Encodings}.
-   */
-  private static final List<String> TEST_ENCODINGS = Arrays.asList(
-      "CgsM",
-      "DQM",
-      "DQ4",
-      "");
-
-  @Test
-  public void testWireFormatEncode() throws Exception {
-    CoderProperties.coderEncodesBase64(TEST_CODER, TEST_VALUES, TEST_ENCODINGS);
-  }
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  @Test
-  public void encodeNullThrowsCoderException() throws Exception {
-    thrown.expect(CoderException.class);
-    thrown.expectMessage("cannot encode a null byte[]");
-
-    CoderUtils.encodeToBase64(TEST_CODER, null);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteCoderTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteCoderTest.java
deleted file mode 100644
index 6cb852e..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteCoderTest.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import com.google.cloud.dataflow.sdk.testing.CoderProperties;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Test case for {@link ByteCoder}.
- */
-@RunWith(JUnit4.class)
-public class ByteCoderTest {
-
-  private static final Coder<Byte> TEST_CODER = ByteCoder.of();
-
-  private static final List<Byte> TEST_VALUES = Arrays.asList(
-      (byte) 1,
-      (byte) 4,
-      (byte) 6,
-      (byte) 50,
-      (byte) 124,
-      Byte.MAX_VALUE,
-      Byte.MIN_VALUE);
-
-  @Test
-  public void testDecodeEncodeEqual() throws Exception {
-    for (Byte value : TEST_VALUES) {
-      CoderProperties.coderDecodeEncodeEqual(TEST_CODER, value);
-    }
-  }
-
-  // This should never change. The format is fixed by Java.
-  private static final String EXPECTED_ENCODING_ID = "";
-
-  @Test
-  public void testEncodingId() throws Exception {
-    CoderProperties.coderHasEncodingId(TEST_CODER, EXPECTED_ENCODING_ID);
-  }
-
-  /**
-   * Generated data to check that the wire format has not changed. To regenerate, see
-   * {@link com.google.cloud.dataflow.sdk.coders.PrintBase64Encodings}.
-   */
-  private static final List<String> TEST_ENCODINGS = Arrays.asList(
-      "AQ",
-      "BA",
-      "Bg",
-      "Mg",
-      "fA",
-      "fw",
-      "gA");
-
-  @Test
-  public void testWireFormatEncode() throws Exception {
-    CoderProperties.coderEncodesBase64(TEST_CODER, TEST_VALUES, TEST_ENCODINGS);
-  }
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  @Test
-  public void encodeNullThrowsCoderException() throws Exception {
-    thrown.expect(CoderException.class);
-    thrown.expectMessage("cannot encode a null Byte");
-
-    CoderUtils.encodeToBase64(TEST_CODER, null);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteStringCoderTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteStringCoderTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteStringCoderTest.java
deleted file mode 100644
index debae71..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/ByteStringCoderTest.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import com.google.cloud.dataflow.sdk.coders.Coder.Context;
-import com.google.cloud.dataflow.sdk.testing.CoderProperties;
-import com.google.cloud.dataflow.sdk.util.CoderUtils;
-import com.google.common.collect.ImmutableList;
-import com.google.protobuf.ByteString;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Test case for {@link ByteStringCoder}.
- */
-@RunWith(JUnit4.class)
-public class ByteStringCoderTest {
-
-  private static final ByteStringCoder TEST_CODER = ByteStringCoder.of();
-
-  private static final List<String> TEST_STRING_VALUES = Arrays.asList(
-      "", "a", "13", "hello",
-      "a longer string with spaces and all that",
-      "a string with a \n newline",
-      "???????????????");
-  private static final ImmutableList<ByteString> TEST_VALUES;
-  static {
-    ImmutableList.Builder<ByteString> builder = ImmutableList.<ByteString>builder();
-    for (String s : TEST_STRING_VALUES) {
-      builder.add(ByteString.copyFrom(s.getBytes()));
-    }
-    TEST_VALUES = builder.build();
-  }
-
-  /**
-   * Generated data to check that the wire format has not changed. To regenerate, see
-   * {@link com.google.cloud.dataflow.sdk.coders.PrintBase64Encodings}.
-   */
-  private static final List<String> TEST_ENCODINGS = Arrays.asList(
-      "",
-      "YQ",
-      "MTM",
-      "aGVsbG8",
-      "YSBsb25nZXIgc3RyaW5nIHdpdGggc3BhY2VzIGFuZCBhbGwgdGhhdA",
-      "YSBzdHJpbmcgd2l0aCBhIAogbmV3bGluZQ",
-      "Pz8_Pz8_Pz8_Pz8_Pz8_");
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  @Test
-  public void testDecodeEncodeEqualInAllContexts() throws Exception {
-    for (ByteString value : TEST_VALUES) {
-      CoderProperties.coderDecodeEncodeEqual(TEST_CODER, value);
-    }
-  }
-
-  @Test
-  public void testWireFormatEncode() throws Exception {
-    CoderProperties.coderEncodesBase64(TEST_CODER, TEST_VALUES, TEST_ENCODINGS);
-  }
-
-  @Test
-  public void testCoderDeterministic() throws Throwable {
-    TEST_CODER.verifyDeterministic();
-  }
-
-  @Test
-  public void testConsistentWithEquals() {
-    assertTrue(TEST_CODER.consistentWithEquals());
-  }
-
-  @Test
-  public void testEncodeNullThrowsCoderException() throws Exception {
-    thrown.expect(CoderException.class);
-    thrown.expectMessage("cannot encode a null ByteString");
-
-    CoderUtils.encodeToBase64(TEST_CODER, null);
-  }
-
-  @Test
-  public void testNestedCoding() throws Throwable {
-    Coder<List<ByteString>> listCoder = ListCoder.of(TEST_CODER);
-    CoderProperties.coderDecodeEncodeContentsEqual(listCoder, TEST_VALUES);
-    CoderProperties.coderDecodeEncodeContentsInSameOrder(listCoder, TEST_VALUES);
-  }
-
-  @Test
-  public void testEncodedElementByteSizeInAllContexts() throws Throwable {
-    for (Context context : CoderProperties.ALL_CONTEXTS) {
-      for (ByteString value : TEST_VALUES) {
-        byte[] encoded = CoderUtils.encodeToByteArray(TEST_CODER, value, context);
-        assertEquals(encoded.length, TEST_CODER.getEncodedElementByteSize(value, context));
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderFactoriesTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderFactoriesTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderFactoriesTest.java
deleted file mode 100644
index 8d702bf..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderFactoriesTest.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 2014 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static org.junit.Assert.assertEquals;
-
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Arrays;
-import java.util.Collections;
-
-/**
- * Tests for {@link CoderFactories}.
- */
-@RunWith(JUnit4.class)
-public class CoderFactoriesTest {
-
-  /**
-   * Ensures that a few of our standard atomic coder classes
-   * can each be built into a factory that works as expected.
-   * It is presumed that testing a few, not all, suffices to
-   * exercise CoderFactoryFromStaticMethods.
-   */
-  @Test
-  public void testAtomicCoderClassFactories() {
-    checkAtomicCoderFactory(StringUtf8Coder.class, StringUtf8Coder.of());
-    checkAtomicCoderFactory(DoubleCoder.class, DoubleCoder.of());
-    checkAtomicCoderFactory(ByteArrayCoder.class, ByteArrayCoder.of());
-  }
-
-  /**
-   * Checks that {@link CoderFactories#fromStaticMethods} successfully
-   * builds a working {@link CoderFactory} from {@link KvCoder KvCoder.class}.
-   */
-  @Test
-  public void testKvCoderFactory() {
-    CoderFactory kvCoderFactory = CoderFactories.fromStaticMethods(KvCoder.class);
-    assertEquals(
-        KvCoder.of(DoubleCoder.of(), DoubleCoder.of()),
-        kvCoderFactory.create(Arrays.asList(DoubleCoder.of(), DoubleCoder.of())));
-  }
-
-  /**
-   * Checks that {@link CoderFactories#fromStaticMethods} successfully
-   * builds a working {@link CoderFactory} from {@link ListCoder ListCoder.class}.
-   */
-  @Test
-  public void testListCoderFactory() {
-    CoderFactory listCoderFactory = CoderFactories.fromStaticMethods(ListCoder.class);
-
-    assertEquals(
-        ListCoder.of(DoubleCoder.of()),
-        listCoderFactory.create(Arrays.asList(DoubleCoder.of())));
-  }
-
-  /**
-   * Checks that {@link CoderFactories#fromStaticMethods} successfully
-   * builds a working {@link CoderFactory} from {@link IterableCoder IterableCoder.class}.
-   */
-  @Test
-  public void testIterableCoderFactory() {
-    CoderFactory iterableCoderFactory = CoderFactories.fromStaticMethods(IterableCoder.class);
-
-    assertEquals(
-        IterableCoder.of(DoubleCoder.of()),
-        iterableCoderFactory.create(Arrays.asList(DoubleCoder.of())));
-  }
-
-  ///////////////////////////////////////////////////////////////////////
-
-  /**
-   * Checks that an atomic coder class can be converted into
-   * a factory that then yields a coder equal to the example
-   * provided.
-   */
-  private <T> void checkAtomicCoderFactory(
-      Class<? extends Coder<T>> coderClazz,
-      Coder<T> expectedCoder) {
-    CoderFactory factory = CoderFactories.fromStaticMethods(coderClazz);
-    @SuppressWarnings("unchecked")
-    Coder<T> actualCoder = (Coder<T>) factory.create(Collections.<Coder<?>>emptyList());
-    assertEquals(expectedCoder, actualCoder);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProvidersTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProvidersTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProvidersTest.java
deleted file mode 100644
index 1c0a89e..0000000
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/coders/CoderProvidersTest.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (C) 2014 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.coders;
-
-import static org.hamcrest.Matchers.instanceOf;
-import static org.junit.Assert.assertThat;
-
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-import java.util.Map;
-
-/**
- * Tests for {@link CoderProviders}.
- */
-@RunWith(JUnit4.class)
-public class CoderProvidersTest {
-
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
-  @Test
-  public void testAvroThenSerializableStringMap() throws Exception {
-    CoderProvider provider = CoderProviders.firstOf(AvroCoder.PROVIDER, SerializableCoder.PROVIDER);
-    Coder<Map<String, String>> coder =
-        provider.getCoder(new TypeDescriptor<Map<String, String>>(){});
-    assertThat(coder, instanceOf(AvroCoder.class));
-  }
-
-  @Test
-  public void testThrowingThenSerializable() throws Exception {
-    CoderProvider provider =
-        CoderProviders.firstOf(new ThrowingCoderProvider(), SerializableCoder.PROVIDER);
-    Coder<Integer> coder = provider.getCoder(new TypeDescriptor<Integer>(){});
-    assertThat(coder, instanceOf(SerializableCoder.class));
-  }
-
-  @Test
-  public void testNullThrows() throws Exception {
-    CoderProvider provider = CoderProviders.firstOf(new ThrowingCoderProvider());
-    thrown.expect(CannotProvideCoderException.class);
-    thrown.expectMessage("ThrowingCoderProvider");
-    provider.getCoder(new TypeDescriptor<Integer>(){});
-  }
-
-  private static class ThrowingCoderProvider implements CoderProvider {
-    @Override
-    public <T> Coder<T> getCoder(TypeDescriptor<T> type) throws CannotProvideCoderException {
-      throw new CannotProvideCoderException("ThrowingCoderProvider cannot ever provide a Coder");
-    }
-  }
-}

[38/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java
deleted file mode 100644
index dd3d83a..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineWorkerPoolOptions.java
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-
-import java.util.List;
-
-/**
- * Options that are used to configure the Dataflow pipeline worker pool.
- */
-@Description("Options that are used to configure the Dataflow pipeline worker pool.")
-public interface DataflowPipelineWorkerPoolOptions extends PipelineOptions {
-  /**
-   * Number of workers to use when executing the Dataflow job. Note that selection of an autoscaling
-   * algorithm other than {@code NONE} will affect the size of the worker pool. If left unspecified,
-   * the Dataflow service will determine the number of workers.
-   */
-  @Description("Number of workers to use when executing the Dataflow job. Note that "
-      + "selection of an autoscaling algorithm other then \"NONE\" will affect the "
-      + "size of the worker pool. If left unspecified, the Dataflow service will "
-      + "determine the number of workers.")
-  int getNumWorkers();
-  void setNumWorkers(int value);
-
-  /**
-   * Type of autoscaling algorithm to use.
-   */
-  @Experimental(Experimental.Kind.AUTOSCALING)
-  public enum AutoscalingAlgorithmType {
-    /** Use numWorkers machines. Do not autoscale the worker pool. */
-    NONE("AUTOSCALING_ALGORITHM_NONE"),
-
-    @Deprecated
-    BASIC("AUTOSCALING_ALGORITHM_BASIC"),
-
-    /** Autoscale the workerpool based on throughput (up to maxNumWorkers). */
-    THROUGHPUT_BASED("AUTOSCALING_ALGORITHM_BASIC");
-
-    private final String algorithm;
-
-    private AutoscalingAlgorithmType(String algorithm) {
-      this.algorithm = algorithm;
-    }
-
-    /** Returns the string representation of this type. */
-    public String getAlgorithm() {
-      return this.algorithm;
-    }
-  }
-
-  /**
-   * [Experimental] The autoscaling algorithm to use for the workerpool.
-   *
-   * <ul>
-   *   <li>NONE: does not change the size of the worker pool.</li>
-   *   <li>BASIC: autoscale the worker pool size up to maxNumWorkers until the job completes.</li>
-   *   <li>THROUGHPUT_BASED: autoscale the workerpool based on throughput (up to maxNumWorkers).
-   *   </li>
-   * </ul>
-   */
-  @Description("[Experimental] The autoscaling algorithm to use for the workerpool. "
-      + "NONE: does not change the size of the worker pool. "
-      + "BASIC (deprecated): autoscale the worker pool size up to maxNumWorkers until the job "
-      + "completes. "
-      + "THROUGHPUT_BASED: autoscale the workerpool based on throughput (up to maxNumWorkers).")
-  @Experimental(Experimental.Kind.AUTOSCALING)
-  AutoscalingAlgorithmType getAutoscalingAlgorithm();
-  void setAutoscalingAlgorithm(AutoscalingAlgorithmType value);
-
-  /**
-   * The maximum number of workers to use for the workerpool. This option limits the size of the
-   * workerpool for the lifetime of the job, including
-   * <a href="https://cloud.google.com/dataflow/pipelines/updating-a-pipeline">pipeline updates</a>.
-   * If left unspecified, the Dataflow service will compute a ceiling.
-   */
-  @Description("The maximum number of workers to use for the workerpool. This options limits the "
-      + "size of the workerpool for the lifetime of the job, including pipeline updates. "
-      + "If left unspecified, the Dataflow service will compute a ceiling.")
-  int getMaxNumWorkers();
-  void setMaxNumWorkers(int value);
-
-  /**
-   * Remote worker disk size, in gigabytes, or 0 to use the default size.
-   */
-  @Description("Remote worker disk size, in gigabytes, or 0 to use the default size.")
-  int getDiskSizeGb();
-  void setDiskSizeGb(int value);
-
-  /**
-   * Docker container image that executes Dataflow worker harness, residing in Google Container
-   * Registry.
-   */
-  @Default.InstanceFactory(WorkerHarnessContainerImageFactory.class)
-  @Description("Docker container image that executes Dataflow worker harness, residing in Google "
-      + " Container Registry.")
-  @Hidden
-  String getWorkerHarnessContainerImage();
-  void setWorkerHarnessContainerImage(String value);
-
-  /**
-   * Returns the default Docker container image that executes Dataflow worker harness, residing in
-   * Google Container Registry.
-   */
-  public static class WorkerHarnessContainerImageFactory
-      implements DefaultValueFactory<String> {
-    @Override
-    public String create(PipelineOptions options) {
-      DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
-      if (dataflowOptions.isStreaming()) {
-        return DataflowPipelineRunner.STREAMING_WORKER_HARNESS_CONTAINER_IMAGE;
-      } else {
-        return DataflowPipelineRunner.BATCH_WORKER_HARNESS_CONTAINER_IMAGE;
-      }
-    }
-  }
-
-  /**
-   * GCE <a href="https://cloud.google.com/compute/docs/networking">network</a> for launching
-   * workers.
-   *
-   * <p>Default is up to the Dataflow service.
-   */
-  @Description("GCE network for launching workers. For more information, see the reference "
-      + "documentation https://cloud.google.com/compute/docs/networking. "
-      + "Default is up to the Dataflow service.")
-  String getNetwork();
-  void setNetwork(String value);
-
-  /**
-   * GCE <a href="https://cloud.google.com/compute/docs/networking">subnetwork</a> for launching
-   * workers.
-   *
-   * <p>Default is up to the Dataflow service. Expected format is zones/ZONE/subnetworks/SUBNETWORK.
-   */
-  @Description("GCE subnetwork for launching workers. For more information, see the reference "
-      + "documentation https://cloud.google.com/compute/docs/networking. "
-      + "Default is up to the Dataflow service.")
-  String getSubnetwork();
-  void setSubnetwork(String value);
-
-  /**
-   * GCE <a href="https://developers.google.com/compute/docs/zones"
-   * >availability zone</a> for launching workers.
-   *
-   * <p>Default is up to the Dataflow service.
-   */
-  @Description("GCE availability zone for launching workers. See "
-      + "https://developers.google.com/compute/docs/zones for a list of valid options. "
-      + "Default is up to the Dataflow service.")
-  String getZone();
-  void setZone(String value);
-
-  /**
-   * Machine type to create Dataflow worker VMs as.
-   *
-   * <p>See <a href="https://cloud.google.com/compute/docs/machine-types">GCE machine types</a>
-   * for a list of valid options.
-   *
-   * <p>If unset, the Dataflow service will choose a reasonable default.
-   */
-  @Description("Machine type to create Dataflow worker VMs as. See "
-      + "https://cloud.google.com/compute/docs/machine-types for a list of valid options. "
-      + "If unset, the Dataflow service will choose a reasonable default.")
-  String getWorkerMachineType();
-  void setWorkerMachineType(String value);
-
-  /**
-   * The policy for tearing down the workers spun up by the service.
-   */
-  public enum TeardownPolicy {
-    /**
-     * All VMs created for a Dataflow job are deleted when the job finishes, regardless of whether
-     * it fails or succeeds.
-     */
-    TEARDOWN_ALWAYS("TEARDOWN_ALWAYS"),
-    /**
-     * All VMs created for a Dataflow job are left running when the job finishes, regardless of
-     * whether it fails or succeeds.
-     */
-    TEARDOWN_NEVER("TEARDOWN_NEVER"),
-    /**
-     * All VMs created for a Dataflow job are deleted when the job succeeds, but are left running
-     * when it fails. (This is typically used for debugging failing jobs by SSHing into the
-     * workers.)
-     */
-    TEARDOWN_ON_SUCCESS("TEARDOWN_ON_SUCCESS");
-
-    private final String teardownPolicy;
-
-    private TeardownPolicy(String teardownPolicy) {
-      this.teardownPolicy = teardownPolicy;
-    }
-
-    public String getTeardownPolicyName() {
-      return this.teardownPolicy;
-    }
-  }
-
-  /**
-   * The teardown policy for the VMs.
-   *
-   * <p>If unset, the Dataflow service will choose a reasonable default.
-   */
-  @Description("The teardown policy for the VMs. If unset, the Dataflow service will "
-      + "choose a reasonable default.")
-  TeardownPolicy getTeardownPolicy();
-  void setTeardownPolicy(TeardownPolicy value);
-
-  /**
-   * List of local files to make available to workers.
-   *
-   * <p>Files are placed on the worker's classpath.
-   *
-   * <p>The default value is the list of jars from the main program's classpath.
-   */
-  @Description("Files to stage on GCS and make available to workers. "
-      + "Files are placed on the worker's classpath. "
-      + "The default value is all files from the classpath.")
-  @JsonIgnore
-  List<String> getFilesToStage();
-  void setFilesToStage(List<String> value);
-
-  /**
-   * Specifies what type of persistent disk should be used. The value should be a full or partial
-   * URL of a disk type resource, e.g., zones/us-central1-f/disks/pd-standard. For
-   * more information, see the
-   * <a href="https://cloud.google.com/compute/docs/reference/latest/diskTypes">API reference
-   * documentation for DiskTypes</a>.
-   */
-  @Description("Specifies what type of persistent disk should be used. The value should be a full "
-      + "or partial URL of a disk type resource, e.g., zones/us-central1-f/disks/pd-standard. For "
-      + "more information, see the API reference documentation for DiskTypes: "
-      + "https://cloud.google.com/compute/docs/reference/latest/diskTypes")
-  String getWorkerDiskType();
-  void setWorkerDiskType(String value);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowProfilingOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowProfilingOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowProfilingOptions.java
deleted file mode 100644
index 8ad2ba2..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowProfilingOptions.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-
-import java.util.HashMap;
-
-/**
- * Options for controlling profiling of pipeline execution.
- */
-@Description("[Experimental] Used to configure profiling of the Dataflow pipeline")
-@Experimental
-@Hidden
-public interface DataflowProfilingOptions {
-
-  @Description("Whether to periodically dump profiling information to local disk.\n"
-      + "WARNING: Enabling this option may fill local disk with profiling information.")
-  boolean getEnableProfilingAgent();
-  void setEnableProfilingAgent(boolean enabled);
-
-  @Description(
-      "[INTERNAL] Additional configuration for the profiling agent. Not typically necessary.")
-  @Hidden
-  DataflowProfilingAgentConfiguration getProfilingAgentConfiguration();
-  void setProfilingAgentConfiguration(DataflowProfilingAgentConfiguration configuration);
-
-  /**
-   * Configuration for the profiling agent.
-   */
-  public static class DataflowProfilingAgentConfiguration extends HashMap<String, Object> {
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java
deleted file mode 100644
index e4b1d72..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerHarnessOptions.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-/**
- * Options that are used exclusively within the Dataflow worker harness.
- * These options have no effect at pipeline creation time.
- */
-@Description("[Internal] Options that are used exclusively within the Dataflow worker harness. "
-    + "These options have no effect at pipeline creation time.")
-@Hidden
-public interface DataflowWorkerHarnessOptions extends DataflowPipelineOptions {
-  /**
-   * The identity of the worker running this pipeline.
-   */
-  @Description("The identity of the worker running this pipeline.")
-  String getWorkerId();
-  void setWorkerId(String value);
-
-  /**
-   * The identity of the Dataflow job.
-   */
-  @Description("The identity of the Dataflow job.")
-  String getJobId();
-  void setJobId(String value);
-
-  /**
-   * The size of the worker's in-memory cache, in megabytes.
-   *
-   * <p>Currently, this cache is used for storing read values of side inputs.
-   */
-  @Description("The size of the worker's in-memory cache, in megabytes.")
-  @Default.Integer(100)
-  Integer getWorkerCacheMb();
-  void setWorkerCacheMb(Integer value);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java
deleted file mode 100644
index 2328873..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowWorkerLoggingOptions.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.common.base.Preconditions;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Options that are used to control logging configuration on the Dataflow worker.
- */
-@Description("Options that are used to control logging configuration on the Dataflow worker.")
-public interface DataflowWorkerLoggingOptions extends PipelineOptions {
-  /**
-   * The set of log levels that can be used on the Dataflow worker.
-   */
-  public enum Level {
-    DEBUG, ERROR, INFO, TRACE, WARN
-  }
-
-  /**
-   * This option controls the default log level of all loggers without a log level override.
-   */
-  @Description("Controls the default log level of all loggers without a log level override.")
-  @Default.Enum("INFO")
-  Level getDefaultWorkerLogLevel();
-  void setDefaultWorkerLogLevel(Level level);
-
-  /**
-   * This option controls the log levels for specifically named loggers.
-   *
-   * <p>Later options with equivalent names override earlier options.
-   *
-   * <p>See {@link WorkerLogLevelOverrides} for more information on how to configure logging
-   * on a per {@link Class}, {@link Package}, or name basis. If used from the command line,
-   * the expected format is {"Name":"Level",...}, further details on
-   * {@link WorkerLogLevelOverrides#from}.
-   */
-  @Description("This option controls the log levels for specifically named loggers. "
-      + "The expected format is {\"Name\":\"Level\",...}. The Dataflow worker uses "
-      + "java.util.logging, which supports a logging hierarchy based off of names that are '.' "
-      + "separated. For example, by specifying the value {\"a.b.c.Foo\":\"DEBUG\"}, the logger "
-      + "for the class 'a.b.c.Foo' will be configured to output logs at the DEBUG level. "
-      + "Similarly, by specifying the value {\"a.b.c\":\"WARN\"}, all loggers underneath the "
-      + "'a.b.c' package will be configured to output logs at the WARN level. Also, note that "
-      + "when multiple overrides are specified, the exact name followed by the closest parent "
-      + "takes precedence.")
-  WorkerLogLevelOverrides getWorkerLogLevelOverrides();
-  void setWorkerLogLevelOverrides(WorkerLogLevelOverrides value);
-
-  /**
-   * Defines a log level override for a specific class, package, or name.
-   *
-   * <p>{@code java.util.logging} is used on the Dataflow worker harness and supports
-   * a logging hierarchy based off of names that are "." separated. It is a common
-   * pattern to have the logger for a given class share the same name as the class itself.
-   * Given the classes {@code a.b.c.Foo}, {@code a.b.c.Xyz}, and {@code a.b.Bar}, with
-   * loggers named {@code "a.b.c.Foo"}, {@code "a.b.c.Xyz"}, and {@code "a.b.Bar"} respectively,
-   * we can override the log levels:
-   * <ul>
-   *    <li>for {@code Foo} by specifying the name {@code "a.b.c.Foo"} or the {@link Class}
-   *    representing {@code a.b.c.Foo}.
-   *    <li>for {@code Foo}, {@code Xyz}, and {@code Bar} by specifying the name {@code "a.b"} or
-   *    the {@link Package} representing {@code a.b}.
-   *    <li>for {@code Foo} and {@code Bar} by specifying both of their names or classes.
-   * </ul>
-   * Note that by specifying multiple overrides, the exact name followed by the closest parent
-   * takes precedence.
-   */
-  public static class WorkerLogLevelOverrides extends HashMap<String, Level> {
-    /**
-     * Overrides the default log level for the passed in class.
-     *
-     * <p>This is equivalent to calling
-     * {@link #addOverrideForName(String, DataflowWorkerLoggingOptions.Level)}
-     * and passing in the {@link Class#getName() class name}.
-     */
-    public WorkerLogLevelOverrides addOverrideForClass(Class<?> klass, Level level) {
-      Preconditions.checkNotNull(klass, "Expected class to be not null.");
-      addOverrideForName(klass.getName(), level);
-      return this;
-    }
-
-    /**
-     * Overrides the default log level for the passed in package.
-     *
-     * <p>This is equivalent to calling
-     * {@link #addOverrideForName(String, DataflowWorkerLoggingOptions.Level)}
-     * and passing in the {@link Package#getName() package name}.
-     */
-    public WorkerLogLevelOverrides addOverrideForPackage(Package pkg, Level level) {
-      Preconditions.checkNotNull(pkg, "Expected package to be not null.");
-      addOverrideForName(pkg.getName(), level);
-      return this;
-    }
-
-    /**
-     * Overrides the default log level for the passed in name.
-     *
-     * <p>Note that because of the hierarchical nature of logger names, this will
-     * override the log level of all loggers that have the passed in name or
-     * a parent logger that has the passed in name.
-     */
-    public WorkerLogLevelOverrides addOverrideForName(String name, Level level) {
-      Preconditions.checkNotNull(name, "Expected name to be not null.");
-      Preconditions.checkNotNull(level,
-          "Expected level to be one of %s.", Arrays.toString(Level.values()));
-      put(name, level);
-      return this;
-    }
-
-    /**
-     * Expects a map keyed by logger {@code Name}s with values representing {@code Level}s.
-     * The {@code Name} generally represents the fully qualified Java
-     * {@link Class#getName() class name}, or fully qualified Java
-     * {@link Package#getName() package name}, or custom logger name. The {@code Level}
-     * represents the log level and must be one of {@link Level}.
-     */
-    @JsonCreator
-    public static WorkerLogLevelOverrides from(Map<String, String> values) {
-      Preconditions.checkNotNull(values, "Expected values to be not null.");
-      WorkerLogLevelOverrides overrides = new WorkerLogLevelOverrides();
-      for (Map.Entry<String, String> entry : values.entrySet()) {
-        try {
-          overrides.addOverrideForName(entry.getKey(), Level.valueOf(entry.getValue()));
-        } catch (IllegalArgumentException e) {
-          throw new IllegalArgumentException(String.format(
-              "Unsupported log level '%s' requested for %s. Must be one of %s.",
-              entry.getValue(), entry.getKey(), Arrays.toString(Level.values())));
-        }
-
-      }
-      return overrides;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java
deleted file mode 100644
index 46ff682..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Default.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import java.lang.annotation.Documented;
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
-
-/**
- * {@link Default} represents a set of annotations that can be used to annotate getter properties
- * on {@link PipelineOptions} with information representing the default value to be returned
- * if no value is specified.
- */
-public @interface Default {
-  /**
-   * This represents that the default of the option is the specified {@link java.lang.Class} value.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface Class {
-    java.lang.Class<?> value();
-  }
-
-  /**
-   * This represents that the default of the option is the specified {@link java.lang.String}
-   * value.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface String {
-    java.lang.String value();
-  }
-
-  /**
-   * This represents that the default of the option is the specified boolean primitive value.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface Boolean {
-    boolean value();
-  }
-
-  /**
-   * This represents that the default of the option is the specified char primitive value.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface Character {
-    char value();
-  }
-
-  /**
-   * This represents that the default of the option is the specified byte primitive value.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface Byte {
-    byte value();
-  }
-  /**
-   * This represents that the default of the option is the specified short primitive value.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface Short {
-    short value();
-  }
-  /**
-   * This represents that the default of the option is the specified int primitive value.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface Integer {
-    int value();
-  }
-
-  /**
-   * This represents that the default of the option is the specified long primitive value.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface Long {
-    long value();
-  }
-
-  /**
-   * This represents that the default of the option is the specified float primitive value.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface Float {
-    float value();
-  }
-
-  /**
-   * This represents that the default of the option is the specified double primitive value.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface Double {
-    double value();
-  }
-
-  /**
-   * This represents that the default of the option is the specified enum.
-   * The value should equal the enum's {@link java.lang.Enum#name() name}.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface Enum {
-    java.lang.String value();
-  }
-
-  /**
-   * Value must be of type {@link DefaultValueFactory} and have a default constructor.
-   * Value is instantiated and then used as a factory to generate the default.
-   *
-   * <p>See {@link DefaultValueFactory} for more details.
-   */
-  @Target(ElementType.METHOD)
-  @Retention(RetentionPolicy.RUNTIME)
-  @Documented
-  public @interface InstanceFactory {
-    java.lang.Class<? extends DefaultValueFactory<?>> value();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DefaultValueFactory.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DefaultValueFactory.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DefaultValueFactory.java
deleted file mode 100644
index 1faedb7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DefaultValueFactory.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-/**
- * An interface used with the {@link Default.InstanceFactory} annotation to specify the class that
- * will be an instance factory to produce default values for a given getter on
- * {@link PipelineOptions}. When a property on a {@link PipelineOptions} is fetched, and is
- * currently unset, the default value factory will be instantiated and invoked.
- *
- * <p>Care must be taken to not produce an infinite loop when accessing other fields on the
- * {@link PipelineOptions} object.
- *
- * @param <T> The type of object this factory produces.
- */
-public interface DefaultValueFactory<T> {
-  /**
-   * Creates a default value for a getter marked with {@link Default.InstanceFactory}.
-   *
-   * @param options The current pipeline options.
-   * @return The default value to be used for the annotated getter.
-   */
-  T create(PipelineOptions options);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java
deleted file mode 100644
index 9ceaf58..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Description.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
-
-/**
- * Descriptions are used to generate human readable output when the {@code --help}
- * command is specified. Description annotations placed on interfaces that extend
- * {@link PipelineOptions} will describe groups of related options. Description annotations
- * placed on getter methods will be used to provide human readable information
- * for the specific option.
- */
-@Target({ElementType.METHOD, ElementType.TYPE})
-@Retention(RetentionPolicy.RUNTIME)
-public @interface Description {
-  String value();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DirectPipelineOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DirectPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DirectPipelineOptions.java
deleted file mode 100644
index 0867740..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DirectPipelineOptions.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.runners.DirectPipeline;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-
-/**
- * Options that can be used to configure the {@link DirectPipeline}.
- */
-public interface DirectPipelineOptions extends
-    ApplicationNameOptions, BigQueryOptions, GcsOptions, GcpOptions,
-    PipelineOptions, StreamingOptions {
-
-  /**
-   * The random seed to use for pseudorandom behaviors in the {@link DirectPipelineRunner}.
-   * If not explicitly specified, a random seed will be generated.
-   */
-  @JsonIgnore
-  @Description("The random seed to use for pseudorandom behaviors in the DirectPipelineRunner."
-      + " If not explicitly specified, a random seed will be generated.")
-  Long getDirectPipelineRunnerRandomSeed();
-  void setDirectPipelineRunnerRandomSeed(Long value);
-
-  /**
-   * Controls whether the runner should ensure that all of the elements of
-   * the pipeline, such as DoFns, can be serialized.
-   */
-  @JsonIgnore
-  @Description("Controls whether the runner should ensure that all of the elements of the "
-      + "pipeline, such as DoFns, can be serialized.")
-  @Default.Boolean(true)
-  boolean isTestSerializability();
-  void setTestSerializability(boolean testSerializability);
-
-  /**
-   * Controls whether the runner should ensure that all of the elements of
-   * every {@link PCollection} can be encoded using the appropriate
-   * {@link Coder}.
-   */
-  @JsonIgnore
-  @Description("Controls whether the runner should ensure that all of the elements of every "
-      + "PCollection can be encoded using the appropriate Coder.")
-  @Default.Boolean(true)
-  boolean isTestEncodability();
-  void setTestEncodability(boolean testEncodability);
-
-  /**
-   * Controls whether the runner should randomize the order of each
-   * {@link PCollection}.
-   */
-  @JsonIgnore
-  @Description("Controls whether the runner should randomize the order of each PCollection.")
-  @Default.Boolean(true)
-  boolean isTestUnorderedness();
-  void setTestUnorderedness(boolean testUnorderedness);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java
deleted file mode 100644
index 7b70f4c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.api.client.auth.oauth2.Credential;
-import com.google.api.client.googleapis.auth.oauth2.GoogleOAuthConstants;
-import com.google.cloud.dataflow.sdk.util.CredentialFactory;
-import com.google.cloud.dataflow.sdk.util.GcpCredentialFactory;
-import com.google.cloud.dataflow.sdk.util.InstanceBuilder;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.io.Files;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.security.GeneralSecurityException;
-import java.util.Locale;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Options used to configure Google Cloud Platform project and credentials.
- *
- * <p>These options configure which of the following three different mechanisms for obtaining a
- * credential are used:
- * <ol>
- *   <li>
- *     It can fetch the
- *     <a href="https://developers.google.com/accounts/docs/application-default-credentials">
- *     application default credentials</a>.
- *   </li>
- *   <li>
- *     The user can specify a client secrets file and go through the OAuth2
- *     webflow. The credential will then be cached in the user's home
- *     directory for reuse.
- *   </li>
- *   <li>
- *     The user can specify a file containing a service account private key along
- *     with the service account name.
- *   </li>
- * </ol>
- *
- * <p>The default mechanism is to use the
- * <a href="https://developers.google.com/accounts/docs/application-default-credentials">
- * application default credentials</a>. The other options can be
- * used by setting the corresponding properties.
- */
-@Description("Options used to configure Google Cloud Platform project and credentials.")
-public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions {
-  /**
-   * Project id to use when launching jobs.
-   */
-  @Description("Project id. Required when running a Dataflow in the cloud. "
-      + "See https://cloud.google.com/storage/docs/projects for further details.")
-  @Default.InstanceFactory(DefaultProjectFactory.class)
-  String getProject();
-  void setProject(String value);
-
-  /**
-   * This option controls which file to use when attempting to create the credentials using the
-   * service account method.
-   *
-   * <p>This option if specified, needs be combined with the
-   * {@link GcpOptions#getServiceAccountName() serviceAccountName}.
-   */
-  @JsonIgnore
-  @Description("Controls which file to use when attempting to create the credentials "
-      + "using the service account method. This option if specified, needs to be combined with "
-      + "the serviceAccountName option.")
-  String getServiceAccountKeyfile();
-  void setServiceAccountKeyfile(String value);
-
-  /**
-   * This option controls which service account to use when attempting to create the credentials
-   * using the service account method.
-   *
-   * <p>This option if specified, needs be combined with the
-   * {@link GcpOptions#getServiceAccountKeyfile() serviceAccountKeyfile}.
-   */
-  @JsonIgnore
-  @Description("Controls which service account to use when attempting to create the credentials "
-      + "using the service account method. This option if specified, needs to be combined with "
-      + "the serviceAccountKeyfile option.")
-  String getServiceAccountName();
-  void setServiceAccountName(String value);
-
-  /**
-   * This option controls which file to use when attempting to create the credentials
-   * using the OAuth 2 webflow. After the OAuth2 webflow, the credentials will be stored
-   * within credentialDir.
-   */
-  @JsonIgnore
-  @Description("This option controls which file to use when attempting to create the credentials "
-      + "using the OAuth 2 webflow. After the OAuth2 webflow, the credentials will be stored "
-      + "within credentialDir.")
-  String getSecretsFile();
-  void setSecretsFile(String value);
-
-  /**
-   * This option controls which credential store to use when creating the credentials
-   * using the OAuth 2 webflow.
-   */
-  @Description("This option controls which credential store to use when creating the credentials "
-      + "using the OAuth 2 webflow.")
-  @Default.String("cloud_dataflow")
-  String getCredentialId();
-  void setCredentialId(String value);
-
-  /**
-   * Directory for storing dataflow credentials after execution of the OAuth 2 webflow. Defaults
-   * to using the $HOME/.store/data-flow directory.
-   */
-  @Description("Directory for storing dataflow credentials after execution of the OAuth 2 webflow. "
-      + "Defaults to using the $HOME/.store/data-flow directory.")
-  @Default.InstanceFactory(CredentialDirFactory.class)
-  String getCredentialDir();
-  void setCredentialDir(String value);
-
-  /**
-   * Returns the default credential directory of ${user.home}/.store/data-flow.
-   */
-  public static class CredentialDirFactory implements DefaultValueFactory<String> {
-    @Override
-    public String create(PipelineOptions options) {
-      File home = new File(System.getProperty("user.home"));
-      File store = new File(home, ".store");
-      File dataflow = new File(store, "data-flow");
-      return dataflow.getPath();
-    }
-  }
-
-  /**
-   * The class of the credential factory that should be created and used to create
-   * credentials. If gcpCredential has not been set explicitly, an instance of this class will
-   * be constructed and used as a credential factory.
-   */
-  @Description("The class of the credential factory that should be created and used to create "
-      + "credentials. If gcpCredential has not been set explicitly, an instance of this class will "
-      + "be constructed and used as a credential factory.")
-  @Default.Class(GcpCredentialFactory.class)
-  Class<? extends CredentialFactory> getCredentialFactoryClass();
-  void setCredentialFactoryClass(
-      Class<? extends CredentialFactory> credentialFactoryClass);
-
-  /**
-   * The credential instance that should be used to authenticate against GCP services.
-   * If no credential has been set explicitly, the default is to use the instance factory
-   * that constructs a credential based upon the currently set credentialFactoryClass.
-   */
-  @JsonIgnore
-  @Description("The credential instance that should be used to authenticate against GCP services. "
-      + "If no credential has been set explicitly, the default is to use the instance factory "
-      + "that constructs a credential based upon the currently set credentialFactoryClass.")
-  @Default.InstanceFactory(GcpUserCredentialsFactory.class)
-  @Hidden
-  Credential getGcpCredential();
-  void setGcpCredential(Credential value);
-
-  /**
-   * Attempts to infer the default project based upon the environment this application
-   * is executing within. Currently this only supports getting the default project from gcloud.
-   */
-  public static class DefaultProjectFactory implements DefaultValueFactory<String> {
-    private static final Logger LOG = LoggerFactory.getLogger(DefaultProjectFactory.class);
-
-    @Override
-    public String create(PipelineOptions options) {
-      try {
-        File configFile;
-        if (getEnvironment().containsKey("CLOUDSDK_CONFIG")) {
-          configFile = new File(getEnvironment().get("CLOUDSDK_CONFIG"), "properties");
-        } else if (isWindows() && getEnvironment().containsKey("APPDATA")) {
-          configFile = new File(getEnvironment().get("APPDATA"), "gcloud/properties");
-        } else {
-          // New versions of gcloud use this file
-          configFile = new File(
-              System.getProperty("user.home"),
-              ".config/gcloud/configurations/config_default");
-          if (!configFile.exists()) {
-            // Old versions of gcloud use this file
-            configFile = new File(System.getProperty("user.home"), ".config/gcloud/properties");
-          }
-        }
-        String section = null;
-        Pattern projectPattern = Pattern.compile("^project\\s*=\\s*(.*)$");
-        Pattern sectionPattern = Pattern.compile("^\\[(.*)\\]$");
-        for (String line : Files.readLines(configFile, StandardCharsets.UTF_8)) {
-          line = line.trim();
-          if (line.isEmpty() || line.startsWith(";")) {
-            continue;
-          }
-          Matcher matcher = sectionPattern.matcher(line);
-          if (matcher.matches()) {
-            section = matcher.group(1);
-          } else if (section == null || section.equals("core")) {
-            matcher = projectPattern.matcher(line);
-            if (matcher.matches()) {
-              String project = matcher.group(1).trim();
-              LOG.info("Inferred default GCP project '{}' from gcloud. If this is the incorrect "
-                  + "project, please cancel this Pipeline and specify the command-line "
-                  + "argument --project.", project);
-              return project;
-            }
-          }
-        }
-      } catch (IOException expected) {
-        LOG.debug("Failed to find default project.", expected);
-      }
-      // return null if can't determine
-      return null;
-    }
-
-    /**
-     * Returns true if running on the Windows OS.
-     */
-    private static boolean isWindows() {
-      return System.getProperty("os.name").toLowerCase(Locale.ENGLISH).contains("windows");
-    }
-
-    /**
-     * Used to mock out getting environment variables.
-     */
-    @VisibleForTesting
-    Map<String, String> getEnvironment() {
-        return System.getenv();
-    }
-  }
-
-  /**
-   * Attempts to load the GCP credentials. See
-   * {@link CredentialFactory#getCredential()} for more details.
-   */
-  public static class GcpUserCredentialsFactory implements DefaultValueFactory<Credential> {
-    @Override
-    public Credential create(PipelineOptions options) {
-      GcpOptions gcpOptions = options.as(GcpOptions.class);
-      try {
-        CredentialFactory factory = InstanceBuilder.ofType(CredentialFactory.class)
-            .fromClass(gcpOptions.getCredentialFactoryClass())
-            .fromFactoryMethod("fromOptions")
-            .withArg(PipelineOptions.class, options)
-            .build();
-        return factory.getCredential();
-      } catch (IOException | GeneralSecurityException e) {
-        throw new RuntimeException("Unable to obtain credential", e);
-      }
-    }
-  }
-
-  /**
-   * The token server URL to use for OAuth 2 authentication. Normally, the default is sufficient,
-   * but some specialized use cases may want to override this value.
-   */
-  @Description("The token server URL to use for OAuth 2 authentication. Normally, the default "
-      + "is sufficient, but some specialized use cases may want to override this value.")
-  @Default.String(GoogleOAuthConstants.TOKEN_SERVER_URL)
-  @Hidden
-  String getTokenServerUrl();
-  void setTokenServerUrl(String value);
-
-  /**
-   * The authorization server URL to use for OAuth 2 authentication. Normally, the default is
-   * sufficient, but some specialized use cases may want to override this value.
-   */
-  @Description("The authorization server URL to use for OAuth 2 authentication. Normally, the "
-      + "default is sufficient, but some specialized use cases may want to override this value.")
-  @Default.String(GoogleOAuthConstants.AUTHORIZATION_SERVER_URL)
-  @Hidden
-  String getAuthorizationServerEncodedUrl();
-  void setAuthorizationServerEncodedUrl(String value);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java
deleted file mode 100644
index d221807..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcsOptions.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.cloud.dataflow.sdk.util.AppEngineEnvironment;
-import com.google.cloud.dataflow.sdk.util.GcsUtil;
-import com.google.cloud.hadoop.util.AbstractGoogleAsyncWriteChannel;
-import com.google.common.util.concurrent.MoreExecutors;
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.SynchronousQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-
-/**
- * Options used to configure Google Cloud Storage.
- */
-public interface GcsOptions extends
-    ApplicationNameOptions, GcpOptions, PipelineOptions {
-  /**
-   * The GcsUtil instance that should be used to communicate with Google Cloud Storage.
-   */
-  @JsonIgnore
-  @Description("The GcsUtil instance that should be used to communicate with Google Cloud Storage.")
-  @Default.InstanceFactory(GcsUtil.GcsUtilFactory.class)
-  @Hidden
-  GcsUtil getGcsUtil();
-  void setGcsUtil(GcsUtil value);
-
-  /**
-   * The ExecutorService instance to use to create threads, can be overridden to specify an
-   * ExecutorService that is compatible with the users environment. If unset, the
-   * default is to create an ExecutorService with an unbounded number of threads; this
-   * is compatible with Google AppEngine.
-   */
-  @JsonIgnore
-  @Description("The ExecutorService instance to use to create multiple threads. Can be overridden "
-      + "to specify an ExecutorService that is compatible with the users environment. If unset, "
-      + "the default is to create an ExecutorService with an unbounded number of threads; this "
-      + "is compatible with Google AppEngine.")
-  @Default.InstanceFactory(ExecutorServiceFactory.class)
-  @Hidden
-  ExecutorService getExecutorService();
-  void setExecutorService(ExecutorService value);
-
-  /**
-   * GCS endpoint to use. If unspecified, uses the default endpoint.
-   */
-  @JsonIgnore
-  @Hidden
-  @Description("The URL for the GCS API.")
-  String getGcsEndpoint();
-  void setGcsEndpoint(String value);
-
-  /**
-   * The buffer size (in bytes) to use when uploading files to GCS. Please see the documentation for
-   * {@link AbstractGoogleAsyncWriteChannel#setUploadBufferSize} for more information on the
-   * restrictions and performance implications of this value.
-   */
-  @Description("The buffer size (in bytes) to use when uploading files to GCS. Please see the "
-      + "documentation for AbstractGoogleAsyncWriteChannel.setUploadBufferSize for more "
-      + "information on the restrictions and performance implications of this value.\n\n"
-      + "https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/util/src/main/java/"
-      + "com/google/cloud/hadoop/util/AbstractGoogleAsyncWriteChannel.java")
-  Integer getGcsUploadBufferSizeBytes();
-  void setGcsUploadBufferSizeBytes(Integer bytes);
-
-  /**
-   * Returns the default {@link ExecutorService} to use within the Dataflow SDK. The
-   * {@link ExecutorService} is compatible with AppEngine.
-   */
-  public static class ExecutorServiceFactory implements DefaultValueFactory<ExecutorService> {
-    @SuppressWarnings("deprecation")  // IS_APP_ENGINE is deprecated for internal use only.
-    @Override
-    public ExecutorService create(PipelineOptions options) {
-      ThreadFactoryBuilder threadFactoryBuilder = new ThreadFactoryBuilder();
-      threadFactoryBuilder.setThreadFactory(MoreExecutors.platformThreadFactory());
-      if (!AppEngineEnvironment.IS_APP_ENGINE) {
-        // AppEngine doesn't allow modification of threads to be daemon threads.
-        threadFactoryBuilder.setDaemon(true);
-      }
-      /* The SDK requires an unbounded thread pool because a step may create X writers
-       * each requiring their own thread to perform the writes otherwise a writer may
-       * block causing deadlock for the step because the writers buffer is full.
-       * Also, the MapTaskExecutor launches the steps in reverse order and completes
-       * them in forward order thus requiring enough threads so that each step's writers
-       * can be active.
-       */
-      return new ThreadPoolExecutor(
-          0, Integer.MAX_VALUE, // Allow an unlimited number of re-usable threads.
-          Long.MAX_VALUE, TimeUnit.NANOSECONDS, // Keep non-core threads alive forever.
-          new SynchronousQueue<Runnable>(),
-          threadFactoryBuilder.build());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java
deleted file mode 100644
index eff679b..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GoogleApiDebugOptions.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.api.client.googleapis.services.AbstractGoogleClient;
-import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
-import com.google.api.client.googleapis.services.GoogleClientRequestInitializer;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * These options configure debug settings for Google API clients created within the Dataflow SDK.
- */
-public interface GoogleApiDebugOptions extends PipelineOptions {
-  /**
-   * This option enables tracing of API calls to Google services used within the
-   * Dataflow SDK. Values are expected in JSON format <code>{"ApiName":"TraceDestination",...}
-   * </code> where the {@code ApiName} represents the request classes canonical name. The
-   * {@code TraceDestination} is a logical trace consumer to whom the trace will be reported.
-   * Typically, "producer" is the right destination to use: this makes API traces available to the
-   * team offering the API. Note that by enabling this option, the contents of the requests to and
-   * from Google Cloud services will be made available to Google. For example, by specifying
-   * <code>{"Dataflow":"producer"}</code>, all calls to the Dataflow service will be made available
-   * to Google, specifically to the Google Cloud Dataflow team.
-   */
-  @Description("This option enables tracing of API calls to Google services used within the "
-      + "Dataflow SDK. Values are expected in JSON format {\"ApiName\":\"TraceDestination\",...} "
-      + "where the ApiName represents the request classes canonical name. The TraceDestination is "
-      + "a logical trace consumer to whom the trace will be reported. Typically, \"producer\" is "
-      + "the right destination to use: this makes API traces available to the team offering the "
-      + "API. Note that by enabling this option, the contents of the requests to and from "
-      + "Google Cloud services will be made available to Google. For example, by specifying "
-      + "{\"Dataflow\":\"producer\"}, all calls to the Dataflow service will be made available to "
-      + "Google, specifically to the Google Cloud Dataflow team.")
-  GoogleApiTracer getGoogleApiTrace();
-  void setGoogleApiTrace(GoogleApiTracer commands);
-
-  /**
-   * A {@link GoogleClientRequestInitializer} that adds the trace destination to Google API calls.
-   */
-  public static class GoogleApiTracer extends HashMap<String, String>
-      implements GoogleClientRequestInitializer {
-    /**
-     * Creates a {@link GoogleApiTracer} that sets the trace destination on all
-     * calls that match the given client type.
-     */
-    public GoogleApiTracer addTraceFor(AbstractGoogleClient client, String traceDestination) {
-      put(client.getClass().getCanonicalName(), traceDestination);
-      return this;
-    }
-
-    /**
-     * Creates a {@link GoogleApiTracer} that sets the trace {@code traceDestination} on all
-     * calls that match for the given request type.
-     */
-    public GoogleApiTracer addTraceFor(
-        AbstractGoogleClientRequest<?> request, String traceDestination) {
-      put(request.getClass().getCanonicalName(), traceDestination);
-      return this;
-    }
-
-    @Override
-    public void initialize(AbstractGoogleClientRequest<?> request) throws IOException {
-      for (Map.Entry<String, String> entry : this.entrySet()) {
-        if (request.getClass().getCanonicalName().contains(entry.getKey())) {
-          request.set("$trace", entry.getValue());
-        }
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Hidden.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Hidden.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Hidden.java
deleted file mode 100644
index 6a487eb..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/Hidden.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import java.lang.annotation.Documented;
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
-
-/**
- * Methods and/or interfaces annotated with {@code @Hidden} will be suppressed from
- * being output when {@code --help} is specified on the command-line.
- */
-@Target({ElementType.METHOD, ElementType.TYPE})
-@Retention(RetentionPolicy.RUNTIME)
-@Documented
-public @interface Hidden {
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
deleted file mode 100644
index 8ff1fa9..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.options;
-
-import com.google.auto.service.AutoService;
-import com.google.cloud.dataflow.sdk.Pipeline;
-import com.google.cloud.dataflow.sdk.options.GoogleApiDebugOptions.GoogleApiTracer;
-import com.google.cloud.dataflow.sdk.options.ProxyInvocationHandler.Deserializer;
-import com.google.cloud.dataflow.sdk.options.ProxyInvocationHandler.Serializer;
-import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
-import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.DoFn.Context;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
-import com.fasterxml.jackson.databind.annotation.JsonSerialize;
-
-import java.lang.reflect.Proxy;
-import java.util.ServiceLoader;
-
-import javax.annotation.concurrent.ThreadSafe;
-
-/**
- * PipelineOptions are used to configure Pipelines. You can extend {@link PipelineOptions}
- * to create custom configuration options specific to your {@link Pipeline},
- * for both local execution and execution via a {@link PipelineRunner}.
- *
- * <p>{@link PipelineOptions} and their subinterfaces represent a collection of properties
- * which can be manipulated in a type safe manner. {@link PipelineOptions} is backed by a
- * dynamic {@link Proxy} which allows for type safe manipulation of properties in an extensible
- * fashion through plain old Java interfaces.
- *
- * <p>{@link PipelineOptions} can be created with {@link PipelineOptionsFactory#create()}
- * and {@link PipelineOptionsFactory#as(Class)}. They can be created
- * from command-line arguments with {@link PipelineOptionsFactory#fromArgs(String[])}.
- * They can be converted to another type by invoking {@link PipelineOptions#as(Class)} and
- * can be accessed from within a {@link DoFn} by invoking
- * {@link Context#getPipelineOptions()}.
- *
- * <p>For example:
- * <pre>{@code
- * // The most common way to construct PipelineOptions is via command-line argument parsing:
- * public static void main(String[] args) {
- *   // Will parse the arguments passed into the application and construct a PipelineOptions
- *   // Note that --help will print registered options, and --help=PipelineOptionsClassName
- *   // will print out usage for the specific class.
- *   PipelineOptions options =
- *       PipelineOptionsFactory.fromArgs(args).create();
- *
- *   Pipeline p = Pipeline.create(options);
- *   ...
- *   p.run();
- * }
- *
- * // To create options for the DirectPipeline:
- * DirectPipelineOptions directPipelineOptions =
- *     PipelineOptionsFactory.as(DirectPipelineOptions.class);
- * directPipelineOptions.setStreaming(true);
- *
- * // To cast from one type to another using the as(Class) method:
- * DataflowPipelineOptions dataflowPipelineOptions =
- *     directPipelineOptions.as(DataflowPipelineOptions.class);
- *
- * // Options for the same property are shared between types
- * // The statement below will print out "true"
- * System.out.println(dataflowPipelineOptions.isStreaming());
- *
- * // Prints out registered options.
- * PipelineOptionsFactory.printHelp(System.out);
- *
- * // Prints out options which are available to be set on DataflowPipelineOptions
- * PipelineOptionsFactory.printHelp(System.out, DataflowPipelineOptions.class);
- * }</pre>
- *
- * <h2>Defining Your Own PipelineOptions</h2>
- *
- * Defining your own {@link PipelineOptions} is the way for you to make configuration
- * options available for both local execution and execution via a {@link PipelineRunner}.
- * By having PipelineOptionsFactory as your command-line interpreter, you will provide
- * a standardized way for users to interact with your application via the command-line.
- *
- * <p>To define your own {@link PipelineOptions}, you create an interface which
- * extends {@link PipelineOptions} and define getter/setter pairs. These
- * getter/setter pairs define a collection of
- * <a href="https://docs.oracle.com/javase/tutorial/javabeans/writing/properties.html">
- * JavaBean properties</a>.
- *
- * <p>For example:
- * <pre>{@code
- *  // Creates a user defined property called "myProperty"
- *  public interface MyOptions extends PipelineOptions {
- *    String getMyProperty();
- *    void setMyProperty(String value);
- *  }
- * }</pre>
- *
- * <p>Note: Please see the section on Registration below when using custom property types.
- *
- * <h3>Restrictions</h3>
- *
- * Since PipelineOptions can be "cast" to multiple types dynamically using
- * {@link PipelineOptions#as(Class)}, a property must conform to the following set of restrictions:
- * <ul>
- *   <li>Any property with the same name must have the same return type for all derived
- *       interfaces of {@link PipelineOptions}.
- *   <li>Every bean property of any interface derived from {@link PipelineOptions} must have a
- *       getter and setter method.
- *   <li>Every method must conform to being a getter or setter for a JavaBean.
- *   <li>The derived interface of {@link PipelineOptions} must be composable with every interface
- *       part registered with the PipelineOptionsFactory.
- *   <li>Only getters may be annotated with {@link JsonIgnore @JsonIgnore}.
- *   <li>If any getter is annotated with {@link JsonIgnore @JsonIgnore}, then all getters for
- *       this property must be annotated with {@link JsonIgnore @JsonIgnore}.
- * </ul>
- *
- * <h3>Annotations For PipelineOptions</h3>
- *
- * {@link Description @Description} can be used to annotate an interface or a getter
- * with useful information which is output when {@code --help}
- * is invoked via {@link PipelineOptionsFactory#fromArgs(String[])}.
- *
- * <p>{@link Default @Default} represents a set of annotations that can be used to annotate getter
- * properties on {@link PipelineOptions} with information representing the default value to be
- * returned if no value is specified. Any default implementation (using the {@code default} keyword)
- * is ignored.
- *
- * <p>{@link Hidden @Hidden} hides an option from being listed when {@code --help}
- * is invoked via {@link PipelineOptionsFactory#fromArgs(String[])}.
- *
- * <p>{@link Validation @Validation} represents a set of annotations that can be used to annotate
- * getter properties on {@link PipelineOptions} with information representing the validation
- * criteria to be used when validating with the {@link PipelineOptionsValidator}. Validation
- * will be performed if during construction of the {@link PipelineOptions},
- * {@link PipelineOptionsFactory#withValidation()} is invoked.
- *
- * <p>{@link JsonIgnore @JsonIgnore} is used to prevent a property from being serialized and
- * available during execution of {@link DoFn}. See the Serialization section below for more
- * details.
- *
- * <h2>Registration Of PipelineOptions</h2>
- *
- * Registration of {@link PipelineOptions} by an application guarantees that the
- * {@link PipelineOptions} is composable during execution of their {@link Pipeline} and
- * meets the restrictions listed above or will fail during registration. Registration
- * also lists the registered {@link PipelineOptions} when {@code --help}
- * is invoked via {@link PipelineOptionsFactory#fromArgs(String[])}.
- *
- * <p>Registration can be performed by invoking {@link PipelineOptionsFactory#register} within
- * a users application or via automatic registration by creating a {@link ServiceLoader} entry
- * and a concrete implementation of the {@link PipelineOptionsRegistrar} interface.
- *
- * <p>It is optional but recommended to use one of the many build time tools such as
- * {@link AutoService} to generate the necessary META-INF files automatically.
- *
- * <p>A list of registered options can be fetched from
- * {@link PipelineOptionsFactory#getRegisteredOptions()}.
- *
- * <h2>Serialization Of PipelineOptions</h2>
- *
- * {@link PipelineRunner}s require support for options to be serialized. Each property
- * within {@link PipelineOptions} must be able to be serialized using Jackson's
- * {@link ObjectMapper} or the getter method for the property annotated with
- * {@link JsonIgnore @JsonIgnore}.
- *
- * <p>Jackson supports serialization of many types and supports a useful set of
- * <a href="https://github.com/FasterXML/jackson-annotations">annotations</a> to aid in
- * serialization of custom types. We point you to the public
- * <a href="https://github.com/FasterXML/jackson">Jackson documentation</a> when attempting
- * to add serialization support for your custom types. See {@link GoogleApiTracer} for an
- * example using the Jackson annotations to serialize and deserialize a custom type.
- *
- * <p>Note: It is an error to have the same property available in multiple interfaces with only
- * some of them being annotated with {@link JsonIgnore @JsonIgnore}. It is also an error to mark a
- * setter for a property with {@link JsonIgnore @JsonIgnore}.
- */
-@JsonSerialize(using = Serializer.class)
-@JsonDeserialize(using = Deserializer.class)
-@ThreadSafe
-public interface PipelineOptions {
-  /**
-   * Transforms this object into an object of type {@code <T>} saving each property
-   * that has been manipulated. {@code <T>} must extend {@link PipelineOptions}.
-   *
-   * <p>If {@code <T>} is not registered with the {@link PipelineOptionsFactory}, then we
-   * attempt to verify that {@code <T>} is composable with every interface that this
-   * instance of the {@code PipelineOptions} has seen.
-   *
-   * @param kls The class of the type to transform to.
-   * @return An object of type kls.
-   */
-  <T extends PipelineOptions> T as(Class<T> kls);
-
-  /**
-   * Makes a deep clone of this object, and transforms the cloned object into the specified
-   * type {@code kls}. See {@link #as} for more information about the conversion.
-   *
-   * <p>Properties that are marked with {@code @JsonIgnore} will not be cloned.
-   */
-  <T extends PipelineOptions> T cloneAs(Class<T> kls);
-
-  /**
-   * The pipeline runner that will be used to execute the pipeline.
-   * For registered runners, the class name can be specified, otherwise the fully
-   * qualified name needs to be specified.
-   */
-  @Validation.Required
-  @Description("The pipeline runner that will be used to execute the pipeline. "
-      + "For registered runners, the class name can be specified, otherwise the fully "
-      + "qualified name needs to be specified.")
-  @Default.Class(DirectPipelineRunner.class)
-  Class<? extends PipelineRunner<?>> getRunner();
-  void setRunner(Class<? extends PipelineRunner<?>> kls);
-
-  /**
-   * Enumeration of the possible states for a given check.
-   */
-  public static enum CheckEnabled {
-    OFF,
-    WARNING,
-    ERROR;
-  }
-
-  /**
-   * Whether to check for stable unique names on each transform. This is necessary to
-   * support updating of pipelines.
-   */
-  @Validation.Required
-  @Description("Whether to check for stable unique names on each transform. This is necessary to "
-      + "support updating of pipelines.")
-  @Default.Enum("WARNING")
-  CheckEnabled getStableUniqueNames();
-  void setStableUniqueNames(CheckEnabled enabled);
-}

[65/67] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java
new file mode 100644
index 0000000..525de69
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/ExamplePubsubTopicOptions.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package}.common;
+
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.DefaultValueFactory;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+
+/**
+ * Options that can be used to configure the Pub/Sub topic in Dataflow examples.
+ */
+public interface ExamplePubsubTopicOptions extends DataflowPipelineOptions {
+  @Description("Pub/Sub topic")
+  @Default.InstanceFactory(PubsubTopicFactory.class)
+  String getPubsubTopic();
+  void setPubsubTopic(String topic);
+
+  @Description("Number of workers to use when executing the injector pipeline")
+  @Default.Integer(1)
+  int getInjectorNumWorkers();
+  void setInjectorNumWorkers(int numWorkers);
+
+  /**
+   * Returns a default Pub/Sub topic based on the project and the job names.
+   */
+  static class PubsubTopicFactory implements DefaultValueFactory<String> {
+    @Override
+    public String create(PipelineOptions options) {
+      DataflowPipelineOptions dataflowPipelineOptions =
+          options.as(DataflowPipelineOptions.class);
+      return "projects/" + dataflowPipelineOptions.getProject()
+          + "/topics/" + dataflowPipelineOptions.getJobName();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/PubsubFileInjector.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/PubsubFileInjector.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/PubsubFileInjector.java
new file mode 100644
index 0000000..f6f80ae
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/common/PubsubFileInjector.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package}.common;
+
+import com.google.api.services.pubsub.Pubsub;
+import com.google.api.services.pubsub.model.PublishRequest;
+import com.google.api.services.pubsub.model.PubsubMessage;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+import com.google.cloud.dataflow.sdk.options.Description;
+import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.options.Validation;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.IntraBundleParallelization;
+import com.google.cloud.dataflow.sdk.util.Transport;
+import com.google.common.collect.ImmutableMap;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * A batch Dataflow pipeline for injecting a set of GCS files into
+ * a PubSub topic line by line. Empty lines are skipped.
+ *
+ * <p>This is useful for testing streaming
+ * pipelines. Note that since batch pipelines might retry chunks, this
+ * does _not_ guarantee exactly-once injection of file data. Some lines may
+ * be published multiple times.
+ * </p>
+ */
+public class PubsubFileInjector {
+
+  /**
+   * An incomplete {@code PubsubFileInjector} transform with unbound output topic.
+   */
+  public static class Unbound {
+    private final String timestampLabelKey;
+
+    Unbound() {
+      this.timestampLabelKey = null;
+    }
+
+    Unbound(String timestampLabelKey) {
+      this.timestampLabelKey = timestampLabelKey;
+    }
+
+    Unbound withTimestampLabelKey(String timestampLabelKey) {
+      return new Unbound(timestampLabelKey);
+    }
+
+    public Bound publish(String outputTopic) {
+      return new Bound(outputTopic, timestampLabelKey);
+    }
+  }
+
+  /** A DoFn that publishes non-empty lines to Google Cloud PubSub. */
+  public static class Bound extends DoFn<String, Void> {
+    private final String outputTopic;
+    private final String timestampLabelKey;
+    public transient Pubsub pubsub;
+
+    public Bound(String outputTopic, String timestampLabelKey) {
+      this.outputTopic = outputTopic;
+      this.timestampLabelKey = timestampLabelKey;
+    }
+
+    @Override
+    public void startBundle(Context context) {
+      this.pubsub =
+          Transport.newPubsubClient(context.getPipelineOptions().as(DataflowPipelineOptions.class))
+              .build();
+    }
+
+    @Override
+    public void processElement(ProcessContext c) throws IOException {
+      if (c.element().isEmpty()) {
+        return;  // Skip empty lines; nothing is published for them.
+      }
+      PubsubMessage pubsubMessage = new PubsubMessage();  // Payload below is always UTF-8.
+      pubsubMessage.encodeData(c.element().getBytes(java.nio.charset.StandardCharsets.UTF_8));
+      if (timestampLabelKey != null) {  // Attach the element timestamp (millis) when a key is set.
+        pubsubMessage.setAttributes(
+            ImmutableMap.of(timestampLabelKey, Long.toString(c.timestamp().getMillis())));
+      }
+      PublishRequest publishRequest = new PublishRequest();
+      publishRequest.setMessages(Arrays.asList(pubsubMessage));
+      this.pubsub.projects().topics().publish(outputTopic, publishRequest).execute();
+    }
+  }
+
+  /**
+   * Creates a {@code PubsubFileInjector} transform with the given timestamp label key.
+   */
+  public static Unbound withTimestampLabelKey(String timestampLabelKey) {
+    return new Unbound(timestampLabelKey);
+  }
+
+  /**
+   * Creates a {@code PubsubFileInjector} transform that publishes to the given output topic.
+   */
+  public static Bound publish(String outputTopic) {
+    return new Unbound().publish(outputTopic);
+  }
+
+  /**
+   * Command line parameter options.
+   */
+  private interface PubsubFileInjectorOptions extends PipelineOptions {
+    @Description("GCS location of files.")
+    @Validation.Required
+    String getInput();
+    void setInput(String value);
+
+    @Description("Topic to publish on.")
+    @Validation.Required
+    String getOutputTopic();
+    void setOutputTopic(String value);
+  }
+
+  /**
+   * Sets up and starts the batch injector pipeline.
+   */
+  public static void main(String[] args) {
+    PubsubFileInjectorOptions options = PipelineOptionsFactory.fromArgs(args)
+        .withValidation()
+        .as(PubsubFileInjectorOptions.class);
+
+    Pipeline pipeline = Pipeline.create(options);
+
+    pipeline
+        .apply(TextIO.Read.from(options.getInput()))
+        .apply(IntraBundleParallelization.of(PubsubFileInjector.publish(options.getOutputTopic()))
+            .withMaxParallelism(20));
+
+    pipeline.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java
new file mode 100644
index 0000000..7a9aa4c
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/DebuggingWordCountTest.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package};
+
+import com.google.common.io.Files;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.File;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Tests for {@link DebuggingWordCount}.
+ */
+@RunWith(JUnit4.class)
+public class DebuggingWordCountTest {
+  @Rule public TemporaryFolder tmpFolder = new TemporaryFolder();
+
+  @Test
+  public void testDebuggingWordCount() throws Exception {
+    File file = tmpFolder.newFile();
+    Files.write("stomach secret Flourish message Flourish here Flourish", file,
+        StandardCharsets.UTF_8);
+    DebuggingWordCount.main(new String[]{"--inputFile=" + file.getAbsolutePath()});
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java
new file mode 100644
index 0000000..45555ce
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/test/java/WordCountTest.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package};
+
+import ${package}.WordCount.CountWords;
+import ${package}.WordCount.ExtractWordsFn;
+import ${package}.WordCount.FormatAsTextFn;
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;
+import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
+import com.google.cloud.dataflow.sdk.testing.RunnableOnService;
+import com.google.cloud.dataflow.sdk.testing.TestPipeline;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+import com.google.cloud.dataflow.sdk.values.PCollection;
+
+import org.hamcrest.CoreMatchers;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Tests of WordCount.
+ */
+@RunWith(JUnit4.class)
+public class WordCountTest {
+
+  /** Example test that tests a specific DoFn. */
+  @Test
+  public void testExtractWordsFn() {
+    DoFnTester<String, String> extractWordsFn =
+        DoFnTester.of(new ExtractWordsFn());
+
+    Assert.assertThat(extractWordsFn.processBatch(" some  input  words "),
+                      CoreMatchers.hasItems("some", "input", "words"));
+    Assert.assertThat(extractWordsFn.processBatch(" "),
+                      CoreMatchers.<String>hasItems());
+    Assert.assertThat(extractWordsFn.processBatch(" some ", " input", " words"),
+                      CoreMatchers.hasItems("some", "input", "words"));
+  }
+
+  static final String[] WORDS_ARRAY = new String[] {
+    "hi there", "hi", "hi sue bob",
+    "hi sue", "", "bob hi"};
+
+  static final List<String> WORDS = Arrays.asList(WORDS_ARRAY);
+
+  static final String[] COUNTS_ARRAY = new String[] {
+      "hi: 5", "there: 1", "sue: 2", "bob: 2"};
+
+  /** Example test that tests a PTransform by using an in-memory input and inspecting the output. */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testCountWords() throws Exception {
+    Pipeline p = TestPipeline.create();
+
+    PCollection<String> input = p.apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()));
+
+    PCollection<String> output = input.apply(new CountWords())
+      .apply(ParDo.of(new FormatAsTextFn()));
+
+    DataflowAssert.that(output).containsInAnyOrder(COUNTS_ARRAY);
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties b/sdks/java/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties
new file mode 100644
index 0000000..c59e77a
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/test/resources/projects/basic/archetype.properties
@@ -0,0 +1,5 @@
+package=it.pkg
+version=0.1-SNAPSHOT
+groupId=archetype.it
+artifactId=basic
+targetPlatform=1.7

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt b/sdks/java/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt
new file mode 100644
index 0000000..0b59873
--- /dev/null
+++ b/sdks/java/maven-archetypes/examples/src/test/resources/projects/basic/goal.txt
@@ -0,0 +1 @@
+verify

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/pom.xml b/sdks/java/maven-archetypes/pom.xml
new file mode 100644
index 0000000..59efe50
--- /dev/null
+++ b/sdks/java/maven-archetypes/pom.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.beam</groupId>
+    <artifactId>parent</artifactId>
+    <version>0.1.0-incubating-SNAPSHOT</version>
+    <relativePath>../../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>maven-archetypes-parent</artifactId>
+  <packaging>pom</packaging>
+
+  <name>Apache Beam :: Maven Archetypes</name>
+
+  <modules>
+    <module>starter</module>
+    <module>examples</module>
+  </modules>
+
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/starter/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/pom.xml b/sdks/java/maven-archetypes/starter/pom.xml
new file mode 100644
index 0000000..933e8b1
--- /dev/null
+++ b/sdks/java/maven-archetypes/starter/pom.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.beam</groupId>
+    <artifactId>maven-archetypes-parent</artifactId>
+    <version>0.1.0-incubating-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.beam</groupId>
+  <artifactId>maven-archetypes-starter</artifactId>
+  <name>Apache Beam :: Maven Archetypes :: Starter</name>
+  <description>A Maven archetype to create a simple starter pipeline to
+    get started using the Apache Beam Java SDK. </description>
+
+  <packaging>maven-archetype</packaging>
+
+  <build>
+    <extensions>
+      <extension>
+        <groupId>org.apache.maven.archetype</groupId>
+        <artifactId>archetype-packaging</artifactId>
+        <version>2.4</version>
+      </extension>
+    </extensions>
+
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <artifactId>maven-archetype-plugin</artifactId>
+          <version>2.4</version>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml b/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
new file mode 100644
index 0000000..bf75798
--- /dev/null
+++ b/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<archetype-descriptor
+    xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
+    name="Google Cloud Dataflow Starter Pipeline Archetype"
+    xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <requiredProperties>
+    <requiredProperty key="targetPlatform">
+      <defaultValue>1.7</defaultValue>
+    </requiredProperty>
+  </requiredProperties>
+
+  <fileSets>
+    <fileSet filtered="true" packaged="true" encoding="UTF-8">
+      <directory>src/main/java</directory>
+      <includes>
+        <include>**/*.java</include>
+      </includes>
+    </fileSet>
+  </fileSets>
+</archetype-descriptor>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
new file mode 100644
index 0000000..19e7d2d
--- /dev/null
+++ b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/pom.xml
@@ -0,0 +1,43 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>${groupId}</groupId>
+  <artifactId>${artifactId}</artifactId>
+  <version>${version}</version>
+
+  <build>
+   <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.3</version>
+        <configuration>
+          <source>${targetPlatform}</source>
+          <target>${targetPlatform}</target>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>java-sdk-all</artifactId>
+      <version>[0-incubating, 1-incubating)</version>
+    </dependency>
+
+    <!-- slf4j API frontend binding with JUL backend -->
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>1.7.7</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-jdk14</artifactId>
+      <version>1.7.7</version>
+    </dependency>
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java
new file mode 100644
index 0000000..ffabbc0
--- /dev/null
+++ b/sdks/java/maven-archetypes/starter/src/main/resources/archetype-resources/src/main/java/StarterPipeline.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package ${package};
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A starter example for writing Google Cloud Dataflow programs.
+ *
+ * <p>The example takes two strings, converts them to their upper-case
+ * representation and logs them.
+ *
+ * <p>To run this starter example locally using DirectPipelineRunner, just
+ * execute it without any additional parameters from your favorite development
+ * environment.
+ *
+ * <p>To run this starter example using managed resource in Google Cloud
+ * Platform, you should specify the following command-line options:
+ *   --project=<YOUR_PROJECT_ID>
+ *   --stagingLocation=<STAGING_LOCATION_IN_CLOUD_STORAGE>
+ *   --runner=BlockingDataflowPipelineRunner
+ */
+public class StarterPipeline {
+  private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class);
+
+  public static void main(String[] args) {
+    Pipeline p = Pipeline.create(
+        PipelineOptionsFactory.fromArgs(args).withValidation().create());
+
+    p.apply(Create.of("Hello", "World"))
+    .apply(ParDo.of(new DoFn<String, String>() {
+      @Override
+      public void processElement(ProcessContext c) {
+        c.output(c.element().toUpperCase());
+      }
+    }))
+    .apply(ParDo.of(new DoFn<String, Void>() {
+      @Override
+      public void processElement(ProcessContext c)  {
+        LOG.info(c.element());
+      }
+    }));
+
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties
new file mode 100644
index 0000000..c59e77a
--- /dev/null
+++ b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/archetype.properties
@@ -0,0 +1,5 @@
+package=it.pkg
+version=0.1-SNAPSHOT
+groupId=archetype.it
+artifactId=basic
+targetPlatform=1.7

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt
new file mode 100644
index 0000000..0b59873
--- /dev/null
+++ b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/goal.txt
@@ -0,0 +1 @@
+verify

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
new file mode 100644
index 0000000..d29424a
--- /dev/null
+++ b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
@@ -0,0 +1,43 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <groupId>archetype.it</groupId>
+  <artifactId>basic</artifactId>
+  <version>0.1-SNAPSHOT</version>
+
+  <build>
+   <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.3</version>
+        <configuration>
+          <source>1.7</source>
+          <target>1.7</target>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>java-sdk-all</artifactId>
+      <version>[0-incubating, 1-incubating)</version>
+    </dependency>
+
+    <!-- slf4j API frontend binding with JUL backend -->
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>1.7.7</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-jdk14</artifactId>
+      <version>1.7.7</version>
+    </dependency>
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/257a7a6b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java
new file mode 100644
index 0000000..2e7c4e1
--- /dev/null
+++ b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/src/main/java/it/pkg/StarterPipeline.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package it.pkg;
+
+import com.google.cloud.dataflow.sdk.Pipeline;
+import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+import com.google.cloud.dataflow.sdk.transforms.Create;
+import com.google.cloud.dataflow.sdk.transforms.DoFn;
+import com.google.cloud.dataflow.sdk.transforms.ParDo;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A starter example for writing Google Cloud Dataflow programs.
+ *
+ * <p>The example takes two strings, converts them to their upper-case
+ * representation and logs them.
+ *
+ * <p>To run this starter example locally using DirectPipelineRunner, just
+ * execute it without any additional parameters from your favorite development
+ * environment.
+ *
+ * <p>To run this starter example using managed resource in Google Cloud
+ * Platform, you should specify the following command-line options:
+ *   --project=<YOUR_PROJECT_ID>
+ *   --stagingLocation=<STAGING_LOCATION_IN_CLOUD_STORAGE>
+ *   --runner=BlockingDataflowPipelineRunner
+ */
+public class StarterPipeline {
+  private static final Logger LOG = LoggerFactory.getLogger(StarterPipeline.class);
+
+  public static void main(String[] args) {
+    Pipeline p = Pipeline.create(
+        PipelineOptionsFactory.fromArgs(args).withValidation().create());
+
+    p.apply(Create.of("Hello", "World"))
+    .apply(ParDo.of(new DoFn<String, String>() {
+      @Override
+      public void processElement(ProcessContext c) {
+        c.output(c.element().toUpperCase());
+      }
+    }))
+    .apply(ParDo.of(new DoFn<String, Void>() {
+      @Override
+      public void processElement(ProcessContext c)  {
+        LOG.info(c.element());
+      }
+    }));
+
+    p.run();
+  }
+}

[25/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java
deleted file mode 100644
index b8d20e3..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/transforms/Combine.java
+++ /dev/null
@@ -1,2240 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.transforms;
-
-import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
-import com.google.cloud.dataflow.sdk.coders.CustomCoder;
-import com.google.cloud.dataflow.sdk.coders.DelegateCoder;
-import com.google.cloud.dataflow.sdk.coders.IterableCoder;
-import com.google.cloud.dataflow.sdk.coders.KvCoder;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.coders.VarIntCoder;
-import com.google.cloud.dataflow.sdk.coders.VoidCoder;
-import com.google.cloud.dataflow.sdk.transforms.CombineFnBase.AbstractGlobalCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineFnBase.AbstractPerKeyCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineFnBase.GlobalCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineFnBase.PerKeyCombineFn;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.CombineFnWithContext;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.Context;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.KeyedCombineFnWithContext;
-import com.google.cloud.dataflow.sdk.transforms.CombineWithContext.RequiresContextInternal;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
-import com.google.cloud.dataflow.sdk.util.AppliedCombineFn;
-import com.google.cloud.dataflow.sdk.util.PerKeyCombineFnRunner;
-import com.google.cloud.dataflow.sdk.util.PerKeyCombineFnRunners;
-import com.google.cloud.dataflow.sdk.util.PropertyNames;
-import com.google.cloud.dataflow.sdk.util.SerializableUtils;
-import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
-import com.google.cloud.dataflow.sdk.util.common.Counter;
-import com.google.cloud.dataflow.sdk.values.KV;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.cloud.dataflow.sdk.values.PCollectionList;
-import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-import com.google.cloud.dataflow.sdk.values.TupleTagList;
-import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.concurrent.ThreadLocalRandom;
-
-/**
- * {@code PTransform}s for combining {@code PCollection} elements
- * globally and per-key.
- *
- * <p>See the <a href="https://cloud.google.com/dataflow/model/combine">documentation</a>
- * for how to use the operations in this class.
- */
-public class Combine {
-  private Combine() {
-    // do not instantiate
-  }
-
-  /**
-   * Returns a {@link Globally Combine.Globally} {@code PTransform}
-   * that uses the given {@code SerializableFunction} to combine all
-   * the elements in each window of the input {@code PCollection} into a
-   * single value in the output {@code PCollection}.  The types of the input
-   * elements and the output elements must be the same.
-   *
-   * <p>If the input {@code PCollection} is windowed into {@link GlobalWindows},
-   * a default value in the {@link GlobalWindow} will be output if the input
-   * {@code PCollection} is empty.  To use this with inputs with other windowing,
-   * either {@link Globally#withoutDefaults} or {@link Globally#asSingletonView}
-   * must be called.
-   *
-   * <p>See {@link Globally Combine.Globally} for more information.
-   */
-  public static <V> Globally<V, V> globally(
-      SerializableFunction<Iterable<V>, V> combiner) {
-    return globally(IterableCombineFn.of(combiner));
-  }
-
-  /**
-   * Returns a {@link Globally Combine.Globally} {@code PTransform}
-   * that uses the given {@code GlobalCombineFn} to combine all
-   * the elements in each window of the input {@code PCollection} into a
-   * single value in the output {@code PCollection}.  The types of the input
-   * elements and the output elements can differ.
-   *
-   * <p>If the input {@code PCollection} is windowed into {@link GlobalWindows},
-   * a default value in the {@link GlobalWindow} will be output if the input
-   * {@code PCollection} is empty.  To use this with inputs with other windowing,
-   * either {@link Globally#withoutDefaults} or {@link Globally#asSingletonView}
-   * must be called.
-   *
-   * <p>See {@link Globally Combine.Globally} for more information.
-   */
-  public static <InputT, OutputT> Globally<InputT, OutputT> globally(
-      GlobalCombineFn<? super InputT, ?, OutputT> fn) {
-    return new Globally<>(fn, true, 0);
-  }
-
-  /**
-   * Returns a {@link PerKey Combine.PerKey} {@code PTransform} that
-   * first groups its input {@code PCollection} of {@code KV}s by keys and
-   * windows, then invokes the given function on each of the values lists to
-   * produce a combined value, and then returns a {@code PCollection}
-   * of {@code KV}s mapping each distinct key to its combined value for each
-   * window.
-   *
-   * <p>Each output element is in the window by which its corresponding input
-   * was grouped, and has the timestamp of the end of that window.  The output
-   * {@code PCollection} has the same
-   * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-   * as the input.
-   *
-   * <p>See {@link PerKey Combine.PerKey} for more information.
-   */
-  public static <K, V> PerKey<K, V, V> perKey(
-      SerializableFunction<Iterable<V>, V> fn) {
-    return perKey(Combine.IterableCombineFn.of(fn));
-  }
-
-  /**
-   * Returns a {@link PerKey Combine.PerKey} {@code PTransform} that
-   * first groups its input {@code PCollection} of {@code KV}s by keys and
-   * windows, then invokes the given function on each of the values lists to
-   * produce a combined value, and then returns a {@code PCollection}
-   * of {@code KV}s mapping each distinct key to its combined value for each
-   * window.
-   *
-   * <p>Each output element is in the window by which its corresponding input
-   * was grouped, and has the timestamp of the end of that window.  The output
-   * {@code PCollection} has the same
-   * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-   * as the input.
-   *
-   * <p>See {@link PerKey Combine.PerKey} for more information.
-   */
-  public static <K, InputT, OutputT> PerKey<K, InputT, OutputT> perKey(
-      GlobalCombineFn<? super InputT, ?, OutputT> fn) {
-    return perKey(fn.<K>asKeyedFn());
-  }
-
-  /**
-   * Returns a {@link PerKey Combine.PerKey} {@code PTransform} that
-   * first groups its input {@code PCollection} of {@code KV}s by keys and
-   * windows, then invokes the given function on each of the key/values-lists
-   * pairs to produce a combined value, and then returns a
-   * {@code PCollection} of {@code KV}s mapping each distinct key to
-   * its combined value for each window.
-   *
-   * <p>Each output element is in the window by which its corresponding input
-   * was grouped, and has the timestamp of the end of that window.  The output
-   * {@code PCollection} has the same
-   * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-   * as the input.
-   *
-   * <p>See {@link PerKey Combine.PerKey} for more information.
-   */
-  public static <K, InputT, OutputT> PerKey<K, InputT, OutputT> perKey(
-      PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn) {
-    return new PerKey<>(fn, false /*fewKeys*/);
-  }
-
-  /**
-   * Returns a {@link PerKey Combine.PerKey}, and sets fewKeys
-   * in {@link GroupByKey}.
-   */
-  private static <K, InputT, OutputT> PerKey<K, InputT, OutputT> fewKeys(
-      PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn) {
-    return new PerKey<>(fn, true /*fewKeys*/);
-  }
-
-  /**
-   * Returns a {@link GroupedValues Combine.GroupedValues}
-   * {@code PTransform} that takes a {@code PCollection} of
-   * {@code KV}s where a key maps to an {@code Iterable} of values, e.g.,
-   * the result of a {@code GroupByKey}, then uses the given
-   * {@code SerializableFunction} to combine all the values associated
-   * with a key, ignoring the key.  The type of the input and
-   * output values must be the same.
-   *
-   * <p>Each output element has the same timestamp and is in the same window
-   * as its corresponding input element, and the output
-   * {@code PCollection} has the same
-   * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-   * associated with it as the input.
-   *
-   * <p>See {@link GroupedValues Combine.GroupedValues} for more information.
-   *
-   * <p>Note that {@link #perKey(SerializableFunction)} is typically
-   * more convenient to use than {@link GroupByKey} followed by
-   * {@code groupedValues(...)}.
-   */
-  public static <K, V> GroupedValues<K, V, V> groupedValues(
-      SerializableFunction<Iterable<V>, V> fn) {
-    return groupedValues(IterableCombineFn.of(fn));
-  }
-
-  /**
-   * Returns a {@link GroupedValues Combine.GroupedValues}
-   * {@code PTransform} that takes a {@code PCollection} of
-   * {@code KV}s where a key maps to an {@code Iterable} of values, e.g.,
-   * the result of a {@code GroupByKey}, then uses the given
-   * {@code CombineFn} to combine all the values associated with a
-   * key, ignoring the key.  The types of the input and output values
-   * can differ.
-   *
-   * <p>Each output element has the same timestamp and is in the same window
-   * as its corresponding input element, and the output
-   * {@code PCollection} has the same
-   * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-   * associated with it as the input.
-   *
-   * <p>See {@link GroupedValues Combine.GroupedValues} for more information.
-   *
-   * <p>Note that {@link #perKey(CombineFnBase.GlobalCombineFn)} is typically
-   * more convenient to use than {@link GroupByKey} followed by
-   * {@code groupedValues(...)}.
-   */
-  public static <K, InputT, OutputT> GroupedValues<K, InputT, OutputT> groupedValues(
-      GlobalCombineFn<? super InputT, ?, OutputT> fn) {
-    return groupedValues(fn.<K>asKeyedFn());
-  }
-
-  /**
-   * Returns a {@link GroupedValues Combine.GroupedValues}
-   * {@code PTransform} that takes a {@code PCollection} of
-   * {@code KV}s where a key maps to an {@code Iterable} of values, e.g.,
-   * the result of a {@code GroupByKey}, then uses the given
-   * {@code KeyedCombineFn} to combine all the values associated with
-   * each key.  The combining function is provided the key.  The types
-   * of the input and output values can differ.
-   *
-   * <p>Each output element has the same timestamp and is in the same window
-   * as its corresponding input element, and the output
-   * {@code PCollection} has the same
-   * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-   * associated with it as the input.
-   *
-   * <p>See {@link GroupedValues Combine.GroupedValues} for more information.
-   *
-   * <p>Note that {@link #perKey(CombineFnBase.PerKeyCombineFn)} is typically
-   * more convenient to use than {@link GroupByKey} followed by
-   * {@code groupedValues(...)}.
-   */
-  public static <K, InputT, OutputT> GroupedValues<K, InputT, OutputT> groupedValues(
-      PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn) {
-    return new GroupedValues<>(fn);
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@code CombineFn<InputT, AccumT, OutputT>} specifies how to combine a
-   * collection of input values of type {@code InputT} into a single
-   * output value of type {@code OutputT}.  It does this via one or more
-   * intermediate mutable accumulator values of type {@code AccumT}.
-   *
-   * <p>The overall process to combine a collection of input
-   * {@code InputT} values into a single output {@code OutputT} value is as
-   * follows:
-   *
-   * <ol>
-   *
-   * <li> The input {@code InputT} values are partitioned into one or more
-   * batches.
-   *
-   * <li> For each batch, the {@link #createAccumulator} operation is
-   * invoked to create a fresh mutable accumulator value of type
-   * {@code AccumT}, initialized to represent the combination of zero
-   * values.
-   *
-   * <li> For each input {@code InputT} value in a batch, the
-   * {@link #addInput} operation is invoked to add the value to that
-   * batch's accumulator {@code AccumT} value.  The accumulator may just
-   * record the new value (e.g., if {@code AccumT == List<InputT>}), or may do
-   * work to represent the combination more compactly.
-   *
-   * <li> The {@link #mergeAccumulators} operation is invoked to
-   * combine a collection of accumulator {@code AccumT} values into a
-   * single combined output accumulator {@code AccumT} value, once the
-   * merging accumulators have had all the input values in their
-   * batches added to them.  This operation is invoked repeatedly,
-   * until there is only one accumulator value left.
-   *
-   * <li> The {@link #extractOutput} operation is invoked on the final
-   * accumulator {@code AccumT} value to get the output {@code OutputT} value.
-   *
-   * </ol>
-   *
-   * <p>For example:
-   * <pre> {@code
-   * public class AverageFn extends CombineFn<Integer, AverageFn.Accum, Double> {
-   *   public static class Accum {
-   *     int sum = 0;
-   *     int count = 0;
-   *   }
-   *   public Accum createAccumulator() {
-   *     return new Accum();
-   *   }
-   *   public Accum addInput(Accum accum, Integer input) {
-   *       accum.sum += input;
-   *       accum.count++;
-   *       return accum;
-   *   }
-   *   public Accum mergeAccumulators(Iterable<Accum> accums) {
-   *     Accum merged = createAccumulator();
-   *     for (Accum accum : accums) {
-   *       merged.sum += accum.sum;
-   *       merged.count += accum.count;
-   *     }
-   *     return merged;
-   *   }
-   *   public Double extractOutput(Accum accum) {
-   *     return ((double) accum.sum) / accum.count;
-   *   }
-   * }
-   * PCollection<Integer> pc = ...;
-   * PCollection<Double> average = pc.apply(Combine.globally(new AverageFn()));
-   * } </pre>
-   *
-   * <p>Combining functions used by {@link Combine.Globally},
-   * {@link Combine.PerKey}, {@link Combine.GroupedValues}, and
-   * {@code PTransforms} derived from them should be
-   * <i>associative</i> and <i>commutative</i>.  Associativity is
-   * required because input values are first broken up into subgroups
-   * before being combined, and their intermediate results further
-   * combined, in an arbitrary tree structure.  Commutativity is
-   * required because any order of the input values is ignored when
-   * breaking up input values into groups.
-   *
-   * @param <InputT> type of input values
-   * @param <AccumT> type of mutable accumulator values
-   * @param <OutputT> type of output values
-   */
-  public abstract static class CombineFn<InputT, AccumT, OutputT>
-      extends AbstractGlobalCombineFn<InputT, AccumT, OutputT> {
-
-    /**
-     * Returns a new, mutable accumulator value, representing the accumulation of zero input values.
-     */
-    public abstract AccumT createAccumulator();
-
-    /**
-     * Adds the given input value to the given accumulator, returning the
-     * new accumulator value.
-     *
-     * <p>For efficiency, the input accumulator may be modified and returned.
-     */
-    public abstract AccumT addInput(AccumT accumulator, InputT input);
-
-    /**
-     * Returns an accumulator representing the accumulation of all the
-     * input values accumulated in the merging accumulators.
-     *
-     * <p>May modify any of the argument accumulators.  May return a
-     * fresh accumulator, or may return one of the (modified) argument
-     * accumulators.
-     */
-    public abstract AccumT mergeAccumulators(Iterable<AccumT> accumulators);
-
-    /**
-     * Returns the output value that is the result of combining all
-     * the input values represented by the given accumulator.
-     */
-    public abstract OutputT extractOutput(AccumT accumulator);
-
-    /**
-     * Returns an accumulator that represents the same logical value as the
-     * input accumulator, but may have a more compact representation.
-     *
-     * <p>For most CombineFns this would be a no-op, but should be overridden
-     * by CombineFns that (for example) buffer up elements and combine
-     * them in batches.
-     *
-     * <p>For efficiency, the input accumulator may be modified and returned.
-     *
-     * <p>By default returns the original accumulator.
-     */
-    public AccumT compact(AccumT accumulator) {
-      return accumulator;
-    }
-
-    /**
-     * Applies this {@code CombineFn} to a collection of input values
-     * to produce a combined output value.
-     *
-     * <p>Useful when using a {@code CombineFn}  separately from a
-     * {@code Combine} transform.  Does not invoke the
-     * {@link #mergeAccumulators} operation.
-     */
-    public OutputT apply(Iterable<? extends InputT> inputs) {
-      AccumT accum = createAccumulator();
-      for (InputT input : inputs) {
-        accum = addInput(accum, input);
-      }
-      return extractOutput(accum);
-    }
-
-    /**
-     * {@inheritDoc}
-     *
-     * <p>By default returns the output extracted from an empty accumulator.
-     */
-    @Override
-    public OutputT defaultValue() {
-      return extractOutput(createAccumulator());
-    }
-
-    /**
-     * Returns a {@link TypeDescriptor} capturing what is known statically
-     * about the output type of this {@code CombineFn} instance's
-     * most-derived class.
-     *
-     * <p>In the normal case of a concrete {@code CombineFn} subclass with
-     * no generic type parameters of its own, this will be a complete
-     * non-generic type.
-     */
-    public TypeDescriptor<OutputT> getOutputType() {
-      return new TypeDescriptor<OutputT>(getClass()) {};
-    }
-
-    @SuppressWarnings({"unchecked", "rawtypes"})
-    @Override
-    public <K> KeyedCombineFn<K, InputT, AccumT, OutputT> asKeyedFn() {
-      // The key, an object, is never even looked at.
-      return new KeyedCombineFn<K, InputT, AccumT, OutputT>() {
-        @Override
-        public AccumT createAccumulator(K key) {
-          return CombineFn.this.createAccumulator();
-        }
-
-        @Override
-        public AccumT addInput(K key, AccumT accumulator, InputT input) {
-          return CombineFn.this.addInput(accumulator, input);
-        }
-
-        @Override
-        public AccumT mergeAccumulators(K key, Iterable<AccumT> accumulators) {
-          return CombineFn.this.mergeAccumulators(accumulators);
-        }
-
-        @Override
-        public OutputT extractOutput(K key, AccumT accumulator) {
-          return CombineFn.this.extractOutput(accumulator);
-        }
-
-        @Override
-        public AccumT compact(K key, AccumT accumulator) {
-          return CombineFn.this.compact(accumulator);
-        }
-
-        @Override
-        public Coder<AccumT> getAccumulatorCoder(
-            CoderRegistry registry, Coder<K> keyCoder, Coder<InputT> inputCoder)
-            throws CannotProvideCoderException {
-          return CombineFn.this.getAccumulatorCoder(registry, inputCoder);
-        }
-
-        @Override
-        public Coder<OutputT> getDefaultOutputCoder(
-            CoderRegistry registry, Coder<K> keyCoder, Coder<InputT> inputCoder)
-            throws CannotProvideCoderException {
-          return CombineFn.this.getDefaultOutputCoder(registry, inputCoder);
-        }
-
-        @Override
-        public CombineFn<InputT, AccumT, OutputT> forKey(K key, Coder<K> keyCoder) {
-          return CombineFn.this;
-        }
-      };
-    }
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * An abstract subclass of {@link CombineFn} for implementing combiners that are more
-   * easily expressed as binary operations.
-   */
-  public abstract static class BinaryCombineFn<V> extends
-      CombineFn<V, Holder<V>, V> {
-
-    /**
-     * Applies the binary operation to the two operands, returning the result.
-     */
-    public abstract V apply(V left, V right);
-
-    /**
-     * Returns the value that should be used for the combine of the empty set.
-     */
-    public V identity() {
-      return null;
-    }
-
-    @Override
-    public Holder<V> createAccumulator() {
-      return new Holder<>();
-    }
-
-    @Override
-    public Holder<V> addInput(Holder<V> accumulator, V input) {
-      if (accumulator.present) {
-        accumulator.set(apply(accumulator.value, input));
-      } else {
-        accumulator.set(input);
-      }
-      return accumulator;
-    }
-
-    @Override
-    public Holder<V> mergeAccumulators(Iterable<Holder<V>> accumulators) {
-      Iterator<Holder<V>> iter = accumulators.iterator();
-      if (!iter.hasNext()) {
-        return createAccumulator();
-      } else {
-        Holder<V> running = iter.next();
-        while (iter.hasNext()) {
-          Holder<V> accum = iter.next();
-          if (accum.present) {
-            if (running.present) {
-              running.set(apply(running.value, accum.value));
-            } else {
-              running.set(accum.value);
-            }
-          }
-        }
-        return running;
-      }
-    }
-
-    @Override
-    public V extractOutput(Holder<V> accumulator) {
-      if (accumulator.present) {
-        return accumulator.value;
-      } else {
-        return identity();
-      }
-    }
-
-    @Override
-    public Coder<Holder<V>> getAccumulatorCoder(CoderRegistry registry, Coder<V> inputCoder) {
-      return new HolderCoder<>(inputCoder);
-    }
-
-    @Override
-    public Coder<V> getDefaultOutputCoder(CoderRegistry registry, Coder<V> inputCoder) {
-      return inputCoder;
-    }
-
-  }
-
-  /**
-   * Holds a single value of type {@code V} which may or may not be present.
-   *
-   * <p>Used only as a private accumulator class.
-   */
-  public static class Holder<V> {
-    private V value;
-    private boolean present;
-    private Holder() { }
-    private Holder(V value) {
-      set(value);
-    }
-
-    private void set(V value) {
-      this.present = true;
-      this.value = value;
-    }
-  }
-
-  /**
-   * A {@link Coder} for a {@link Holder}.
-   */
-  private static class HolderCoder<V> extends CustomCoder<Holder<V>> {
-
-    private Coder<V> valueCoder;
-
-    public HolderCoder(Coder<V> valueCoder) {
-      this.valueCoder = valueCoder;
-    }
-
-    @Override
-    public List<Coder<?>> getCoderArguments() {
-      return Arrays.<Coder<?>>asList(valueCoder);
-    }
-
-    @Override
-    public void encode(Holder<V> accumulator, OutputStream outStream, Context context)
-        throws CoderException, IOException {
-      if (accumulator.present) {
-        outStream.write(1);
-        valueCoder.encode(accumulator.value, outStream, context);
-      } else {
-        outStream.write(0);
-      }
-    }
-
-    @Override
-    public Holder<V> decode(InputStream inStream, Context context)
-        throws CoderException, IOException {
-      if (inStream.read() == 1) {
-        return new Holder<>(valueCoder.decode(inStream, context));
-      } else {
-        return new Holder<>();
-      }
-    }
-
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      valueCoder.verifyDeterministic();
-    }
-  }
-
-  /**
-   * An abstract subclass of {@link CombineFn} for implementing combiners that are more
-   * easily and efficiently expressed as binary operations on <code>int</code>s.
-   *
-   * <p> It uses {@code int[0]} as the mutable accumulator.
-   */
-  public abstract static class BinaryCombineIntegerFn extends CombineFn<Integer, int[], Integer> {
-
-    /**
-     * Applies the binary operation to the two operands, returning the result.
-     */
-    public abstract int apply(int left, int right);
-
-    /**
-     * Returns the identity element of this operation, i.e. an element {@code e}
-     * such that {@code apply(e, x) == apply(x, e) == x} for all values of {@code x}.
-     */
-    public abstract int identity();
-
-    @Override
-    public int[] createAccumulator() {
-      return wrap(identity());
-    }
-
-    @Override
-    public int[] addInput(int[] accumulator, Integer input) {
-      accumulator[0] = apply(accumulator[0], input);
-      return accumulator;
-    }
-
-    @Override
-    public int[] mergeAccumulators(Iterable<int[]> accumulators) {
-      Iterator<int[]> iter = accumulators.iterator();
-      if (!iter.hasNext()) {
-        return createAccumulator();
-      } else {
-        int[] running = iter.next();
-        while (iter.hasNext()) {
-          running[0] = apply(running[0], iter.next()[0]);
-        }
-        return running;
-      }
-    }
-
-    @Override
-    public Integer extractOutput(int[] accumulator) {
-      return accumulator[0];
-    }
-
-    @Override
-    public Coder<int[]> getAccumulatorCoder(CoderRegistry registry, Coder<Integer> inputCoder) {
-      return DelegateCoder.of(
-          inputCoder,
-          new DelegateCoder.CodingFunction<int[], Integer>() {
-            @Override
-            public Integer apply(int[] accumulator) {
-              return accumulator[0];
-            }
-          },
-          new DelegateCoder.CodingFunction<Integer, int[]>() {
-            @Override
-            public int[] apply(Integer value) {
-              return wrap(value);
-            }
-          });
-    }
-
-    @Override
-    public Coder<Integer> getDefaultOutputCoder(CoderRegistry registry,
-                                                Coder<Integer> inputCoder) {
-      return inputCoder;
-    }
-
-    private int[] wrap(int value) {
-      return new int[] { value };
-    }
-
-    /**
-     * Always fails: this combiner does not provide a {@code Counter}.
-     *
-     * @param name the requested counter name (unused)
-     * @throws UnsupportedOperationException always
-     */
-    public Counter<Integer> getCounter(String name) {
-      // The message names this class so the failure points at the right combiner.
-      throw new UnsupportedOperationException("BinaryCombineIntegerFn does not support getCounter");
-    }
-  }
-
-  /**
-   * An abstract subclass of {@link CombineFn} for implementing combiners that are more
-   * easily and efficiently expressed as binary operations on <code>long</code>s.
-   *
-   * <p> It uses {@code long[0]} as the mutable accumulator.
-   */
-  public abstract static class BinaryCombineLongFn extends CombineFn<Long, long[], Long> {
-    /**
-     * Applies the binary operation to the two operands, returning the result.
-     */
-    public abstract long apply(long left, long right);
-
-    /**
-     * Returns the identity element of this operation, i.e. an element {@code e}
-     * such that {@code apply(e, x) == apply(x, e) == x} for all values of {@code x}.
-     */
-    public abstract long identity();
-
-    @Override
-    public long[] createAccumulator() {
-      return wrap(identity());
-    }
-
-    @Override
-    public long[] addInput(long[] accumulator, Long input) {
-      accumulator[0] = apply(accumulator[0], input);
-      return accumulator;
-    }
-
-    @Override
-    public long[] mergeAccumulators(Iterable<long[]> accumulators) {
-      Iterator<long[]> iter = accumulators.iterator();
-      if (!iter.hasNext()) {
-        return createAccumulator();
-      } else {
-        long[] running = iter.next();
-        while (iter.hasNext()) {
-          running[0] = apply(running[0], iter.next()[0]);
-        }
-        return running;
-      }
-    }
-
-    @Override
-    public Long extractOutput(long[] accumulator) {
-      return accumulator[0];
-    }
-
-    @Override
-    public Coder<long[]> getAccumulatorCoder(CoderRegistry registry, Coder<Long> inputCoder) {
-      return DelegateCoder.of(
-          inputCoder,
-          new DelegateCoder.CodingFunction<long[], Long>() {
-            @Override
-            public Long apply(long[] accumulator) {
-              return accumulator[0];
-            }
-          },
-          new DelegateCoder.CodingFunction<Long, long[]>() {
-            @Override
-            public long[] apply(Long value) {
-              return wrap(value);
-            }
-          });
-    }
-
-    @Override
-    public Coder<Long> getDefaultOutputCoder(CoderRegistry registry, Coder<Long> inputCoder) {
-      return inputCoder;
-    }
-
-    private long[] wrap(long value) {
-      return new long[] { value };
-    }
-
-    /**
-     * Always fails: this combiner does not provide a {@code Counter}.
-     *
-     * @param name the requested counter name (unused)
-     * @throws UnsupportedOperationException always
-     */
-    public Counter<Long> getCounter(String name) {
-      // The message names this class so the failure points at the right combiner.
-      throw new UnsupportedOperationException("BinaryCombineLongFn does not support getCounter");
-    }
-  }
-
-  /**
-   * An abstract subclass of {@link CombineFn} for implementing combiners that are more
-   * easily and efficiently expressed as binary operations on <code>double</code>s.
-   *
-   * <p> It uses {@code double[0]} as the mutable accumulator.
-   */
-  public abstract static class BinaryCombineDoubleFn extends CombineFn<Double, double[], Double> {
-
-    /**
-     * Applies the binary operation to the two operands, returning the result.
-     */
-    public abstract double apply(double left, double right);
-
-    /**
-     * Returns the identity element of this operation, i.e. an element {@code e}
-     * such that {@code apply(e, x) == apply(x, e) == x} for all values of {@code x}.
-     */
-    public abstract double identity();
-
-    @Override
-    public double[] createAccumulator() {
-      return wrap(identity());
-    }
-
-    @Override
-    public double[] addInput(double[] accumulator, Double input) {
-      accumulator[0] = apply(accumulator[0], input);
-      return accumulator;
-    }
-
-    @Override
-    public double[] mergeAccumulators(Iterable<double[]> accumulators) {
-      Iterator<double[]> iter = accumulators.iterator();
-      if (!iter.hasNext()) {
-        return createAccumulator();
-      } else {
-        double[] running = iter.next();
-        while (iter.hasNext()) {
-          running[0] = apply(running[0], iter.next()[0]);
-        }
-        return running;
-      }
-    }
-
-    @Override
-    public Double extractOutput(double[] accumulator) {
-      return accumulator[0];
-    }
-
-    @Override
-    public Coder<double[]> getAccumulatorCoder(CoderRegistry registry, Coder<Double> inputCoder) {
-      return DelegateCoder.of(
-          inputCoder,
-          new DelegateCoder.CodingFunction<double[], Double>() {
-            @Override
-            public Double apply(double[] accumulator) {
-              return accumulator[0];
-            }
-          },
-          new DelegateCoder.CodingFunction<Double, double[]>() {
-            @Override
-            public double[] apply(Double value) {
-              return wrap(value);
-            }
-          });
-    }
-
-    @Override
-    public Coder<Double> getDefaultOutputCoder(CoderRegistry registry, Coder<Double> inputCoder) {
-      return inputCoder;
-    }
-
-    private double[] wrap(double value) {
-      return new double[] { value };
-    }
-
-    public Counter<Double> getCounter(String name) {
-      throw new UnsupportedOperationException("BinaryCombineDoubleFn does not support getCounter");
-    }
-  }
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * A {@code CombineFn} that uses a subclass of
-   * {@link AccumulatingCombineFn.Accumulator} as its accumulator
-   * type.  By defining the operations of the {@code Accumulator}
-   * helper class, the operations of the enclosing {@code CombineFn}
-   * are automatically provided.  This can reduce the code required to
-   * implement a {@code CombineFn}.
-   *
-   * <p>For example, the example from {@link CombineFn} above can be
-   * expressed using {@code AccumulatingCombineFn} more concisely as
-   * follows:
-   *
-   * <pre> {@code
-   * public class AverageFn
-   *     extends AccumulatingCombineFn<Integer, AverageFn.Accum, Double> {
-   *   public Accum createAccumulator() {
-   *     return new Accum();
-   *   }
-   *   public class Accum
-   *       implements AccumulatingCombineFn.Accumulator<
-   *           Integer, AverageFn.Accum, Double> {
-   *     private int sum = 0;
-   *     private int count = 0;
-   *     public void addInput(Integer input) {
-   *       sum += input;
-   *       count++;
-   *     }
-   *     public void mergeAccumulator(Accum other) {
-   *       sum += other.sum;
-   *       count += other.count;
-   *     }
-   *     public Double extractOutput() {
-   *       return ((double) sum) / count;
-   *     }
-   *   }
-   * }
-   * PCollection<Integer> pc = ...;
-   * PCollection<Double> average = pc.apply(Combine.globally(new AverageFn()));
-   * } </pre>
-   *
-   * @param <InputT> type of input values
-   * @param <AccumT> type of mutable accumulator values
-   * @param <OutputT> type of output values
-   */
-  public abstract static class AccumulatingCombineFn<
-      InputT,
-      AccumT extends AccumulatingCombineFn.Accumulator<InputT, AccumT, OutputT>,
-      OutputT>
-      extends CombineFn<InputT, AccumT, OutputT> {
-
-    /**
-     * The type of mutable accumulator values used by this
-     * {@code AccumulatingCombineFn}.
-     */
-    public abstract static interface Accumulator<InputT, AccumT, OutputT> {
-      /**
-       * Adds the given input value to this accumulator, modifying
-       * this accumulator.
-       */
-      public abstract void addInput(InputT input);
-
-      /**
-       * Adds the input values represented by the given accumulator
-       * into this accumulator.
-       */
-      public abstract void mergeAccumulator(AccumT other);
-
-      /**
-       * Returns the output value that is the result of combining all
-       * the input values represented by this accumulator.
-       */
-      public abstract OutputT extractOutput();
-    }
-
-    @Override
-    public final AccumT addInput(AccumT accumulator, InputT input) {
-      accumulator.addInput(input);
-      return accumulator;
-    }
-
-    @Override
-    public final AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
-      AccumT accumulator = createAccumulator();
-      for (AccumT partial : accumulators) {
-        accumulator.mergeAccumulator(partial);
-      }
-      return accumulator;
-    }
-
-    @Override
-    public final OutputT extractOutput(AccumT accumulator) {
-      return accumulator.extractOutput();
-    }
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-
-  /**
-   * A {@code KeyedCombineFn<K, InputT, AccumT, OutputT>} specifies how to combine
-   * a collection of input values of type {@code InputT}, associated with
-   * a key of type {@code K}, into a single output value of type
-   * {@code OutputT}.  It does this via one or more intermediate mutable
-   * accumulator values of type {@code AccumT}.
-   *
-   * <p>The overall process to combine a collection of input
-   * {@code InputT} values associated with an input {@code K} key into a
-   * single output {@code OutputT} value is as follows:
-   *
-   * <ol>
-   *
-   * <li> The input {@code InputT} values are partitioned into one or more
-   * batches.
-   *
-   * <li> For each batch, the {@link #createAccumulator} operation is
-   * invoked to create a fresh mutable accumulator value of type
-   * {@code AccumT}, initialized to represent the combination of zero
-   * values.
-   *
-   * <li> For each input {@code InputT} value in a batch, the
-   * {@link #addInput} operation is invoked to add the value to that
-   * batch's accumulator {@code AccumT} value.  The accumulator may just
-   * record the new value (e.g., if {@code AccumT == List<InputT>}), or may do
-   * work to represent the combination more compactly.
-   *
-   * <li> The {@link #mergeAccumulators} operation is invoked to
-   * combine a collection of accumulator {@code AccumT} values into a
-   * single combined output accumulator {@code AccumT} value, once the
-   * merging accumulators have had all the input values in their
-   * batches added to them.  This operation is invoked repeatedly,
-   * until there is only one accumulator value left.
-   *
-   * <li> The {@link #extractOutput} operation is invoked on the final
-   * accumulator {@code AccumT} value to get the output {@code OutputT} value.
-   *
-   * </ol>
-   *
-   * <p>All of these operations are passed the {@code K} key that the
-   * values being combined are associated with.
-   *
-   * <p>For example:
-   * <pre> {@code
-   * public class ConcatFn
-   *     extends KeyedCombineFn<String, Integer, ConcatFn.Accum, String> {
-   *   public static class Accum {
-   *     String s = "";
-   *   }
-   *   public Accum createAccumulator(String key) {
-   *     return new Accum();
-   *   }
-   *   public Accum addInput(String key, Accum accum, Integer input) {
-   *       accum.s += "+" + input;
-   *       return accum;
-   *   }
-   *   public Accum mergeAccumulators(String key, Iterable<Accum> accums) {
-   *     Accum merged = new Accum();
-   *     for (Accum accum : accums) {
-   *       merged.s += accum.s;
-   *     }
-   *     return merged;
-   *   }
-   *   public String extractOutput(String key, Accum accum) {
-   *     return key + accum.s;
-   *   }
-   * }
-   * PCollection<KV<String, Integer>> pc = ...;
-   * PCollection<KV<String, String>> pc2 = pc.apply(
-   *     Combine.perKey(new ConcatFn()));
-   * } </pre>
-   *
-   * <p>Keyed combining functions used by {@link Combine.PerKey},
-   * {@link Combine.GroupedValues}, and {@code PTransforms} derived
-   * from them should be <i>associative</i> and <i>commutative</i>.
-   * Associativity is required because input values are first broken
-   * up into subgroups before being combined, and their intermediate
-   * results further combined, in an arbitrary tree structure.
-   * Commutativity is required because any order of the input values
-   * is ignored when breaking up input values into groups.
-   *
-   * @param <K> type of keys
-   * @param <InputT> type of input values
-   * @param <AccumT> type of mutable accumulator values
-   * @param <OutputT> type of output values
-   */
-  public abstract static class KeyedCombineFn<K, InputT, AccumT, OutputT>
-      extends AbstractPerKeyCombineFn<K, InputT, AccumT, OutputT> {
-    /**
-     * Returns a new, mutable accumulator value representing the accumulation of zero input values.
-     *
-     * @param key the key that all the accumulated values using the
-     * accumulator are associated with
-     */
-    public abstract AccumT createAccumulator(K key);
-
-    /**
-     * Adds the given input value to the given accumulator, returning the new accumulator value.
-     *
-     * <p>For efficiency, the input accumulator may be modified and returned.
-     *
-     * @param key the key that all the accumulated values using the
-     * accumulator are associated with
-     */
-    public abstract AccumT addInput(K key, AccumT accumulator, InputT value);
-
-    /**
-     * Returns an accumulator representing the accumulation of all the
-     * input values accumulated in the merging accumulators.
-     *
-     * <p>May modify any of the argument accumulators.  May return a
-     * fresh accumulator, or may return one of the (modified) argument
-     * accumulators.
-     *
-     * @param key the key that all the accumulators are associated
-     * with
-     */
-    public abstract AccumT mergeAccumulators(K key, Iterable<AccumT> accumulators);
-
-    /**
-     * Returns the output value that is the result of combining all
-     * the input values represented by the given accumulator.
-     *
-     * @param key the key that all the accumulated values using the
-     * accumulator are associated with
-     */
-    public abstract OutputT extractOutput(K key, AccumT accumulator);
-
-    /**
-     * Returns an accumulator that represents the same logical value as the
-     * input accumulator, but may have a more compact representation.
-     *
-     * <p>For most CombineFns this would be a no-op, but should be overridden
-     * by CombineFns that (for example) buffer up elements and combine
-     * them in batches.
-     *
-     * <p>For efficiency, the input accumulator may be modified and returned.
-     *
-     * <p>By default returns the original accumulator.
-     */
-    public AccumT compact(K key, AccumT accumulator) {
-      return accumulator;
-    }
-
-    @Override
-    public CombineFn<InputT, AccumT, OutputT> forKey(final K key, final Coder<K> keyCoder) {
-      return new CombineFn<InputT, AccumT, OutputT>() {
-
-        @Override
-        public AccumT createAccumulator() {
-          return KeyedCombineFn.this.createAccumulator(key);
-        }
-
-        @Override
-        public AccumT addInput(AccumT accumulator, InputT input) {
-          return KeyedCombineFn.this.addInput(key, accumulator, input);
-        }
-
-        @Override
-        public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
-          return KeyedCombineFn.this.mergeAccumulators(key, accumulators);
-        }
-
-        @Override
-        public OutputT extractOutput(AccumT accumulator) {
-          return KeyedCombineFn.this.extractOutput(key, accumulator);
-        }
-
-        @Override
-        public AccumT compact(AccumT accumulator) {
-          return KeyedCombineFn.this.compact(key, accumulator);
-        }
-
-        @Override
-        public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<InputT> inputCoder)
-            throws CannotProvideCoderException {
-          return KeyedCombineFn.this.getAccumulatorCoder(registry, keyCoder, inputCoder);
-        }
-
-        @Override
-        public Coder<OutputT> getDefaultOutputCoder(
-            CoderRegistry registry, Coder<InputT> inputCoder) throws CannotProvideCoderException {
-          return KeyedCombineFn.this.getDefaultOutputCoder(registry, keyCoder, inputCoder);
-        }
-      };
-    }
-
-    /**
-     * Applies this {@code KeyedCombineFn} to a key and a collection
-     * of input values to produce a combined output value.
-     *
-     * <p>Useful when testing the behavior of a {@code KeyedCombineFn}
-     * separately from a {@code Combine} transform.
-     */
-    public OutputT apply(K key, Iterable<? extends InputT> inputs) {
-      AccumT accum = createAccumulator(key);
-      for (InputT input : inputs) {
-        accum = addInput(key, accum, input);
-      }
-      return extractOutput(key, accum);
-    }
-  }
-
-  ////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * {@code Combine.Globally<InputT, OutputT>} takes a {@code PCollection<InputT>}
-   * and returns a {@code PCollection<OutputT>} whose elements are the result of
-   * combining all the elements in each window of the input {@code PCollection},
-   * using a specified {@link CombineFn CombineFn&lt;InputT, AccumT, OutputT&gt;}.
-   * It is common for {@code InputT == OutputT}, but not required.  Common combining
-   * functions include sums, mins, maxes, and averages of numbers,
-   * conjunctions and disjunctions of booleans, statistical
-   * aggregations, etc.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<Integer> pc = ...;
-   * PCollection<Integer> sum = pc.apply(
-   *     Combine.globally(new Sum.SumIntegerFn()));
-   * } </pre>
-   *
-   * <p>Combining can happen in parallel, with different subsets of the
-   * input {@code PCollection} being combined separately, and their
-   * intermediate results combined further, in an arbitrary tree
-   * reduction pattern, until a single result value is produced.
-   *
-   * <p>If the input {@code PCollection} is windowed into {@link GlobalWindows},
-   * a default value in the {@link GlobalWindow} will be output if the input
-   * {@code PCollection} is empty.  To use this with inputs with other windowing,
-   * either {@link #withoutDefaults} or {@link #asSingletonView} must be called,
-   * as the default value cannot be automatically assigned to any single window.
-   *
-   * <p>By default, the {@code Coder} of the output {@code PValue<OutputT>}
-   * is inferred from the concrete type of the
-   * {@code CombineFn<InputT, AccumT, OutputT>}'s output type {@code OutputT}.
-   *
-   * <p>See also {@link #perKey}/{@link PerKey Combine.PerKey} and
-   * {@link #groupedValues}/{@link GroupedValues Combine.GroupedValues}, which
-   * are useful for combining values associated with each key in
-   * a {@code PCollection} of {@code KV}s.
-   *
-   * @param <InputT> type of input values
-   * @param <OutputT> type of output values
-   */
-  public static class Globally<InputT, OutputT>
-      extends PTransform<PCollection<InputT>, PCollection<OutputT>> {
-
-    private final GlobalCombineFn<? super InputT, ?, OutputT> fn;
-    private final boolean insertDefault;
-    private final int fanout;
-    private final List<PCollectionView<?>> sideInputs;
-
-    private Globally(GlobalCombineFn<? super InputT, ?, OutputT> fn,
-        boolean insertDefault, int fanout) {
-      this.fn = fn;
-      this.insertDefault = insertDefault;
-      this.fanout = fanout;
-      this.sideInputs = ImmutableList.<PCollectionView<?>>of();
-    }
-
-    private Globally(String name, GlobalCombineFn<? super InputT, ?, OutputT> fn,
-        boolean insertDefault, int fanout) {
-      super(name);
-      this.fn = fn;
-      this.insertDefault = insertDefault;
-      this.fanout = fanout;
-      this.sideInputs = ImmutableList.<PCollectionView<?>>of();
-    }
-
-    private Globally(String name, GlobalCombineFn<? super InputT, ?, OutputT> fn,
-        boolean insertDefault, int fanout, List<PCollectionView<?>> sideInputs) {
-      super(name);
-      this.fn = fn;
-      this.insertDefault = insertDefault;
-      this.fanout = fanout;
-      this.sideInputs = sideInputs;
-    }
-
-    /**
-     * Return a new {@code Globally} transform that's like this transform but with the
-     * specified name. Does not modify this transform.
-     *
-     * <p>The combining function, default-insertion setting, fanout and side inputs of
-     * this transform are carried over to the returned transform.
-     *
-     * @param name the name to give the returned transform
-     */
-    public Globally<InputT, OutputT> named(String name) {
-      // Use the five-argument constructor: the four-argument one resets the side
-      // inputs to an empty list, dropping anything set through withSideInputs().
-      return new Globally<>(name, fn, insertDefault, fanout, sideInputs);
-    }
-
-    /**
-     * Returns a {@link PTransform} that produces a {@code PCollectionView}
-     * whose elements are the result of combining elements per-window in
-     * the input {@code PCollection}.  If a value is requested from the view
-     * for a window that is not present, the result of applying the {@code CombineFn}
-     * to an empty input set will be returned.
-     */
-    public GloballyAsSingletonView<InputT, OutputT> asSingletonView() {
-      return new GloballyAsSingletonView<>(fn, insertDefault, fanout);
-    }
-
-    /**
-     * Returns a {@link PTransform} identical to this, but that does not attempt to
-     * provide a default value in the case of empty input.  Required when the input
-     * is not globally windowed and the output is not being used as a side input.
-     *
-     * <p>The name, combining function, fanout and side inputs of this transform are
-     * carried over to the returned transform.
-     */
-    public Globally<InputT, OutputT> withoutDefaults() {
-      // Use the five-argument constructor: the four-argument one resets the side
-      // inputs to an empty list, dropping anything set through withSideInputs().
-      return new Globally<>(name, fn, false, fanout, sideInputs);
-    }
-
-    /**
-     * Returns a {@link PTransform} identical to this, but that uses an intermediate node
-     * to combine parts of the data to reduce load on the final global combine step.
-     *
-     * <p>The {@code fanout} parameter determines the number of intermediate keys
-     * that will be used.
-     *
-     * <p>The name, combining function, default-insertion setting and side inputs of this
-     * transform are carried over to the returned transform.
-     *
-     * @param fanout the number of intermediate keys to use
-     */
-    public Globally<InputT, OutputT> withFanout(int fanout) {
-      // Use the five-argument constructor: the four-argument one resets the side
-      // inputs to an empty list, dropping anything set through withSideInputs().
-      return new Globally<>(name, fn, insertDefault, fanout, sideInputs);
-    }
-
-    /**
-     * Returns a {@link PTransform} identical to this, but with the specified side inputs to use
-     * in {@link CombineFnWithContext}.
-     */
-    public Globally<InputT, OutputT> withSideInputs(
-        Iterable<? extends PCollectionView<?>> sideInputs) {
-      Preconditions.checkState(fn instanceof RequiresContextInternal);
-      return new Globally<InputT, OutputT>(name, fn, insertDefault, fanout,
-          ImmutableList.<PCollectionView<?>>copyOf(sideInputs));
-    }
-
-    @Override
-    public PCollection<OutputT> apply(PCollection<InputT> input) {
-      PCollection<KV<Void, InputT>> withKeys = input
-          .apply(WithKeys.<Void, InputT>of((Void) null))
-          .setCoder(KvCoder.of(VoidCoder.of(), input.getCoder()));
-
-      Combine.PerKey<Void, InputT, OutputT> combine =
-          Combine.<Void, InputT, OutputT>fewKeys(fn.asKeyedFn());
-      if (!sideInputs.isEmpty()) {
-        combine = combine.withSideInputs(sideInputs);
-      }
-
-      PCollection<KV<Void, OutputT>> combined;
-      if (fanout >= 2) {
-        combined = withKeys.apply(combine.withHotKeyFanout(fanout));
-      } else {
-        combined = withKeys.apply(combine);
-      }
-
-      PCollection<OutputT> output = combined.apply(Values.<OutputT>create());
-
-      if (insertDefault) {
-        if (!output.getWindowingStrategy().getWindowFn().isCompatible(new GlobalWindows())) {
-          throw new IllegalStateException(fn.getIncompatibleGlobalWindowErrorMessage());
-        }
-        return insertDefaultValueIfEmpty(output);
-      } else {
-        return output;
-      }
-    }
-
-    /**
-     * Returns a {@code PCollection} containing every element of {@code maybeEmpty}, plus
-     * {@code fn.defaultValue()} when the side-input view of {@code maybeEmpty} has no elements.
-     */
-    private PCollection<OutputT> insertDefaultValueIfEmpty(PCollection<OutputT> maybeEmpty) {
-      // Side-input view over the combined output; the DoFn below inspects it for emptiness.
-      final PCollectionView<Iterable<OutputT>> combinedView =
-          maybeEmpty.apply(View.<OutputT>asIterable());
-      final OutputT fallback = fn.defaultValue();
-
-      // The single null element gives the DoFn below one input to process.
-      PCollection<Void> trigger = maybeEmpty.getPipeline()
-          .apply("CreateVoid", Create.of((Void) null).withCoder(VoidCoder.of()));
-
-      DoFn<Void, OutputT> produceDefault = new DoFn<Void, OutputT>() {
-        @Override
-        public void processElement(DoFn<Void, OutputT>.ProcessContext c) {
-          Iterator<OutputT> existing = c.sideInput(combinedView).iterator();
-          if (existing.hasNext()) {
-            // The combine already produced a value; nothing to add.
-            return;
-          }
-          c.output(fallback);
-        }
-      };
-      PCollection<OutputT> defaultIfEmpty = trigger
-          .apply(ParDo.named("ProduceDefault").withSideInputs(combinedView).of(produceDefault))
-          .setCoder(maybeEmpty.getCoder())
-          .setWindowingStrategyInternal(maybeEmpty.getWindowingStrategy());
-
-      return PCollectionList.of(maybeEmpty)
-          .and(defaultIfEmpty)
-          .apply(Flatten.<OutputT>pCollections());
-    }
-  }
-
-  /**
-   * {@code Combine.GloballyAsSingletonView<InputT, OutputT>} consumes a
-   * {@code PCollection<InputT>} and produces a {@code PCollectionView<OutputT>} holding,
-   * for each window of the input {@code PCollection}, the result of combining all of that
-   * window's elements with a specified
-   * {@link CombineFn CombineFn&lt;InputT, AccumT, OutputT&gt;}.
-   * {@code InputT} and {@code OutputT} are frequently the same type, but need not be.
-   * Common combining functions include sums, mins, maxes, and averages of numbers,
-   * conjunctions and disjunctions of booleans, statistical aggregations, etc.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<Integer> pc = ...;
-   * PCollection<Integer> sum = pc.apply(
-   *     Combine.globally(new Sum.SumIntegerFn()));
-   * } </pre>
-   *
-   * <p>Combining can happen in parallel, with different subsets of the
-   * input {@code PCollection} being combined separately, and their
-   * intermediate results combined further, in an arbitrary tree
-   * reduction pattern, until a single result value is produced.
-   *
-   * <p>If a value is requested from the view for a window that is not present
-   * and {@code insertDefault} is true, the result of calling the {@code CombineFn}
-   * on empty input will be returned. If {@code insertDefault} is false, an
-   * exception will be thrown instead.
-   *
-   * <p>By default, the {@code Coder} of the output {@code PValue<OutputT>}
-   * is inferred from the concrete type of the
-   * {@code CombineFn<InputT, AccumT, OutputT>}'s output type {@code OutputT}.
-   *
-   * <p>See also {@link #perKey}/{@link PerKey Combine.PerKey} and
-   * {@link #groupedValues}/{@link GroupedValues Combine.GroupedValues}, which
-   * are useful for combining values associated with each key in
-   * a {@code PCollection} of {@code KV}s.
-   *
-   * @param <InputT> type of input values
-   * @param <OutputT> type of output values
-   */
-  public static class GloballyAsSingletonView<InputT, OutputT>
-      extends PTransform<PCollection<InputT>, PCollectionView<OutputT>> {
-
-    private final GlobalCombineFn<? super InputT, ?, OutputT> fn;
-    private final boolean insertDefault;
-    private final int fanout;
-
-    private GloballyAsSingletonView(
-        GlobalCombineFn<? super InputT, ?, OutputT> fn, boolean insertDefault, int fanout) {
-      this.fn = fn;
-      this.insertDefault = insertDefault;
-      this.fanout = fanout;
-    }
-
-    @Override
-    public PCollectionView<OutputT> apply(PCollection<InputT> input) {
-      // The combine itself never inserts defaults; when requested, the default value is
-      // attached to the singleton view instead.
-      PCollection<OutputT> combined = input.apply(
-          Combine.<InputT, OutputT>globally(fn).withoutDefaults().withFanout(fanout));
-      if (!insertDefault) {
-        return combined.apply(View.<OutputT>asSingleton());
-      }
-      return combined.apply(View.<OutputT>asSingleton().withDefaultValue(fn.defaultValue()));
-    }
-
-    /** Returns the combine function used by this transform. */
-    public GlobalCombineFn<? super InputT, ?, OutputT> getCombineFn() {
-      return fn;
-    }
-
-    /** Returns whether the view is given {@code fn.defaultValue()} as its default value. */
-    public boolean getInsertDefault() {
-      return insertDefault;
-    }
-
-    /** Returns the fanout passed to the underlying global combine. */
-    public int getFanout() {
-      return fanout;
-    }
-  }
-
-  /**
-   * Adapts a {@link SerializableFunction} that reduces an {@code Iterable<V>}
-   * to a single {@code V} into a simple {@link CombineFn} over {@code V}s.
-   *
-   * <p>Used in the implementation of convenience methods like
-   * {@link #globally(SerializableFunction)},
-   * {@link #perKey(SerializableFunction)}, and
-   * {@link #groupedValues(SerializableFunction)}.
-   */
-  public static class IterableCombineFn<V> extends CombineFn<V, List<V>, V> {
-    private static final int DEFAULT_BUFFER_SIZE = 20;
-
-    /** The combiner function. */
-    private final SerializableFunction<Iterable<V>, V> combiner;
-
-    /**
-     * The number of values to accumulate before invoking the combiner
-     * function to combine them.
-     */
-    private final int bufferSize;
-
-    private IterableCombineFn(
-        SerializableFunction<Iterable<V>, V> combiner, int bufferSize) {
-      this.combiner = combiner;
-      this.bufferSize = bufferSize;
-    }
-
-    /**
-     * Returns a {@code CombineFn} that uses the given
-     * {@code SerializableFunction} to combine values, with the default buffer size.
-     */
-    public static <V> IterableCombineFn<V> of(
-        SerializableFunction<Iterable<V>, V> combiner) {
-      return of(combiner, DEFAULT_BUFFER_SIZE);
-    }
-
-    /**
-     * Returns a {@code CombineFn} that uses the given
-     * {@code SerializableFunction} to combine values,
-     * attempting to buffer at least {@code bufferSize}
-     * values between invocations.
-     */
-    public static <V> IterableCombineFn<V> of(
-        SerializableFunction<Iterable<V>, V> combiner, int bufferSize) {
-      return new IterableCombineFn<>(combiner, bufferSize);
-    }
-
-    @Override
-    public List<V> createAccumulator() {
-      return new ArrayList<>();
-    }
-
-    @Override
-    public List<V> addInput(List<V> accumulator, V input) {
-      accumulator.add(input);
-      // Collapse the buffer to one combined value once it outgrows bufferSize.
-      return accumulator.size() > bufferSize ? mergeToSingleton(accumulator) : accumulator;
-    }
-
-    @Override
-    public List<V> mergeAccumulators(Iterable<List<V>> accumulators) {
-      Iterable<V> allValues = Iterables.concat(accumulators);
-      return mergeToSingleton(allValues);
-    }
-
-    @Override
-    public V extractOutput(List<V> accumulator) {
-      return combiner.apply(accumulator);
-    }
-
-    @Override
-    public List<V> compact(List<V> accumulator) {
-      if (accumulator.size() <= 1) {
-        // Nothing to gain from combining zero or one buffered value.
-        return accumulator;
-      }
-      return mergeToSingleton(accumulator);
-    }
-
-    /** Applies the combiner to {@code values} and wraps the result in a fresh one-element list. */
-    private List<V> mergeToSingleton(Iterable<V> values) {
-      V combined = combiner.apply(values);
-      List<V> result = new ArrayList<>();
-      result.add(combined);
-      return result;
-    }
-  }
-
-  /**
-   * Converts a {@link SerializableFunction} from {@code Iterable<V>}s
-   * to {@code V}s into a simple {@link CombineFn} over {@code V}s.
-   *
-   * @deprecated Use {@link IterableCombineFn} or the more space efficient
-   * {@link BinaryCombineFn} instead (which avoids buffering values).
-   */
-  @Deprecated
-  public static class SimpleCombineFn<V> extends IterableCombineFn<V> {
-
-    /**
-     * Returns a {@code CombineFn} that uses the given
-     * {@code SerializableFunction} to combine values.
-     *
-     * @deprecated Use {@link IterableCombineFn#of(SerializableFunction)} instead.
-     */
-    @Deprecated
-    public static <V> SimpleCombineFn<V> of(
-        SerializableFunction<Iterable<V>, V> combiner) {
-      return new SimpleCombineFn<>(combiner);
-    }
-
-    /**
-     * Creates a {@code SimpleCombineFn} around {@code combiner}, using
-     * {@code IterableCombineFn}'s default buffer size.
-     */
-    protected SimpleCombineFn(SerializableFunction<Iterable<V>, V> combiner) {
-      super(combiner, IterableCombineFn.DEFAULT_BUFFER_SIZE);
-    }
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * {@code PerKey<K, InputT, OutputT>} takes a
-   * {@code PCollection<KV<K, InputT>>}, groups it by key, applies a
-   * combining function to the {@code InputT} values associated with each
-   * key to produce a combined {@code OutputT} value, and returns a
-   * {@code PCollection<KV<K, OutputT>>} representing a map from each
-   * distinct key of the input {@code PCollection} to the corresponding
-   * combined value.  {@code InputT} and {@code OutputT} are often the same.
-   *
-   * <p>This is a concise shorthand for an application of
-   * {@link GroupByKey} followed by an application of
-   * {@link GroupedValues Combine.GroupedValues}.  See those
-   * operations for more details on how keys are compared for equality
-   * and on the default {@code Coder} for the output.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<KV<String, Double>> salesRecords = ...;
-   * PCollection<KV<String, Double>> totalSalesPerPerson =
-   *     salesRecords.apply(Combine.<String, Double>perKey(
-   *         new Sum.SumDoubleFn()));
-   * } </pre>
-   *
-   * <p>Each output element is in the window by which its corresponding input
-   * was grouped, and has the timestamp of the end of that window.  The output
-   * {@code PCollection} has the same
-   * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-   * as the input.
-   *
-   * @param <K> the type of the keys of the input and output
-   * {@code PCollection}s
-   * @param <InputT> the type of the values of the input {@code PCollection}
-   * @param <OutputT> the type of the values of the output {@code PCollection}
-   */
-  public static class PerKey<K, InputT, OutputT>
-    extends PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> {
-
-    private final transient PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn;
-    private final boolean fewKeys;
-    private final List<PCollectionView<?>> sideInputs;
-
-    /** Creates an unnamed transform with no side inputs. */
-    private PerKey(
-        PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn, boolean fewKeys) {
-      this.fn = fn;
-      this.fewKeys = fewKeys;
-      this.sideInputs = ImmutableList.of();
-    }
-
-    /** Creates a named transform with the given side inputs. */
-    private PerKey(String name,
-        PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn,
-        boolean fewKeys, List<PCollectionView<?>> sideInputs) {
-      super(name);
-      this.fn = fn;
-      this.fewKeys = fewKeys;
-      this.sideInputs = sideInputs;
-    }
-
-    /** Creates a named transform with no side inputs. */
-    private PerKey(
-        String name, PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn,
-        boolean fewKeys) {
-      super(name);
-      this.fn = fn;
-      this.fewKeys = fewKeys;
-      this.sideInputs = ImmutableList.of();
-    }
-
-    /**
-     * Return a new {@code PerKey} transform that's like this transform but with the
-     * specified name. Does not modify this transform.
-     *
-     * <p>The combine function, the {@code fewKeys} setting, and any side inputs configured
-     * through {@link #withSideInputs} are carried over to the returned transform.
-     */
-    public PerKey<K, InputT, OutputT> named(String name) {
-      // Pass sideInputs along; the three-argument constructor would reset them to empty.
-      return new PerKey<K, InputT, OutputT>(name, fn, fewKeys, sideInputs);
-    }
-
-    /**
-     * Returns a {@link PTransform} identical to this, but with the specified side inputs to use
-     * in {@link KeyedCombineFnWithContext}.
-     *
-     * @throws IllegalStateException if the combine function is not a
-     * {@code RequiresContextInternal}
-     */
-    public PerKey<K, InputT, OutputT> withSideInputs(
-        Iterable<? extends PCollectionView<?>> sideInputs) {
-      Preconditions.checkState(fn instanceof RequiresContextInternal);
-      return new PerKey<K, InputT, OutputT>(name, fn, fewKeys,
-          ImmutableList.<PCollectionView<?>>copyOf(sideInputs));
-    }
-
-    /**
-     * If a single key has disproportionately many values, it may become a
-     * bottleneck, especially in streaming mode.  This returns a new per-key
-     * combining transform that inserts an intermediate node to combine "hot"
-     * keys partially before performing the full combine.
-     *
-     * <p>Only this transform's name and combine function are handed to the returned
-     * transform; the side inputs and the {@code fewKeys} setting are not.
-     *
-     * @param hotKeyFanout a function from keys to an integer N, where the key
-     * will be spread among N intermediate nodes for partial combining.
-     * If N is less than or equal to 1, this key will not be sent through an
-     * intermediate node.
-     */
-    public PerKeyWithHotKeyFanout<K, InputT, OutputT> withHotKeyFanout(
-        SerializableFunction<? super K, Integer> hotKeyFanout) {
-      return new PerKeyWithHotKeyFanout<K, InputT, OutputT>(name, fn, hotKeyFanout);
-    }
-
-    /**
-     * Like {@link #withHotKeyFanout(SerializableFunction)}, but returning the given
-     * constant value for every key.
-     */
-    public PerKeyWithHotKeyFanout<K, InputT, OutputT> withHotKeyFanout(final int hotKeyFanout) {
-      return new PerKeyWithHotKeyFanout<K, InputT, OutputT>(name, fn,
-          new SerializableFunction<K, Integer>(){
-            @Override
-            public Integer apply(K unused) {
-              return hotKeyFanout;
-            }
-          });
-    }
-
-    /**
-     * Returns the {@link PerKeyCombineFn} used by this Combine operation.
-     */
-    public PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> getFn() {
-      return fn;
-    }
-
-    /**
-     * Returns the side inputs used by this Combine operation.
-     */
-    public List<PCollectionView<?>> getSideInputs() {
-      return sideInputs;
-    }
-
-    @Override
-    public PCollection<KV<K, OutputT>> apply(PCollection<KV<K, InputT>> input) {
-      // Group by key first, then combine each key's grouped values with fn.
-      return input
-          .apply(GroupByKey.<K, InputT>create(fewKeys))
-          .apply(Combine.<K, InputT, OutputT>groupedValues(fn).withSideInputs(sideInputs));
-    }
-  }
-
-  /**
-   * Like {@link PerKey}, but sharding the combining of hot keys.
-   */
-  public static class PerKeyWithHotKeyFanout<K, InputT, OutputT>
-      extends PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> {
-
-    private final transient PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn;
-    private final SerializableFunction<? super K, Integer> hotKeyFanout;
-
-    /**
-     * Creates the transform.
-     *
-     * @param name the transform name, passed to the {@code PTransform} constructor
-     * @param fn the per-key combine function to apply
-     * @param hotKeyFanout maps a key to the number of intermediate shards it is spread over
-     */
-    private PerKeyWithHotKeyFanout(String name,
-        PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn,
-        SerializableFunction<? super K, Integer> hotKeyFanout) {
-      super(name);
-      this.fn = fn;
-      this.hotKeyFanout = hotKeyFanout;
-    }
-
-    @Override
-    public PCollection<KV<K, OutputT>> apply(PCollection<KV<K, InputT>> input) {
-      // Delegate to a generic helper so the wildcard accumulator type of fn can be
-      // referred to by name (AccumT) in the implementation.
-      return applyHelper(input);
-    }
-
-    /**
-     * Builds the hot-key fanout: elements whose key has a fanout greater than 1 are tagged
-     * with a nonce and partially combined per (key, nonce) pair; the resulting accumulators
-     * are flattened together with the untouched "cold" inputs and combined per key.
-     *
-     * @param <AccumT> name given to the accumulator type of {@code fn}
-     * @throws IllegalStateException if the input coder is not a {@code KvCoder}, or the
-     * accumulator coder cannot be determined
-     */
-    private <AccumT> PCollection<KV<K, OutputT>> applyHelper(PCollection<KV<K, InputT>> input) {
-
-      // Name the accumulator type.
-      @SuppressWarnings("unchecked")
-      final PerKeyCombineFn<K, InputT, AccumT, OutputT> typedFn =
-          (PerKeyCombineFn<K, InputT, AccumT, OutputT>) this.fn;
-
-      if (!(input.getCoder() instanceof KvCoder)) {
-        throw new IllegalStateException(
-            "Expected input coder to be KvCoder, but was " + input.getCoder());
-      }
-
-      @SuppressWarnings("unchecked")
-      final KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) input.getCoder();
-      final Coder<AccumT> accumCoder;
-
-      try {
-        accumCoder = typedFn.getAccumulatorCoder(
-            input.getPipeline().getCoderRegistry(),
-            inputCoder.getKeyCoder(), inputCoder.getValueCoder());
-      } catch (CannotProvideCoderException e) {
-        throw new IllegalStateException("Unable to determine accumulator coder.", e);
-      }
-      Coder<InputOrAccum<InputT, AccumT>> inputOrAccumCoder =
-          new InputOrAccum.InputOrAccumCoder<InputT, AccumT>(
-              inputCoder.getValueCoder(), accumCoder);
-
-      // A CombineFn's mergeAccumulator can be applied in a tree-like fashion.
-      // Here we shard the key using an integer nonce, combine on that partial
-      // set of values, then drop the nonce and do a final combine of the
-      // aggregates.  We do this by splitting the original CombineFn into two,
-      // one that does addInput + merge and another that does merge + extract.
-      PerKeyCombineFn<KV<K, Integer>, InputT, AccumT, AccumT> hotPreCombine;
-      PerKeyCombineFn<K, InputOrAccum<InputT, AccumT>, AccumT, OutputT> postCombine;
-      if (!(typedFn instanceof RequiresContextInternal)) {
-        final KeyedCombineFn<K, InputT, AccumT, OutputT> keyedFn =
-            (KeyedCombineFn<K, InputT, AccumT, OutputT>) typedFn;
-        hotPreCombine =
-            new KeyedCombineFn<KV<K, Integer>, InputT, AccumT, AccumT>() {
-              @Override
-              public AccumT createAccumulator(KV<K, Integer> key) {
-                return keyedFn.createAccumulator(key.getKey());
-              }
-              @Override
-              public AccumT addInput(KV<K, Integer> key, AccumT accumulator, InputT value) {
-                return keyedFn.addInput(key.getKey(), accumulator, value);
-              }
-              @Override
-              public AccumT mergeAccumulators(
-                  KV<K, Integer> key, Iterable<AccumT> accumulators) {
-                return keyedFn.mergeAccumulators(key.getKey(), accumulators);
-              }
-              @Override
-              public AccumT compact(KV<K, Integer> key, AccumT accumulator) {
-                return keyedFn.compact(key.getKey(), accumulator);
-              }
-              @Override
-              public AccumT extractOutput(KV<K, Integer> key, AccumT accumulator) {
-                // The pre-combine emits the accumulator itself; extraction happens later.
-                return accumulator;
-              }
-              @Override
-              @SuppressWarnings("unchecked")
-              public Coder<AccumT> getAccumulatorCoder(
-                  CoderRegistry registry, Coder<KV<K, Integer>> keyCoder, Coder<InputT> inputCoder)
-                  throws CannotProvideCoderException {
-                return accumCoder;
-              }
-            };
-        postCombine =
-            new KeyedCombineFn<K, InputOrAccum<InputT, AccumT>, AccumT, OutputT>() {
-              @Override
-              public AccumT createAccumulator(K key) {
-                return keyedFn.createAccumulator(key);
-              }
-              @Override
-              public AccumT addInput(
-                  K key, AccumT accumulator, InputOrAccum<InputT, AccumT> value) {
-                // A null accum marks a raw (cold) input; otherwise merge the partial result.
-                if (value.accum == null) {
-                  return keyedFn.addInput(key, accumulator, value.input);
-                } else {
-                  return keyedFn.mergeAccumulators(key, ImmutableList.of(accumulator, value.accum));
-                }
-              }
-              @Override
-              public AccumT mergeAccumulators(K key, Iterable<AccumT> accumulators) {
-                return keyedFn.mergeAccumulators(key, accumulators);
-              }
-              @Override
-              public AccumT compact(K key, AccumT accumulator) {
-                return keyedFn.compact(key, accumulator);
-              }
-              @Override
-              public OutputT extractOutput(K key, AccumT accumulator) {
-                return keyedFn.extractOutput(key, accumulator);
-              }
-              @Override
-              public Coder<OutputT> getDefaultOutputCoder(
-                  CoderRegistry registry,
-                  Coder<K> keyCoder,
-                  Coder<InputOrAccum<InputT, AccumT>> accumulatorCoder)
-                  throws CannotProvideCoderException {
-                return keyedFn.getDefaultOutputCoder(
-                    registry, keyCoder, inputCoder.getValueCoder());
-              }
-
-              @Override
-              public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<K> keyCoder,
-                  Coder<InputOrAccum<InputT, AccumT>> inputCoder)
-                      throws CannotProvideCoderException {
-                return accumCoder;
-              }
-            };
-      } else {
-        final KeyedCombineFnWithContext<K, InputT, AccumT, OutputT> keyedFnWithContext =
-            (KeyedCombineFnWithContext<K, InputT, AccumT, OutputT>) typedFn;
-        hotPreCombine =
-            new KeyedCombineFnWithContext<KV<K, Integer>, InputT, AccumT, AccumT>() {
-              @Override
-              public AccumT createAccumulator(KV<K, Integer> key, Context c) {
-                return keyedFnWithContext.createAccumulator(key.getKey(), c);
-              }
-
-              @Override
-              public AccumT addInput(
-                  KV<K, Integer> key, AccumT accumulator, InputT value, Context c) {
-                return keyedFnWithContext.addInput(key.getKey(), accumulator, value, c);
-              }
-
-              @Override
-              public AccumT mergeAccumulators(
-                  KV<K, Integer> key, Iterable<AccumT> accumulators, Context c) {
-                return keyedFnWithContext.mergeAccumulators(key.getKey(), accumulators, c);
-              }
-
-              @Override
-              public AccumT compact(KV<K, Integer> key, AccumT accumulator, Context c) {
-                return keyedFnWithContext.compact(key.getKey(), accumulator, c);
-              }
-
-              @Override
-              public AccumT extractOutput(KV<K, Integer> key, AccumT accumulator, Context c) {
-                // The pre-combine emits the accumulator itself; extraction happens later.
-                return accumulator;
-              }
-
-              @Override
-              @SuppressWarnings("unchecked")
-              public Coder<AccumT> getAccumulatorCoder(
-                  CoderRegistry registry, Coder<KV<K, Integer>> keyCoder, Coder<InputT> inputCoder)
-                  throws CannotProvideCoderException {
-                return accumCoder;
-              }
-            };
-        postCombine =
-            new KeyedCombineFnWithContext<K, InputOrAccum<InputT, AccumT>, AccumT, OutputT>() {
-              @Override
-              public AccumT createAccumulator(K key, Context c) {
-                return keyedFnWithContext.createAccumulator(key, c);
-              }
-              @Override
-              public AccumT addInput(
-                  K key, AccumT accumulator, InputOrAccum<InputT, AccumT> value, Context c) {
-                // A null accum marks a raw (cold) input; otherwise merge the partial result.
-                if (value.accum == null) {
-                  return keyedFnWithContext.addInput(key, accumulator, value.input, c);
-                } else {
-                  return keyedFnWithContext.mergeAccumulators(
-                      key, ImmutableList.of(accumulator, value.accum), c);
-                }
-              }
-              @Override
-              public AccumT mergeAccumulators(K key, Iterable<AccumT> accumulators, Context c) {
-                return keyedFnWithContext.mergeAccumulators(key, accumulators, c);
-              }
-              @Override
-              public AccumT compact(K key, AccumT accumulator, Context c) {
-                return keyedFnWithContext.compact(key, accumulator, c);
-              }
-              @Override
-              public OutputT extractOutput(K key, AccumT accumulator, Context c) {
-                return keyedFnWithContext.extractOutput(key, accumulator, c);
-              }
-              @Override
-              public Coder<OutputT> getDefaultOutputCoder(
-                  CoderRegistry registry,
-                  Coder<K> keyCoder,
-                  Coder<InputOrAccum<InputT, AccumT>> accumulatorCoder)
-                  throws CannotProvideCoderException {
-                return keyedFnWithContext.getDefaultOutputCoder(
-                    registry, keyCoder, inputCoder.getValueCoder());
-              }
-
-              @Override
-              public Coder<AccumT> getAccumulatorCoder(CoderRegistry registry, Coder<K> keyCoder,
-                  Coder<InputOrAccum<InputT, AccumT>> inputCoder)
-                  throws CannotProvideCoderException {
-                return accumCoder;
-              }
-            };
-      }
-
-      // Use the provided hotKeyFanout fn to split into "hot" and "cold" keys,
-      // augmenting the hot keys with a nonce.
-      final TupleTag<KV<KV<K, Integer>, InputT>> hot = new TupleTag<>();
-      final TupleTag<KV<K, InputT>> cold = new TupleTag<>();
-      PCollectionTuple split = input.apply(
-          ParDo.named("AddNonce").of(
-              new DoFn<KV<K, InputT>, KV<K, InputT>>() {
-                // Running nonce source; re-seeded with a random value in startBundle.
-                transient int counter;
-                @Override
-                public void startBundle(Context c) {
-                  counter = ThreadLocalRandom.current().nextInt(
-                      Integer.MAX_VALUE);
-                }
-
-                @Override
-                public void processElement(ProcessContext c) {
-                  KV<K, InputT> kv = c.element();
-                  int spread = Math.max(1, hotKeyFanout.apply(kv.getKey()));
-                  if (spread <= 1) {
-                    c.output(kv);
-                  } else {
-                    // Mask off the sign bit so the nonce stays within [0, spread) even
-                    // after counter++ wraps past Integer.MAX_VALUE.
-                    int nonce = (counter++ & Integer.MAX_VALUE) % spread;
-                    c.sideOutput(hot, KV.of(KV.of(kv.getKey(), nonce), kv.getValue()));
-                  }
-                }
-              })
-          .withOutputTags(cold, TupleTagList.of(hot)));
-
-      // The first level of combine should never use accumulating mode.
-      WindowingStrategy<?, ?> preCombineStrategy = input.getWindowingStrategy();
-      if (preCombineStrategy.getMode()
-          == WindowingStrategy.AccumulationMode.ACCUMULATING_FIRED_PANES) {
-        preCombineStrategy = preCombineStrategy.withMode(
-            WindowingStrategy.AccumulationMode.DISCARDING_FIRED_PANES);
-      }
-
-      // Combine the hot and cold keys separately.
-      PCollection<KV<K, InputOrAccum<InputT, AccumT>>> precombinedHot = split
-          .get(hot)
-          .setCoder(KvCoder.of(KvCoder.of(inputCoder.getKeyCoder(), VarIntCoder.of()),
-                               inputCoder.getValueCoder()))
-          .setWindowingStrategyInternal(preCombineStrategy)
-          .apply("PreCombineHot", Combine.perKey(hotPreCombine))
-          .apply(ParDo.named("StripNonce").of(
-              new DoFn<KV<KV<K, Integer>, AccumT>,
-                       KV<K, InputOrAccum<InputT, AccumT>>>() {
-                @Override
-                public void processElement(ProcessContext c) {
-                  c.output(KV.of(
-                      c.element().getKey().getKey(),
-                      InputOrAccum.<InputT, AccumT>accum(c.element().getValue())));
-                }
-              }))
-          .setCoder(KvCoder.of(inputCoder.getKeyCoder(), inputOrAccumCoder))
-          .apply(Window.<KV<K, InputOrAccum<InputT, AccumT>>>remerge())
-          .setWindowingStrategyInternal(input.getWindowingStrategy());
-      PCollection<KV<K, InputOrAccum<InputT, AccumT>>> preprocessedCold = split
-          .get(cold)
-          .setCoder(inputCoder)
-          .apply(ParDo.named("PrepareCold").of(
-              new DoFn<KV<K, InputT>, KV<K, InputOrAccum<InputT, AccumT>>>() {
-                @Override
-                public void processElement(ProcessContext c) {
-                  c.output(KV.of(c.element().getKey(),
-                                 InputOrAccum.<InputT, AccumT>input(c.element().getValue())));
-                }
-              }))
-          .setCoder(KvCoder.of(inputCoder.getKeyCoder(), inputOrAccumCoder));
-
-      // Combine the union of the pre-processed hot and cold key results.
-      return PCollectionList.of(precombinedHot).and(preprocessedCold)
-          .apply(Flatten.<KV<K, InputOrAccum<InputT, AccumT>>>pCollections())
-          .apply("PostCombine", Combine.perKey(postCombine));
-    }
-
-    /**
-     * Holds either a raw input value or an accumulator, so that the hot and cold
-     * key paths can be flattened into a single collection.
-     */
-    private static class InputOrAccum<InputT, AccumT> {
-      public final InputT input;
-      public final AccumT accum;
-
-      private InputOrAccum(InputT input, AccumT aggr) {
-        this.input = input;
-        this.accum = aggr;
-      }
-
-      /** Wraps a raw input value; {@code accum} is left {@code null}. */
-      public static <InputT, AccumT> InputOrAccum<InputT, AccumT> input(InputT input) {
-        return new InputOrAccum<InputT, AccumT>(input, null);
-      }
-
-      /** Wraps an accumulator; {@code input} is left {@code null}. */
-      public static <InputT, AccumT> InputOrAccum<InputT, AccumT> accum(AccumT aggr) {
-        return new InputOrAccum<InputT, AccumT>(null, aggr);
-      }
-
-      /**
-       * Coder for {@link InputOrAccum}: one tag byte ({@code 0} for an input, {@code 1}
-       * for an accumulator) followed by the payload written with the matching component
-       * coder.
-       */
-      private static class InputOrAccumCoder<InputT, AccumT>
-          extends StandardCoder<InputOrAccum<InputT, AccumT>> {
-
-        private final Coder<InputT> inputCoder;
-        private final Coder<AccumT> accumCoder;
-
-        public InputOrAccumCoder(Coder<InputT> inputCoder, Coder<AccumT> accumCoder) {
-          this.inputCoder = inputCoder;
-          this.accumCoder = accumCoder;
-        }
-
-        /** Rebuilds the coder from its two component coders: input first, accumulator second. */
-        @JsonCreator
-        @SuppressWarnings({"rawtypes", "unchecked"})
-        public static <InputT, AccumT> InputOrAccumCoder<InputT, AccumT> of(
-            @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-            List<Coder<?>> elementCoders) {
-          return new InputOrAccumCoder(elementCoders.get(0), elementCoders.get(1));
-        }
-
-        @Override
-        public void encode(
-            InputOrAccum<InputT, AccumT> value, OutputStream outStream, Coder.Context context)
-            throws CoderException, IOException {
-          // A non-null input selects the input branch; anything else is written as an accum.
-          boolean holdsInput = value.input != null;
-          outStream.write(holdsInput ? 0 : 1);
-          if (holdsInput) {
-            inputCoder.encode(value.input, outStream, context);
-          } else {
-            accumCoder.encode(value.accum, outStream, context);
-          }
-        }
-
-        @Override
-        public InputOrAccum<InputT, AccumT> decode(InputStream inStream, Coder.Context context)
-            throws CoderException, IOException {
-          int tag = inStream.read();
-          if (tag != 0) {
-            return InputOrAccum.<InputT, AccumT>accum(accumCoder.decode(inStream, context));
-          }
-          return InputOrAccum.<InputT, AccumT>input(inputCoder.decode(inStream, context));
-        }
-
-        @Override
-        public List<? extends Coder<?>> getCoderArguments() {
-          return ImmutableList.of(inputCoder, accumCoder);
-        }
-
-        /** Deterministic only when both component coders are. */
-        @Override
-        public void verifyDeterministic() throws Coder.NonDeterministicException {
-          inputCoder.verifyDeterministic();
-          accumCoder.verifyDeterministic();
-        }
-      }
-    }
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * {@code GroupedValues<K, InputT, OutputT>} takes a
-   * {@code PCollection<KV<K, Iterable<InputT>>>}, such as the result of
-   * {@link GroupByKey}, applies a specified
-   * {@link KeyedCombineFn KeyedCombineFn&lt;K, InputT, AccumT, OutputT&gt;}
-   * to each of the input {@code KV<K, Iterable<InputT>>} elements to
-   * produce a combined output {@code KV<K, OutputT>} element, and returns a
-   * {@code PCollection<KV<K, OutputT>>} containing all the combined output
-   * elements.  It is common for {@code InputT == OutputT}, but not required.
-   * Common combining functions include sums, mins, maxes, and averages
-   * of numbers, conjunctions and disjunctions of booleans, statistical
-   * aggregations, etc.
-   *
-   * <p>Example of use:
-   * <pre> {@code
-   * PCollection<KV<String, Integer>> pc = ...;
-   * PCollection<KV<String, Iterable<Integer>>> groupedByKey = pc.apply(
-   *     new GroupByKey<String, Integer>());
-   * PCollection<KV<String, Integer>> sumByKey = groupedByKey.apply(
-   *     Combine.<String, Integer>groupedValues(
-   *         new Sum.SumIntegerFn()));
-   * } </pre>
-   *
-   * <p>See also {@link #perKey}/{@link PerKey Combine.PerKey}, which
-   * captures the common pattern of "combining by key" in a
-   * single easy-to-use {@code PTransform}.
-   *
-   * <p>Combining for different keys can happen in parallel.  Moreover,
-   * combining of the {@code Iterable<InputT>} values associated with a single
-   * key can happen in parallel, with different subsets of the values
-   * being combined separately, and their intermediate results combined
-   * further, in an arbitrary tree reduction pattern, until a single
-   * result value is produced for each key.
-   *
-   * <p>By default, the {@code Coder} of the keys of the output
-   * {@code PCollection<KV<K, OutputT>>} is that of the keys of the input
-   * {@code PCollection<KV<K, InputT>>}, and the {@code Coder} of the values
-   * of the output {@code PCollection<KV<K, OutputT>>} is inferred from the
-   * concrete type of the {@code KeyedCombineFn<K, InputT, AccumT, OutputT>}'s output
-   * type {@code OutputT}.
-   *
-   * <p>Each output element has the same timestamp and is in the same window
-   * as its corresponding input element, and the output
-   * {@code PCollection} has the same
-   * {@link com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn}
-   * associated with it as the input.
-   *
-   * <p>See also {@link #globally}/{@link Globally Combine.Globally}, which
-   * combines all the values in a {@code PCollection} into a
-   * single value in a {@code PCollection}.
-   *
-   * @param <K> type of input and output keys
-   * @param <InputT> type of input values
-   * @param <OutputT> type of output values
-   */
-  public static class GroupedValues<K, InputT, OutputT>
-      extends PTransform
-                        <PCollection<? extends KV<K, ? extends Iterable<InputT>>>,
-                         PCollection<KV<K, OutputT>>> {
-
-    // Combining function applied to each key's values; held as a clone made with
-    // SerializableUtils.clone in the constructors.
-    private final PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn;
-    // Side inputs handed to the ParDo that performs the combining.
-    private final List<PCollectionView<?>> sideInputs;
-
-    /**
-     * Creates a transform that combines with a clone of {@code fn} and uses no side inputs.
-     */
-    private GroupedValues(PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn) {
-      this.fn = SerializableUtils.clone(fn);
-      this.sideInputs = ImmutableList.<PCollectionView<?>>of();
-    }
-
-    /**
-     * Creates a transform that combines with a clone of {@code fn} and the given side inputs.
-     *
-     * <p>The {@code sideInputs} list is stored as given, without copying; the only caller
-     * visible in this class ({@link #withSideInputs}) passes an immutable copy.
-     */
-    private GroupedValues(
-        PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> fn,
-        List<PCollectionView<?>> sideInputs) {
-      this.fn = SerializableUtils.clone(fn);
-      this.sideInputs = sideInputs;
-    }
-
-    /**
-     * Returns a new {@code GroupedValues} that uses the same combining function and an
-     * immutable copy of {@code sideInputs}. This transform itself is left unchanged.
-     */
-    public GroupedValues<K, InputT, OutputT> withSideInputs(
-        Iterable<? extends PCollectionView<?>> sideInputs) {
-      return new GroupedValues<>(fn, ImmutableList.<PCollectionView<?>>copyOf(sideInputs));
-    }
-
-    /**
-     * Returns the {@code PerKeyCombineFn} used by this Combine operation.
-     */
-    public PerKeyCombineFn<? super K, ? super InputT, ?, OutputT> getFn() {
-      return fn;
-    }
-
-    /**
-     * Returns the side inputs configured for this Combine operation.
-     */
-    public List<PCollectionView<?>> getSideInputs() {
-      return sideInputs;
-    }
-
-    /**
-     * Applies a {@code ParDo} that, for every input element, emits a {@code KV} of the
-     * element's key and the result of running the combining function over the element's
-     * values. The output coder is set eagerly when it can be determined from the input coder.
-     */
-    @Override
-    public PCollection<KV<K, OutputT>> apply(
-        PCollection<? extends KV<K, ? extends Iterable<InputT>>> input) {
-
-      // The runner wraps fn; the ProcessContext is passed along on every call.
-      final PerKeyCombineFnRunner<? super K, ? super InputT, ?, OutputT> combineFnRunner =
-          PerKeyCombineFnRunners.create(fn);
-      PCollection<KV<K, OutputT>> output = input.apply(ParDo.of(
-          new DoFn<KV<K, ? extends Iterable<InputT>>, KV<K, OutputT>>() {
-            @Override
-            public void processElement(ProcessContext c) {
-              K key = c.element().getKey();
-
-              c.output(KV.of(key, combineFnRunner.apply(key, c.element().getValue(), c)));
-            }
-          }).withSideInputs(sideInputs));
-
-      try {
-        Coder<KV<K, OutputT>> outputCoder = getDefaultOutputCoder(input);
-        output.setCoder(outputCoder);
-      } catch (CannotProvideCoderException exc) {
-        // let coder inference happen later, if it can
-      }
-
-      return output;
-    }
-
-    /**
-     * Returns the {@link CombineFn} bound to its coders.
-     *
-     * <p>For internal use.
-     *
-     * @param registry the coder registry passed through to {@code AppliedCombineFn}
-     * @param inputCoder coder of the grouped input; must be a {@code KvCoder} whose value
-     *     coder is an {@code IterableCoder}
-     * @param windowingStrategy the windowing strategy passed through to
-     *     {@code AppliedCombineFn}
-     * @throws IllegalStateException if {@code inputCoder} does not have the required shape
-     */
-    public AppliedCombineFn<? super K, ? super InputT, ?, OutputT> getAppliedFn(
-        CoderRegistry registry, Coder<? extends KV<K, ? extends Iterable<InputT>>> inputCoder,
-        WindowingStrategy<?, ?> windowingStrategy) {
-      KvCoder<K, InputT> kvCoder = getKvCoder(inputCoder);
-      return AppliedCombineFn.withInputCoder(
-          fn, registry, kvCoder, sideInputs, windowingStrategy);
-    }
-
-    /**
-     * Derives the coder for un-grouped {@code KV<K, InputT>} pairs from the coder of the
-     * grouped input by pairing its key coder with the element coder of its iterable values.
-     *
-     * @throws IllegalStateException if {@code inputCoder} is not a {@code KvCoder}, or if
-     *     its value coder is not an {@code IterableCoder}
-     */
-    private KvCoder<K, InputT> getKvCoder(
-        Coder<? extends KV<K, ? extends Iterable<InputT>>> inputCoder) {
-      if (!(inputCoder instanceof KvCoder)) {
-        throw new IllegalStateException(
-            "Combine.GroupedValues requires its input to use KvCoder");
-      }
-      // The instanceof check above guards the raw cast.
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      KvCoder<K, ? extends Iterable<InputT>> kvCoder = (KvCoder) inputCoder;
-      Coder<K> keyCoder = kvCoder.getKeyCoder();
-      Coder<? extends Iterable<InputT>> kvValueCoder = kvCoder.getValueCoder();
-      if (!(kvValueCoder instanceof IterableCoder)) {
-        throw new IllegalStateException(
-            "Combine.GroupedValues requires its input values to use "
-            + "IterableCoder");
-      }
-      @SuppressWarnings("unchecked")
-      IterableCoder<InputT> inputValuesCoder = (IterableCoder<InputT>) kvValueCoder;
-      Coder<InputT> inputValueCoder = inputValuesCoder.getElemCoder();
-      return KvCoder.of(keyCoder, inputValueCoder);
-    }
-
-    /**
-     * Builds the output coder from the input's key coder and the output value coder that the
-     * combining function reports for the input's key and value coders.
-     *
-     * @throws CannotProvideCoderException declared by this method; in the visible code the
-     *     only call that could raise it is the combining function's
-     *     {@code getDefaultOutputCoder}
-     * @throws IllegalStateException if the input coder is not a {@code KvCoder} over an
-     *     {@code IterableCoder}
-     */
-    @Override
-    public Coder<KV<K, OutputT>> getDefaultOutputCoder(
-        PCollection<? extends KV<K, ? extends Iterable<InputT>>> input)
-        throws CannotProvideCoderException {
-      KvCoder<K, InputT> kvCoder = getKvCoder(input.getCoder());
-      // fn is declared with wildcard bounds; the cast pins them to K and InputT for this call.
-      @SuppressWarnings("unchecked")
-      Coder<OutputT> outputValueCoder =
-          ((PerKeyCombineFn<K, InputT, ?, OutputT>) fn)
-          .getDefaultOutputCoder(
-              input.getPipeline().getCoderRegistry(),
-              kvCoder.getKeyCoder(), kvCoder.getValueCoder());
-      return KvCoder.of(kvCoder.getKeyCoder(), outputValueCoder);
-    }
-  }
-}

[07/67] [partial] incubator-beam git commit: Directory reorganization

Posted by dh...@apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedInputStream.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedInputStream.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedInputStream.java
deleted file mode 100644
index 3d80230..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedInputStream.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.common.base.MoreObjects;
-
-import java.io.FilterInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * An {@link InputStream} wrapper for callers that do not own the wrapped stream.
- *
- * <p>{@link #close}, {@link #mark} and {@link #reset} always throw
- * {@link UnsupportedOperationException}, and {@link #markSupported} always reports
- * {@code false}. Two wrappers are equal when the streams they wrap are equal.
- *
- * <p>The {@link OutputStream} counterpart is {@link UnownedOutputStream}.
- */
-public class UnownedInputStream extends FilterInputStream {
-  public UnownedInputStream(InputStream delegate) {
-    super(delegate);
-  }
-
-  /** Builds the rejection message for the named operation, e.g. {@code "close()"}. */
-  private static String notOwnedMessage(String operation) {
-    return "Caller does not own the underlying input stream "
-        + " and should not call " + operation + ".";
-  }
-
-  /** Always throws; the wrapped stream must be closed by its owner. */
-  @Override
-  public void close() throws IOException {
-    throw new UnsupportedOperationException(notOwnedMessage("close()"));
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (!(obj instanceof UnownedInputStream)) {
-      return false;
-    }
-    UnownedInputStream other = (UnownedInputStream) obj;
-    return other.in.equals(in);
-  }
-
-  @Override
-  public int hashCode() {
-    return in.hashCode();
-  }
-
-  /** Always throws; marking would change the state of a stream the caller does not own. */
-  @SuppressWarnings("UnsynchronizedOverridesSynchronized")
-  @Override
-  public void mark(int readlimit) {
-    throw new UnsupportedOperationException(notOwnedMessage("mark()"));
-  }
-
-  /** Always {@code false}, since {@link #mark} and {@link #reset} are rejected. */
-  @Override
-  public boolean markSupported() {
-    return false;
-  }
-
-  /** Always throws; see {@link #mark}. */
-  @SuppressWarnings("UnsynchronizedOverridesSynchronized")
-  @Override
-  public void reset() throws IOException {
-    throw new UnsupportedOperationException(notOwnedMessage("reset()"));
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(UnownedInputStream.class)
-        .add("in", in)
-        .toString();
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStream.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStream.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStream.java
deleted file mode 100644
index 29187a1..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UnownedOutputStream.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.common.base.MoreObjects;
-
-import java.io.FilterOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-
-/**
- * An {@link OutputStream} wrapper for callers that do not own the wrapped stream:
- * {@link #close} always throws {@link UnsupportedOperationException}.
- *
- * <p>Two wrappers are equal when the streams they wrap are equal.
- */
-public class UnownedOutputStream extends FilterOutputStream {
-  public UnownedOutputStream(OutputStream delegate) {
-    super(delegate);
-  }
-
-  /** Always throws; the wrapped stream must be closed by its owner. */
-  @Override
-  public void close() throws IOException {
-    String message = "Caller does not own the underlying output stream "
-        + " and should not call close().";
-    throw new UnsupportedOperationException(message);
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (!(obj instanceof UnownedOutputStream)) {
-      return false;
-    }
-    UnownedOutputStream other = (UnownedOutputStream) obj;
-    return other.out.equals(out);
-  }
-
-  @Override
-  public int hashCode() {
-    return out.hashCode();
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(UnownedOutputStream.class)
-        .add("out", out)
-        .toString();
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UploadIdResponseInterceptor.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UploadIdResponseInterceptor.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UploadIdResponseInterceptor.java
deleted file mode 100644
index da597e6..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UploadIdResponseInterceptor.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.api.client.http.GenericUrl;
-import com.google.api.client.http.HttpResponse;
-import com.google.api.client.http.HttpResponseInterceptor;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-
-/**
- * An {@link HttpResponseInterceptor} that logs a response's upload id at debug level.
- *
- * <p>A line is logged only when debug logging is enabled, the response carries the
- * {@code X-GUploader-UploadID} header, and the request URL has an {@code uploadType}
- * parameter but no {@code upload_id} parameter (that is, it is the first request of an
- * upload).
- */
-public class UploadIdResponseInterceptor implements HttpResponseInterceptor {
-
-  private static final Logger LOG = LoggerFactory.getLogger(UploadIdResponseInterceptor.class);
-  private static final String UPLOAD_ID_PARAM = "upload_id";
-  private static final String UPLOAD_TYPE_PARAM = "uploadType";
-  private static final String UPLOAD_HEADER = "X-GUploader-UploadID";
-
-  @Override
-  public void interceptResponse(HttpResponse response) throws IOException {
-    if (LOG.isDebugEnabled()) {
-      String uploadId = response.getHeaders().getFirstHeaderStringValue(UPLOAD_HEADER);
-      if (uploadId != null) {
-        logFirstUploadRequest(response.getRequest().getUrl(), uploadId);
-      }
-    }
-  }
-
-  /**
-   * Logs {@code uploadId} for {@code url} when the URL belongs to the first request of an
-   * upload.
-   */
-  private static void logFirstUploadRequest(GenericUrl url, String uploadId) {
-    // Requests that already carry an upload id are skipped: one log line per upload.
-    if (url.get(UPLOAD_ID_PARAM) != null) {
-      return;
-    }
-    // Without an upload type this is a read rather than an upload.
-    if (url.get(UPLOAD_TYPE_PARAM) == null) {
-      return;
-    }
-    LOG.debug(
-        "Upload ID for url {} on worker {} is {}",
-        url,
-        System.getProperty("worker_id"),
-        uploadId);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java
deleted file mode 100644
index 9b9c7a5..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/UserCodeException.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import java.util.Arrays;
-import java.util.Objects;
-
-/**
- * An exception that was thrown in user-code. Sets the stack trace
- * from the first time execution enters user code down through the
- * rest of the user's stack frames until the exception is
- * reached.
- */
-public class UserCodeException extends RuntimeException {
-
-  /**
-   * Returns {@code t} itself when it already is a {@code UserCodeException}; otherwise
-   * returns a new {@code UserCodeException} with {@code t} as its cause.
-   *
-   * <p>Creating the wrapper truncates the stack trace of {@code t} in place.
-   */
-  public static UserCodeException wrap(Throwable t) {
-    if (t instanceof UserCodeException) {
-      return (UserCodeException) t;
-    }
-
-    return new UserCodeException(t);
-  }
-
-  /**
-   * Wraps {@code t} as a {@code UserCodeException} when {@code condition} holds. Otherwise
-   * returns {@code t} itself if it is a {@link RuntimeException}, or a plain
-   * {@link RuntimeException} with {@code t} as its cause.
-   */
-  public static RuntimeException wrapIf(boolean condition, Throwable t) {
-    if (condition) {
-      return wrap(t);
-    }
-
-    if (t instanceof RuntimeException) {
-      return (RuntimeException) t;
-    }
-
-    return new RuntimeException(t);
-  }
-
-  private UserCodeException(Throwable t) {
-    super(t);
-    truncateStackTrace(t);
-  }
-
-  /**
-   * Truncates the {@link Throwable}'s stack trace to contain only user code,
-   * removing all frames below.
-   *
-   * <p>This is to remove infrastructure noise below user code entry point. We do this
-   * by finding common stack frames between the throwable's captured stack and that
-   * of the current thread.
-   *
-   * <p>The stack trace of {@code t} is replaced in place. If either stack is empty there
-   * are no common frames and the trace is left as it was.
-   */
-  private void truncateStackTrace(Throwable t) {
-
-    StackTraceElement[] currentStack = Thread.currentThread().getStackTrace();
-    StackTraceElement[] throwableStack = t.getStackTrace();
-
-    int currentStackSize = currentStack.length;
-    int throwableStackSize = throwableStack.length;
-
-    // Check the bound before indexing: either array may legitimately be empty, and reading
-    // element [length - 1] of an empty array would throw ArrayIndexOutOfBoundsException.
-    int maxCommonFrames = Math.min(currentStackSize, throwableStackSize);
-    int commonFrames = 0;
-    while (commonFrames < maxCommonFrames
-        && framesEqual(currentStack[currentStackSize - commonFrames - 1],
-            throwableStack[throwableStackSize - commonFrames - 1])) {
-      commonFrames++;
-    }
-
-    StackTraceElement[] truncatedStack = Arrays.copyOfRange(throwableStack, 0,
-        throwableStackSize - commonFrames);
-    t.setStackTrace(truncatedStack);
-  }
-
-  /**
-   * Check if two frames are equal; Frames are considered equal if they point to the same method.
-   * Only the class name and the method name are compared.
-   */
-  private boolean framesEqual(StackTraceElement frame1, StackTraceElement frame2) {
-    boolean areEqual = Objects.equals(frame1.getClassName(), frame2.getClassName());
-    areEqual &= Objects.equals(frame1.getMethodName(), frame2.getMethodName());
-
-    return areEqual;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ValueWithRecordId.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ValueWithRecordId.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ValueWithRecordId.java
deleted file mode 100644
index ac1f2eb..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/ValueWithRecordId.java
+++ /dev/null
@@ -1,154 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.ByteArrayCoder;
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.transforms.DoFn;
-import com.google.cloud.dataflow.sdk.transforms.PTransform;
-import com.google.cloud.dataflow.sdk.transforms.ParDo;
-import com.google.cloud.dataflow.sdk.values.PCollection;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Preconditions;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Objects;
-
-/**
- * Immutable struct containing a value as well as a unique id identifying the value.
- *
- * <p>The id array is stored and returned without copying, so callers must not modify it.
- *
- * @param <ValueT> the underlying value type
- */
-public class ValueWithRecordId<ValueT> {
-  private final ValueT value;
-  private final byte[] id;
-
-  public ValueWithRecordId(ValueT value, byte[] id) {
-    this.value = value;
-    this.id = id;
-  }
-
-  public ValueT getValue() {
-    return value;
-  }
-
-  public byte[] getId() {
-    return id;
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(this)
-        .add("id", id)
-        .add("value", value)
-        .toString();
-  }
-
-  /**
-   * Two records are equal when both their ids and their values are deeply equal, i.e.
-   * arrays are compared by content.
-   */
-  @Override
-  public boolean equals(Object other) {
-    if (this == other) {
-      return true;
-    }
-    if (!(other instanceof ValueWithRecordId)) {
-      return false;
-    }
-    ValueWithRecordId<?> otherRecord = (ValueWithRecordId<?>) other;
-    return Objects.deepEquals(id, otherRecord.id)
-        && Objects.deepEquals(value, otherRecord.value);
-  }
-
-  @Override
-  public int hashCode() {
-    // equals() compares the value with Objects.deepEquals, so the value must be hashed by
-    // content as well; value.hashCode() on an array would be identity-based and could give
-    // two equal records different hash codes.
-    return Objects.hash(Arrays.hashCode(id), Arrays.deepHashCode(new Object[] {value}));
-  }
-
-  /**
-   * A {@link Coder} for {@code ValueWithRecordId}, using a wrapped value {@code Coder}.
-   *
-   * <p>The value is encoded first, in the nested context, followed by the id in the
-   * context that was passed in.
-   */
-  public static class ValueWithRecordIdCoder<ValueT>
-      extends StandardCoder<ValueWithRecordId<ValueT>> {
-    public static <ValueT> ValueWithRecordIdCoder<ValueT> of(Coder<ValueT> valueCoder) {
-      return new ValueWithRecordIdCoder<>(valueCoder);
-    }
-
-    /**
-     * Creates the coder from its component encodings.
-     *
-     * @throws IllegalArgumentException if {@code components} does not hold exactly one coder
-     */
-    @JsonCreator
-    public static <ValueT> ValueWithRecordIdCoder<ValueT> of(
-         @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-        List<Coder<ValueT>> components) {
-      Preconditions.checkArgument(components.size() == 1,
-          "Expecting 1 component, got " + components.size());
-      return of(components.get(0));
-    }
-
-    protected ValueWithRecordIdCoder(Coder<ValueT> valueCoder) {
-      this.valueCoder = valueCoder;
-      this.idCoder = ByteArrayCoder.of();
-    }
-
-    @Override
-    public List<? extends Coder<?>> getCoderArguments() {
-      return Arrays.asList(valueCoder);
-    }
-
-    @Override
-    public void encode(ValueWithRecordId<ValueT> value, OutputStream outStream, Context context)
-        throws IOException {
-      // Order matters: decode() reads the value first, then the id.
-      valueCoder.encode(value.value, outStream, context.nested());
-      idCoder.encode(value.id, outStream, context);
-    }
-
-    @Override
-    public ValueWithRecordId<ValueT> decode(InputStream inStream, Context context)
-        throws IOException {
-      return new ValueWithRecordId<ValueT>(
-          valueCoder.decode(inStream, context.nested()),
-          idCoder.decode(inStream, context));
-    }
-
-    /** Delegates to the value coder; the id coder is not consulted. */
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      valueCoder.verifyDeterministic();
-    }
-
-    public Coder<ValueT> getValueCoder() {
-      return valueCoder;
-    }
-
-    // Coder for the wrapped value.
-    Coder<ValueT> valueCoder;
-    // Coder for the record id bytes.
-    ByteArrayCoder idCoder;
-  }
-
-  /**
-   * Returns a transform named "StripIds" that outputs the value of each input element,
-   * dropping the record id.
-   */
-  public static <T>
-      PTransform<PCollection<? extends ValueWithRecordId<T>>, PCollection<T>> stripIds() {
-    return ParDo.named("StripIds")
-        .of(
-            new DoFn<ValueWithRecordId<T>, T>() {
-              @Override
-              public void processElement(ProcessContext c) {
-                c.output(c.element().getValue());
-              }
-            });
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Values.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Values.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Values.java
deleted file mode 100644
index d4440e7..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Values.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import java.util.Map;
-
-import javax.annotation.Nullable;
-
-/**
- * A collection of static methods for manipulating value representations
- * transferred via the Dataflow API.
- *
- * <p>Each converter first tries to read its argument as a {@code Map} that names a
- * {@link CloudKnownType} and carries a scalar field; when that does not apply, the argument
- * itself is cast to the requested type.
- */
-public final class Values {
-  private Values() {}  // Non-instantiable
-
-  /**
-   * Converts {@code value} to a {@code Boolean}.
-   *
-   * @throws ClassCastException if {@code value} is not a matching typed map and cannot be
-   *     cast to {@code Boolean}
-   */
-  public static Boolean asBoolean(Object value) throws ClassCastException {
-    @Nullable Boolean parsed = checkKnownValue(CloudKnownType.BOOLEAN, value, Boolean.class);
-    return (parsed != null) ? parsed : Boolean.class.cast(value);
-  }
-
-  /**
-   * Converts {@code value} to a {@code Double}; a {@code Float} argument is widened.
-   *
-   * @throws ClassCastException if {@code value} is not a matching typed map, a
-   *     {@code Double} or a {@code Float}
-   */
-  public static Double asDouble(Object value) throws ClassCastException {
-    @Nullable Double parsed = checkKnownValue(CloudKnownType.FLOAT, value, Double.class);
-    if (parsed != null) {
-      return parsed;
-    }
-    if (!(value instanceof Double)) {
-      return ((Float) value).doubleValue();
-    }
-    return (Double) value;
-  }
-
-  /**
-   * Converts {@code value} to a {@code Long}; an {@code Integer} argument is widened.
-   *
-   * @throws ClassCastException if {@code value} is not a matching typed map, a
-   *     {@code Long} or an {@code Integer}
-   */
-  public static Long asLong(Object value) throws ClassCastException {
-    @Nullable Long parsed = checkKnownValue(CloudKnownType.INTEGER, value, Long.class);
-    if (parsed != null) {
-      return parsed;
-    }
-    if (!(value instanceof Long)) {
-      return ((Integer) value).longValue();
-    }
-    return (Long) value;
-  }
-
-  /**
-   * Converts {@code value} to a {@code String}.
-   *
-   * @throws ClassCastException if {@code value} is not a matching typed map and cannot be
-   *     cast to {@code String}
-   */
-  public static String asString(Object value) throws ClassCastException {
-    @Nullable String parsed = checkKnownValue(CloudKnownType.TEXT, value, String.class);
-    return (parsed != null) ? parsed : String.class.cast(value);
-  }
-
-  /**
-   * Parses the scalar held by a typed map.
-   *
-   * @return the result of {@code CloudKnownType.parse} for the scalar, or {@code null} when
-   *     {@code value} is not a {@code Map}, has no type name, names a type other than
-   *     {@code type}, or has no scalar field
-   */
-  @Nullable
-  private static <T> T checkKnownValue(CloudKnownType type, Object value, Class<T> clazz) {
-    if (value instanceof Map) {
-      Map<String, Object> map = (Map<String, Object>) value;
-      @Nullable String typeName = (String) map.get(PropertyNames.OBJECT_TYPE_NAME);
-      if (typeName != null) {
-        @Nullable CloudKnownType knownType = CloudKnownType.forUri(typeName);
-        if (knownType != null && knownType == type) {
-          @Nullable Object scalar = map.get(PropertyNames.SCALAR_FIELD_NAME);
-          if (scalar != null) {
-            return knownType.parse(scalar, clazz);
-          }
-        }
-      }
-    }
-    return null;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java
deleted file mode 100644
index af03911..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/VarInt.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-/**
- * Variable-length encoding for integers.
- *
- * <p>Values are written least-significant group first, seven bits per byte, with the high
- * bit of each byte set when more bytes follow. One common format covers signed bytes,
- * shorts, ints, and longs, and takes between 1 and 10 bytes.
- * Less efficient than BigEndian{Int,Long} coder for negative or large numbers.
- * All negative ints are encoded using 5 bytes, longs take 10 bytes.
- */
-public class VarInt {
-
-  /** Widens {@code v} to a long, treating its 32 bits as unsigned. */
-  private static long convertIntToLongNoSignExtend(int v) {
-    return v & 0xFFFFFFFFL;
-  }
-
-  /**
-   * Encodes the given value onto the stream.
-   */
-  public static void encode(int v, OutputStream stream) throws IOException {
-    encode(convertIntToLongNoSignExtend(v), stream);
-  }
-
-  /**
-   * Encodes the given value onto the stream.
-   */
-  public static void encode(long v, OutputStream stream) throws IOException {
-    long remaining = v;
-    while (true) {
-      // Take the next 7 bits; the terminator bit is added below if more bits remain.
-      long group = remaining & 0x7F;
-      remaining >>>= 7;
-      if (remaining == 0) {
-        stream.write((byte) group);
-        return;
-      }
-      stream.write((byte) (group | 0x80));
-    }
-  }
-
-  /**
-   * Decodes an integer value from the given stream.
-   *
-   * @throws IOException if the decoded value does not fit in 32 bits
-   */
-  public static int decodeInt(InputStream stream) throws IOException {
-    long decoded = decodeLong(stream);
-    // Any bit set above the low 32 means the value is negative or at least 2^32.
-    if ((decoded >>> 32) != 0) {
-      throw new IOException("varint overflow " + decoded);
-    }
-    return (int) decoded;
-  }
-
-  /**
-   * Decodes a long value from the given stream.
-   *
-   * @throws EOFException if the stream is already at its end before the first byte
-   * @throws IOException if the stream ends in the middle of a value, or the value does not
-   *     fit in 64 bits
-   */
-  public static long decodeLong(InputStream stream) throws IOException {
-    long result = 0;
-    for (int shift = 0; ; shift += 7) {
-      int next = stream.read();
-      if (next < 0) {
-        if (shift != 0) {
-          throw new IOException("varint not terminated");
-        }
-        throw new EOFException();
-      }
-      long bits = next & 0x7F;
-      // The tenth byte (shift 63) may contribute a single bit only.
-      if (shift >= 64 || (shift == 63 && bits > 1)) {
-        throw new IOException("varint too long");
-      }
-      result |= bits << shift;
-      if ((next & 0x80) == 0) {
-        return result;
-      }
-    }
-  }
-
-  /**
-   * Returns the length of the encoding of the given value (in bytes).
-   */
-  public static int getLength(int v) {
-    return getLength(convertIntToLongNoSignExtend(v));
-  }
-
-  /**
-   * Returns the length of the encoding of the given value (in bytes).
-   */
-  public static int getLength(long v) {
-    int length = 1;
-    for (long rest = v >>> 7; rest != 0; rest >>>= 7) {
-      length++;
-    }
-    return length;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java
deleted file mode 100644
index d537ddb..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WatermarkHold.java
+++ /dev/null
@@ -1,450 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFns;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window.ClosingBehavior;
-import com.google.cloud.dataflow.sdk.util.state.MergingStateAccessor;
-import com.google.cloud.dataflow.sdk.util.state.ReadableState;
-import com.google.cloud.dataflow.sdk.util.state.StateMerging;
-import com.google.cloud.dataflow.sdk.util.state.StateTag;
-import com.google.cloud.dataflow.sdk.util.state.StateTags;
-import com.google.cloud.dataflow.sdk.util.state.WatermarkHoldState;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-import java.io.Serializable;
-
-import javax.annotation.Nullable;
-
-/**
- * Implements the logic to hold the output watermark for a computation back
- * until it has seen all the elements it needs based on the input watermark for the
- * computation.
- *
- * <p>The backend ensures the output watermark can never progress beyond the
- * input watermark for a computation. GroupAlsoByWindows computations may add a 'hold'
- * to the output watermark in order to prevent it progressing beyond a time within a window.
- * The hold will be 'cleared' when the associated pane is emitted.
- *
- * <p>This class is only intended for use by {@link ReduceFnRunner}. The two evolve together and
- * will likely break any other uses.
- *
- * @param <W> The kind of {@link BoundedWindow} the hold is for.
- */
-class WatermarkHold<W extends BoundedWindow> implements Serializable {
-  /**
-   * Return tag for state containing the output watermark hold
-   * used for elements.
-   */
-  public static <W extends BoundedWindow>
-      StateTag<Object, WatermarkHoldState<W>> watermarkHoldTagForOutputTimeFn(
-          OutputTimeFn<? super W> outputTimeFn) {
-    return StateTags.<Object, WatermarkHoldState<W>>makeSystemTagInternal(
-        StateTags.<W>watermarkStateInternal("hold", outputTimeFn));
-  }
-
-  /**
-   * Tag for state containing end-of-window and garbage collection output watermark holds.
-   * (We can't piggy-back on the data hold state since the outputTimeFn may be
-   * {@link OutputTimeFns#outputAtLatestInputTimestamp()}, in which case every pane
-   * would take the end-of-window time as its element time.)
-   */
-  @VisibleForTesting
-  public static final StateTag<Object, WatermarkHoldState<BoundedWindow>> EXTRA_HOLD_TAG =
-      StateTags.makeSystemTagInternal(StateTags.watermarkStateInternal(
-          "extra", OutputTimeFns.outputAtEarliestInputTimestamp()));
-
-  private final TimerInternals timerInternals;
-  private final WindowingStrategy<?, W> windowingStrategy;
-  private final StateTag<Object, WatermarkHoldState<W>> elementHoldTag;
-
-  public WatermarkHold(TimerInternals timerInternals, WindowingStrategy<?, W> windowingStrategy) {
-    this.timerInternals = timerInternals;
-    this.windowingStrategy = windowingStrategy;
-    this.elementHoldTag = watermarkHoldTagForOutputTimeFn(windowingStrategy.getOutputTimeFn());
-  }
-
-  /**
-   * Add a hold to prevent the output watermark progressing beyond the (possibly adjusted) timestamp
-   * of the element in {@code context}. We allow the actual hold time to be shifted later by
-   * {@link OutputTimeFn#assignOutputTime}, but no further than the end of the window. The hold will
-   * remain until cleared by {@link #extractAndRelease}. Return the timestamp at which the hold
-   * was placed, or {@literal null} if no hold was placed.
-   *
-   * <p>In the following we'll write {@code E} to represent an element's timestamp after passing
-   * through the window strategy's output time function, {@code IWM} for the local input watermark,
-   * {@code OWM} for the local output watermark, and {@code GCWM} for the garbage collection
-   * watermark (which is at {@code IWM - getAllowedLateness}). Time progresses from left to right,
-   * and we write {@code [ ... ]} to denote a bounded window with implied lower bound.
-   *
-   * <p>Note that the GCWM will be the same as the IWM if {@code getAllowedLateness}
-   * is {@code ZERO}.
-   *
-   * <p>Here are the cases we need to handle. They are conceptually considered in the
-   * sequence written since if getAllowedLateness is ZERO the GCWM is the same as the IWM.
-   * <ol>
-   * <li>(Normal)
-   * <pre>
-   *          |
-   *      [   | E        ]
-   *          |
-   *         IWM
-   * </pre>
-   * This is, hopefully, the common and happy case. The element is locally on-time and can
-   * definitely make it to an {@code ON_TIME} pane which we can still set an end-of-window timer
-   * for. We place an element hold at E, which may contribute to the {@code ON_TIME} pane's
-   * timestamp (depending on the output time function). Thus the OWM will not proceed past E
-   * until the next pane fires.
-   *
-   * <li>(Discard - no target window)
-   * <pre>
-   *                       |                            |
-   *      [     E        ] |                            |
-   *                       |                            |
-   *                     GCWM  <-getAllowedLateness->  IWM
-   * </pre>
-   * The element is very locally late. The window has been garbage collected, thus there
-   * is no target pane E could be assigned to. We discard E.
-   *
-   * <li>(Unobservably late)
-   * <pre>
-   *          |    |
-   *      [   | E  |     ]
-   *          |    |
-   *         OWM  IWM
-   * </pre>
-   * The element is locally late, however we can still treat this case as for 'Normal' above
-   * since the IWM has not yet passed the end of the window and the element is ahead of the
-   * OWM. In effect, we get to 'launder' the locally late element and consider it as locally
-   * on-time because no downstream computation can observe the difference.
-   *
-   * <li>(Maybe late 1)
-   * <pre>
-   *          |            |
-   *      [   | E        ] |
-   *          |            |
-   *         OWM          IWM
-   * </pre>
-   * The end-of-window timer may have already fired for this window, and thus an {@code ON_TIME}
-   * pane may have already been emitted. However, if timer firings have been delayed then it
-   * is possible the {@code ON_TIME} pane has not yet been emitted. We can't place an element
-   * hold since we can't be sure if it will be cleared promptly. Thus this element *may* find
-   * its way into an {@code ON_TIME} pane, but if so it will *not* contribute to that pane's
-   * timestamp. We may however set a garbage collection hold if required.
-   *
-   * <li>(Maybe late 2)
-   * <pre>
-   *               |   |
-   *      [     E  |   | ]
-   *               |   |
-   *              OWM IWM
-   * </pre>
-   * The end-of-window timer has not yet fired, so this element may still appear in an
-   * {@code ON_TIME} pane. However the element is too late to contribute to the output
-   * watermark hold, and thus won't contribute to the pane's timestamp. We can still place an
-   * end-of-window hold.
-   *
-   * <li>(Maybe late 3)
-   * <pre>
-   *               |       |
-   *      [     E  |     ] |
-   *               |       |
-   *              OWM     IWM
-   * </pre>
-   * As for the (Maybe late 2) case, however we don't even know if the end-of-window timer
-   * has already fired, or it is about to fire. We can place only the garbage collection hold,
-   * if required.
-   *
-   * <li>(Definitely late)
-   * <pre>
-   *                       |   |
-   *      [     E        ] |   |
-   *                       |   |
-   *                      OWM IWM
-   * </pre>
-   * The element is definitely too late to make an {@code ON_TIME} pane. We are too late to
-   * place an end-of-window hold. We can still place a garbage collection hold if required.
-   *
-   * </ol>
-   */
-  @Nullable
-  public Instant addHolds(ReduceFn<?, ?, ?, W>.ProcessValueContext context) {
-    Instant hold = addElementHold(context);
-    if (hold == null) {
-      hold = addEndOfWindowOrGarbageCollectionHolds(context);
-    }
-    return hold;
-  }
-
-  /**
-   * Return {@code timestamp}, possibly shifted forward in time according to the window
-   * strategy's output time function.
-   */
-  private Instant shift(Instant timestamp, W window) {
-    Instant shifted = windowingStrategy.getOutputTimeFn().assignOutputTime(timestamp, window);
-    if (shifted.isBefore(timestamp)) {
-      throw new IllegalStateException(
-          String.format("OutputTimeFn moved element from %s to earlier time %s for window %s",
-              timestamp, shifted, window));
-    }
-    if (!timestamp.isAfter(window.maxTimestamp()) && shifted.isAfter(window.maxTimestamp())) {
-      throw new IllegalStateException(
-          String.format("OutputTimeFn moved element from %s to %s which is beyond end of window %s",
-              timestamp, shifted, window));
-    }
-
-    return shifted;
-  }
-
-  /**
-   * Add an element hold if possible. Return instant at which hold was added, or {@literal null}
-   * if no hold was added.
-   */
-  @Nullable
-  private Instant addElementHold(ReduceFn<?, ?, ?, W>.ProcessValueContext context) {
-    // Give the window function a chance to move the hold timestamp forward to encourage progress.
-    // (A later hold implies less impediment to the output watermark making progress, which in
-    // turn encourages end-of-window triggers to fire earlier in following computations.)
-    Instant elementHold = shift(context.timestamp(), context.window());
-
-    Instant outputWM = timerInternals.currentOutputWatermarkTime();
-    Instant inputWM = timerInternals.currentInputWatermarkTime();
-
-    // Only add the hold if we can be sure:
-    // - the backend will be able to respect it
-    // (ie the hold is at or ahead of the output watermark), AND
-    // - a timer will be set to clear it by the end of window
-    // (ie the end of window is at or ahead of the input watermark).
-    String which;
-    boolean tooLate;
-    // TODO: These case labels could be tightened.
-    // See the case analysis in addHolds above for the motivation.
-    if (outputWM != null && elementHold.isBefore(outputWM)) {
-      which = "too late to effect output watermark";
-      tooLate = true;
-    } else if (inputWM != null && context.window().maxTimestamp().isBefore(inputWM)) {
-      which = "too late for end-of-window timer";
-      tooLate = true;
-    } else {
-      which = "on time";
-      tooLate = false;
-      context.state().access(elementHoldTag).add(elementHold);
-    }
-    WindowTracing.trace(
-        "WatermarkHold.addHolds: element hold at {} is {} for "
-        + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
-        elementHold, which, context.key(), context.window(), inputWM,
-        outputWM);
-
-    return tooLate ? null : elementHold;
-  }
-
-  /**
-   * Add an end-of-window hold or, if too late for that, a garbage collection hold (if required).
-   * Return the {@link Instant} at which hold was added, or {@literal null} if no hold was added.
-   *
-   * <p>The end-of-window hold guarantees that an empty {@code ON_TIME} pane can be given
-   * a timestamp which will not be considered beyond allowed lateness by any downstream computation.
-   */
-  @Nullable
-  private Instant addEndOfWindowOrGarbageCollectionHolds(ReduceFn<?, ?, ?, W>.Context context) {
-    Instant hold = addEndOfWindowHold(context);
-    if (hold == null) {
-      hold = addGarbageCollectionHold(context);
-    }
-    return hold;
-  }
-
-  /**
-   * Add an end-of-window hold. Return the {@link Instant} at which hold was added,
-   * or {@literal null} if no hold was added.
-   *
-   * <p>The end-of-window hold guarantees that any empty {@code ON_TIME} pane can be given
-   * a timestamp which will not be considered beyond allowed lateness by any downstream computation.
-   */
-  @Nullable
-  private Instant addEndOfWindowHold(ReduceFn<?, ?, ?, W>.Context context) {
-    // Only add an end-of-window hold if we can be sure a timer will be set to clear it
-    // by the end of window (ie the end of window is at or ahead of the input watermark).
-    Instant outputWM = timerInternals.currentOutputWatermarkTime();
-    Instant inputWM = timerInternals.currentInputWatermarkTime();
-    String which;
-    boolean tooLate;
-    Instant eowHold = context.window().maxTimestamp();
-    if (inputWM != null && eowHold.isBefore(inputWM)) {
-      which = "too late for end-of-window timer";
-      tooLate = true;
-    } else {
-      which = "on time";
-      tooLate = false;
-      Preconditions.checkState(outputWM == null || !eowHold.isBefore(outputWM),
-          "End-of-window hold %s cannot be before output watermark %s", eowHold, outputWM);
-      context.state().access(EXTRA_HOLD_TAG).add(eowHold);
-    }
-    WindowTracing.trace(
-        "WatermarkHold.addEndOfWindowHold: end-of-window hold at {} is {} for "
-        + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
-        eowHold, which, context.key(), context.window(), inputWM,
-        outputWM);
-
-    return tooLate ? null : eowHold;
-  }
-
-  /**
-   * Add a garbage collection hold, if required. Return the {@link Instant} at which hold was added,
-   * or {@literal null} if no hold was added.
-   *
-   * <p>The garbage collection hold guarantees that any empty final pane can be given
-   * a timestamp which will not be considered beyond allowed lateness by any downstream
-   * computation. If we are sure no empty final panes can be emitted then there's no need
-   * for an additional hold.
-   */
-  @Nullable
-  private Instant addGarbageCollectionHold(ReduceFn<?, ?, ?, W>.Context context) {
-    // Only add a garbage collection hold if we may need to emit an empty pane
-    // at garbage collection time, and garbage collection time is strictly after the
-    // end of window. (All non-empty panes will have holds at their output
-    // time derived from their incoming elements and no additional hold is required.)
-    if (context.windowingStrategy().getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS
-        && windowingStrategy.getAllowedLateness().isLongerThan(Duration.ZERO)) {
-      Instant gcHold = context.window().maxTimestamp().plus(windowingStrategy.getAllowedLateness());
-      Instant outputWM = timerInternals.currentOutputWatermarkTime();
-      Instant inputWM = timerInternals.currentInputWatermarkTime();
-      WindowTracing.trace(
-          "WatermarkHold.addGarbageCollectionHold: garbage collection at {} hold for "
-          + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
-          gcHold, context.key(), context.window(), inputWM, outputWM);
-      Preconditions.checkState(inputWM == null || !gcHold.isBefore(inputWM),
-          "Garbage collection hold %s cannot be before input watermark %s", gcHold, inputWM);
-      context.state().access(EXTRA_HOLD_TAG).add(gcHold);
-      return gcHold;
-    } else {
-      return null;
-    }
-  }
-
-  /**
-   * Prefetch watermark holds in preparation for merging.
-   */
-  public void prefetchOnMerge(MergingStateAccessor<?, W> state) {
-    StateMerging.prefetchWatermarks(state, elementHoldTag);
-  }
-
-  /**
-   * Updates the watermark hold when windows merge if it is possible the merged value does
-   * not equal all of the existing holds. For example, if the new window implies a later
-   * watermark hold, then earlier holds may be released.
-   */
-  public void onMerge(ReduceFn<?, ?, ?, W>.OnMergeContext context) {
-    WindowTracing.debug("onMerge: for key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
-        context.key(), context.window(), timerInternals.currentInputWatermarkTime(),
-        timerInternals.currentOutputWatermarkTime());
-    StateMerging.mergeWatermarks(context.state(), elementHoldTag, context.window());
-    // If we had a cheap way to determine if we have an element hold then we could
-    // avoid adding an unnecessary end-of-window or garbage collection hold.
-    // Simply reading the above merged watermark would impose an additional read for the
-    // common case that the active window has just one underlying state address window and
-    // the hold depends on the min of the element timestamps.
-    StateMerging.clear(context.state(), EXTRA_HOLD_TAG);
-    addEndOfWindowOrGarbageCollectionHolds(context);
-  }
-
-  /**
-   * Return (a future for) the earliest hold for {@code context}. Clear all the holds after
-   * reading, but add/restore an end-of-window or garbage collection hold if required.
-   *
-   * <p>The returned timestamp is the output timestamp according to the {@link OutputTimeFn}
-   * from the windowing strategy of this {@link WatermarkHold}, combined across all the non-late
-   * elements in the current pane. If there is no such value the timestamp is the end
-   * of the window.
-   */
-  public ReadableState<Instant> extractAndRelease(
-      final ReduceFn<?, ?, ?, W>.Context context, final boolean isFinished) {
-    WindowTracing.debug(
-        "extractAndRelease: for key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
-        context.key(), context.window(), timerInternals.currentInputWatermarkTime(),
-        timerInternals.currentOutputWatermarkTime());
-    final WatermarkHoldState<W> elementHoldState = context.state().access(elementHoldTag);
-    final WatermarkHoldState<BoundedWindow> extraHoldState = context.state().access(EXTRA_HOLD_TAG);
-    return new ReadableState<Instant>() {
-      @Override
-      public ReadableState<Instant> readLater() {
-        elementHoldState.readLater();
-        extraHoldState.readLater();
-        return this;
-      }
-
-      @Override
-      public Instant read() {
-        // Read both the element and extra holds.
-        Instant elementHold = elementHoldState.read();
-        Instant extraHold = extraHoldState.read();
-        Instant hold;
-        // Find the minimum, accounting for null.
-        if (elementHold == null) {
-          hold = extraHold;
-        } else if (extraHold == null) {
-          hold = elementHold;
-        } else if (elementHold.isBefore(extraHold)) {
-          hold = elementHold;
-        } else {
-          hold = extraHold;
-        }
-        if (hold == null || hold.isAfter(context.window().maxTimestamp())) {
-          // If no hold (eg because all elements came in behind the output watermark), or
-          // the hold was for garbage collection, take the end of window as the result.
-          WindowTracing.debug(
-              "WatermarkHold.extractAndRelease.read: clipping from {} to end of window "
-              + "for key:{}; window:{}",
-              hold, context.key(), context.window());
-          hold = context.window().maxTimestamp();
-        }
-        WindowTracing.debug("WatermarkHold.extractAndRelease.read: clearing for key:{}; window:{}",
-            context.key(), context.window());
-
-        // Clear the underlying state to allow the output watermark to progress.
-        elementHoldState.clear();
-        extraHoldState.clear();
-
-        if (!isFinished) {
-          // Only need to leave behind an end-of-window or garbage collection hold
-          // if future elements will be processed.
-          addEndOfWindowOrGarbageCollectionHolds(context);
-        }
-
-        return hold;
-      }
-    };
-  }
-
-  /**
-   * Clear any remaining holds.
-   */
-  public void clearHolds(ReduceFn<?, ?, ?, W>.Context context) {
-    WindowTracing.debug(
-        "WatermarkHold.clearHolds: For key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
-        context.key(), context.window(), timerInternals.currentInputWatermarkTime(),
-        timerInternals.currentOutputWatermarkTime());
-    context.state().access(elementHoldTag).clear();
-    context.state().access(EXTRA_HOLD_TAG).clear();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Weighted.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Weighted.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Weighted.java
deleted file mode 100644
index c31ad7f..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/Weighted.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-/**
- * Interface representing an object that has a weight, in unspecified units.
- */
-public interface Weighted {
-  /**
-   * Returns the weight of the object.
-   */
-  long getWeight();
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WeightedValue.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WeightedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WeightedValue.java
deleted file mode 100644
index 4a6e840..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WeightedValue.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-/**
- * A {@code T} with an accompanying weight. Units are unspecified.
- *
- * @param <T> the underlying type of object
- */
-public final class WeightedValue<T> implements Weighted {
-
-  private final T value;
-  private final long weight;
-
-  private WeightedValue(T value, long weight) {
-    this.value = value;
-    this.weight = weight;
-  }
-
-  public static <T> WeightedValue<T> of(T value, long weight) {
-    return new WeightedValue<>(value, weight);
-  }
-
-  public long getWeight() {
-    return weight;
-  }
-
-  public T getValue() {
-    return value;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowTracing.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowTracing.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowTracing.java
deleted file mode 100644
index 6ae2f42..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowTracing.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Logging for window operations. Generally only feasible to enable on hand-picked pipelines.
- */
-public final class WindowTracing {
-  private static final Logger LOG = LoggerFactory.getLogger(WindowTracing.class);
-
-  public static void debug(String format, Object... args) {
-    LOG.debug(format, args);
-  }
-
-  @SuppressWarnings("unused")
-  public static void trace(String format, Object... args) {
-    LOG.trace(format, args);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java
deleted file mode 100644
index 1e944e2..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowedValue.java
+++ /dev/null
@@ -1,720 +0,0 @@
-/*******************************************************************************
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- ******************************************************************************/
-
-package com.google.cloud.dataflow.sdk.util;
-
-import static com.google.cloud.dataflow.sdk.util.Structs.addBoolean;
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.coders.CoderException;
-import com.google.cloud.dataflow.sdk.coders.CollectionCoder;
-import com.google.cloud.dataflow.sdk.coders.InstantCoder;
-import com.google.cloud.dataflow.sdk.coders.StandardCoder;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo.PaneInfoCoder;
-import com.google.cloud.dataflow.sdk.util.common.ElementByteSizeObserver;
-import com.google.common.base.MoreObjects;
-import com.google.common.base.Preconditions;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Objects;
-import java.util.Set;
-
-/**
- * An immutable tuple of value, timestamp, windows, and pane.
- *
- * @param <T> the type of the value
- */
-public abstract class WindowedValue<T> {
-
-  protected final T value;
-  protected final PaneInfo pane;
-
-  /**
-   * Returns a {@code WindowedValue} with the given value, timestamp,
-   * and windows.
-   */
-  public static <T> WindowedValue<T> of(
-      T value,
-      Instant timestamp,
-      Collection<? extends BoundedWindow> windows,
-      PaneInfo pane) {
-    Preconditions.checkNotNull(pane);
-
-    if (windows.size() == 0 && BoundedWindow.TIMESTAMP_MIN_VALUE.equals(timestamp)) {
-      return valueInEmptyWindows(value, pane);
-    } else if (windows.size() == 1) {
-      return of(value, timestamp, windows.iterator().next(), pane);
-    } else {
-      return new TimestampedValueInMultipleWindows<>(value, timestamp, windows, pane);
-    }
-  }
-
-  /**
-   * Returns a {@code WindowedValue} with the given value, timestamp, and window.
-   */
-  public static <T> WindowedValue<T> of(
-      T value,
-      Instant timestamp,
-      BoundedWindow window,
-      PaneInfo pane) {
-    Preconditions.checkNotNull(pane);
-
-    boolean isGlobal = GlobalWindow.INSTANCE.equals(window);
-    if (isGlobal && BoundedWindow.TIMESTAMP_MIN_VALUE.equals(timestamp)) {
-      return valueInGlobalWindow(value, pane);
-    } else if (isGlobal) {
-      return new TimestampedValueInGlobalWindow<>(value, timestamp, pane);
-    } else {
-      return new TimestampedValueInSingleWindow<>(value, timestamp, window, pane);
-    }
-  }
-
-  /**
-   * Returns a {@code WindowedValue} with the given value in the {@link GlobalWindow} using the
-   * default timestamp and pane.
-   */
-  public static <T> WindowedValue<T> valueInGlobalWindow(T value) {
-    return new ValueInGlobalWindow<>(value, PaneInfo.NO_FIRING);
-  }
-
-  /**
-   * Returns a {@code WindowedValue} with the given value in the {@link GlobalWindow} using the
-   * default timestamp and the specified pane.
-   */
-  public static <T> WindowedValue<T> valueInGlobalWindow(T value, PaneInfo pane) {
-    return new ValueInGlobalWindow<>(value, pane);
-  }
-
-  /**
-   * Returns a {@code WindowedValue} with the given value and timestamp,
-   * {@code GlobalWindow} and default pane.
-   */
-  public static <T> WindowedValue<T> timestampedValueInGlobalWindow(T value, Instant timestamp) {
-    if (BoundedWindow.TIMESTAMP_MIN_VALUE.equals(timestamp)) {
-      return valueInGlobalWindow(value);
-    } else {
-      return new TimestampedValueInGlobalWindow<>(value, timestamp, PaneInfo.NO_FIRING);
-    }
-  }
-
-  /**
-   * Returns a {@code WindowedValue} with the given value in no windows, and the default timestamp
-   * and pane.
-   */
-  public static <T> WindowedValue<T> valueInEmptyWindows(T value) {
-    return new ValueInEmptyWindows<T>(value, PaneInfo.NO_FIRING);
-  }
-
-  /**
-   * Returns a {@code WindowedValue} with the given value in no windows, and the default timestamp
-   * and the specified pane.
-   */
-  public static <T> WindowedValue<T> valueInEmptyWindows(T value, PaneInfo pane) {
-    return new ValueInEmptyWindows<T>(value, pane);
-  }
-
-  private WindowedValue(T value, PaneInfo pane) {
-    this.value = value;
-    this.pane = checkNotNull(pane);
-  }
-
-  /**
-   * Returns a new {@code WindowedValue} that is a copy of this one, but with a different value,
-   * which may have a new type {@code NewT}.
-   */
-  public abstract <NewT> WindowedValue<NewT> withValue(NewT value);
-
-  /**
-   * Returns the value of this {@code WindowedValue}.
-   */
-  public T getValue() {
-    return value;
-  }
-
-  /**
-   * Returns the timestamp of this {@code WindowedValue}.
-   */
-  // Abstract: representations fixed at the minimum timestamp return a constant, the others
-  // return the timestamp they store.
-  public abstract Instant getTimestamp();
-
-  /**
-   * Returns the windows of this {@code WindowedValue}.
-   *
-   * <p>Depending on the concrete representation this is the shared global-window singleton
-   * list, an empty list, a singleton list of one window, or the stored collection of windows.
-   */
-  public abstract Collection<? extends BoundedWindow> getWindows();
-
-  /**
-   * Returns the pane of this {@code WindowedValue} in its window.
-   */
-  public PaneInfo getPane() {
-    // Never null: the constructor rejects a null pane.
-    return this.pane;
-  }
-
-  // Redeclared abstract so that every concrete representation must supply its own equals,
-  // hashCode and toString instead of inheriting the Object defaults.
-  @Override
-  public abstract boolean equals(Object o);
-
-  @Override
-  public abstract int hashCode();
-
-  @Override
-  public abstract String toString();
-
-  // Shared singleton list holding GlobalWindow.INSTANCE; returned by getWindows() of the
-  // global-window representations so they do not build a new list per call.
-  private static final Collection<? extends BoundedWindow> GLOBAL_WINDOWS =
-      Collections.singletonList(GlobalWindow.INSTANCE);
-
-  /**
-   * The abstract superclass of WindowedValue representations where
-   * timestamp == MIN.
-   */
-  private abstract static class MinTimestampWindowedValue<T> extends WindowedValue<T> {
-
-    /** Stores only the value and pane; the timestamp is implied by the class. */
-    public MinTimestampWindowedValue(T value, PaneInfo pane) {
-      super(value, pane);
-    }
-
-    /** Always {@link BoundedWindow#TIMESTAMP_MIN_VALUE}. */
-    @Override
-    public Instant getTimestamp() {
-      Instant minimum = BoundedWindow.TIMESTAMP_MIN_VALUE;
-      return minimum;
-    }
-  }
-
-  /**
-   * The representation of a WindowedValue where timestamp == MIN and
-   * windows == {GlobalWindow}.
-   */
-  private static class ValueInGlobalWindow<T> extends MinTimestampWindowedValue<T> {
-
-    /** Wraps {@code value} with the given pane; timestamp and window are implied. */
-    public ValueInGlobalWindow(T value, PaneInfo pane) {
-      super(value, pane);
-    }
-
-    /** Keeps the pane and swaps in the new value. */
-    @Override
-    public <NewT> WindowedValue<NewT> withValue(NewT value) {
-      return new ValueInGlobalWindow<NewT>(value, pane);
-    }
-
-    /** Returns the shared singleton list containing only the global window. */
-    @Override
-    public Collection<? extends BoundedWindow> getWindows() {
-      return GLOBAL_WINDOWS;
-    }
-
-    /** Equal only to another {@code ValueInGlobalWindow} with an equal pane and value. */
-    @Override
-    public boolean equals(Object o) {
-      if (!(o instanceof ValueInGlobalWindow)) {
-        return false;
-      }
-      ValueInGlobalWindow<?> other = (ValueInGlobalWindow<?>) o;
-      if (!Objects.equals(other.pane, this.pane)) {
-        return false;
-      }
-      return Objects.equals(other.value, this.value);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(value, pane);
-    }
-
-    @Override
-    public String toString() {
-      MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(getClass());
-      description.add("value", value);
-      description.add("pane", pane);
-      return description.toString();
-    }
-  }
-
-  /**
-   * The representation of a WindowedValue where timestamp == MIN and
-   * windows == {}.
-   */
-  private static class ValueInEmptyWindows<T> extends MinTimestampWindowedValue<T> {
-
-    /** Wraps {@code value} with the given pane; it belongs to no window. */
-    public ValueInEmptyWindows(T value, PaneInfo pane) {
-      super(value, pane);
-    }
-
-    /** Keeps the pane and swaps in the new value. */
-    @Override
-    public <NewT> WindowedValue<NewT> withValue(NewT value) {
-      return new ValueInEmptyWindows<NewT>(value, pane);
-    }
-
-    /** Returns an empty list: this representation is in no window. */
-    @Override
-    public Collection<? extends BoundedWindow> getWindows() {
-      return Collections.emptyList();
-    }
-
-    /** Equal only to another {@code ValueInEmptyWindows} with an equal pane and value. */
-    @Override
-    public boolean equals(Object o) {
-      if (!(o instanceof ValueInEmptyWindows)) {
-        return false;
-      }
-      ValueInEmptyWindows<?> other = (ValueInEmptyWindows<?>) o;
-      if (!Objects.equals(other.pane, this.pane)) {
-        return false;
-      }
-      return Objects.equals(other.value, this.value);
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(value, pane);
-    }
-
-    @Override
-    public String toString() {
-      MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(getClass());
-      description.add("value", value);
-      description.add("pane", pane);
-      return description.toString();
-    }
-  }
-
-  /**
-   * The abstract superclass of WindowedValue representations where
-   * timestamp is arbitrary.
-   */
-  private abstract static class TimestampedWindowedValue<T> extends WindowedValue<T> {
-    /** The explicitly stored timestamp; never null. */
-    protected final Instant timestamp;
-
-    /** Stores the value, pane and a non-null timestamp. */
-    public TimestampedWindowedValue(T value, Instant timestamp, PaneInfo pane) {
-      super(value, pane);
-      this.timestamp = checkNotNull(timestamp);
-    }
-
-    /** Returns the stored timestamp. */
-    @Override
-    public Instant getTimestamp() {
-      return this.timestamp;
-    }
-  }
-
-  /**
-   * The representation of a WindowedValue where timestamp {@code >}
-   * MIN and windows == {GlobalWindow}.
-   */
-  private static class TimestampedValueInGlobalWindow<T> extends TimestampedWindowedValue<T> {
-
-    /** Wraps {@code value} with an explicit timestamp and pane; the window is implied. */
-    public TimestampedValueInGlobalWindow(T value, Instant timestamp, PaneInfo pane) {
-      super(value, timestamp, pane);
-    }
-
-    /** Keeps timestamp and pane and swaps in the new value. */
-    @Override
-    public <NewT> WindowedValue<NewT> withValue(NewT value) {
-      return new TimestampedValueInGlobalWindow<NewT>(value, timestamp, pane);
-    }
-
-    /** Returns the shared singleton list containing only the global window. */
-    @Override
-    public Collection<? extends BoundedWindow> getWindows() {
-      return GLOBAL_WINDOWS;
-    }
-
-    /**
-     * Equal only to another {@code TimestampedValueInGlobalWindow} with the same instant,
-     * pane and value.
-     */
-    @Override
-    public boolean equals(Object o) {
-      if (!(o instanceof TimestampedValueInGlobalWindow)) {
-        return false;
-      }
-      TimestampedValueInGlobalWindow<?> other = (TimestampedValueInGlobalWindow<?>) o;
-      // isEqual rather than equals: don't compare chronology objects.
-      if (!this.timestamp.isEqual(other.timestamp)) {
-        return false;
-      }
-      if (!Objects.equals(other.pane, this.pane)) {
-        return false;
-      }
-      return Objects.equals(other.value, this.value);
-    }
-
-    @Override
-    public int hashCode() {
-      // Hash the millis, matching the chronology-insensitive comparison in equals.
-      return Objects.hash(value, pane, timestamp.getMillis());
-    }
-
-    @Override
-    public String toString() {
-      MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(getClass());
-      description.add("value", value);
-      description.add("timestamp", timestamp);
-      description.add("pane", pane);
-      return description.toString();
-    }
-  }
-
-  /**
-   * The representation of a WindowedValue where timestamp is arbitrary and
-   * windows == a single non-Global window.
-   */
-  private static class TimestampedValueInSingleWindow<T> extends TimestampedWindowedValue<T> {
-    /** The one window this value belongs to; never null. */
-    private final BoundedWindow window;
-
-    /** Wraps {@code value} with a timestamp, a single non-null window and a pane. */
-    public TimestampedValueInSingleWindow(
-        T value, Instant timestamp, BoundedWindow window, PaneInfo pane) {
-      super(value, timestamp, pane);
-      this.window = checkNotNull(window);
-    }
-
-    /** Keeps timestamp, window and pane and swaps in the new value. */
-    @Override
-    public <NewT> WindowedValue<NewT> withValue(NewT value) {
-      return new TimestampedValueInSingleWindow<NewT>(value, timestamp, window, pane);
-    }
-
-    /** Returns a fresh singleton list wrapping the stored window. */
-    @Override
-    public Collection<? extends BoundedWindow> getWindows() {
-      return Collections.singletonList(window);
-    }
-
-    /**
-     * Equal only to another {@code TimestampedValueInSingleWindow} with the same value,
-     * instant, pane and window.
-     */
-    @Override
-    public boolean equals(Object o) {
-      if (!(o instanceof TimestampedValueInSingleWindow)) {
-        return false;
-      }
-      TimestampedValueInSingleWindow<?> other = (TimestampedValueInSingleWindow<?>) o;
-      if (!Objects.equals(other.value, this.value)) {
-        return false;
-      }
-      // isEqual rather than equals: don't compare chronology objects.
-      if (!this.timestamp.isEqual(other.timestamp)) {
-        return false;
-      }
-      if (!Objects.equals(other.pane, this.pane)) {
-        return false;
-      }
-      return Objects.equals(other.window, this.window);
-    }
-
-    @Override
-    public int hashCode() {
-      // Hash the millis, matching the chronology-insensitive comparison in equals.
-      return Objects.hash(value, timestamp.getMillis(), pane, window);
-    }
-
-    @Override
-    public String toString() {
-      MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(getClass());
-      description.add("value", value);
-      description.add("timestamp", timestamp);
-      description.add("window", window);
-      description.add("pane", pane);
-      return description.toString();
-    }
-  }
-
-  /**
-   * The representation of a WindowedValue, excluding the special
-   * cases captured above.
-   */
-  private static class TimestampedValueInMultipleWindows<T>
-      extends TimestampedWindowedValue<T> {
-    // Not final: replaced by a LinkedHashSet copy the first time equals or hashCode needs
-    // set semantics (see ensureWindowsAreASet).
-    private Collection<? extends BoundedWindow> windows;
-
-    /** Wraps {@code value} with a timestamp, a non-null collection of windows and a pane. */
-    public TimestampedValueInMultipleWindows(
-        T value,
-        Instant timestamp,
-        Collection<? extends BoundedWindow> windows,
-        PaneInfo pane) {
-      super(value, timestamp, pane);
-      this.windows = checkNotNull(windows);
-    }
-
-    /** Keeps timestamp, windows and pane and swaps in the new value. */
-    @Override
-    public <NewT> WindowedValue<NewT> withValue(NewT value) {
-      return new TimestampedValueInMultipleWindows<NewT>(value, timestamp, windows, pane);
-    }
-
-    /** Returns the stored collection of windows. */
-    @Override
-    public Collection<? extends BoundedWindow> getWindows() {
-      return this.windows;
-    }
-
-    /**
-     * Equal only to another {@code TimestampedValueInMultipleWindows} with the same instant,
-     * value and pane, and whose windows are equal when both sides are viewed as sets.
-     */
-    @Override
-    public boolean equals(Object o) {
-      if (!(o instanceof TimestampedValueInMultipleWindows)) {
-        return false;
-      }
-      TimestampedValueInMultipleWindows<?> other = (TimestampedValueInMultipleWindows<?>) o;
-      // isEqual rather than equals: don't compare chronology objects.
-      if (!this.timestamp.isEqual(other.timestamp)) {
-        return false;
-      }
-      if (!Objects.equals(other.value, this.value)) {
-        return false;
-      }
-      if (!Objects.equals(other.pane, this.pane)) {
-        return false;
-      }
-      // Normalize both sides to sets only once the cheaper checks have passed.
-      ensureWindowsAreASet();
-      other.ensureWindowsAreASet();
-      return other.windows.equals(this.windows);
-    }
-
-    @Override
-    public int hashCode() {
-      // Normalize first so the hash agrees with the set-based comparison in equals.
-      ensureWindowsAreASet();
-      return Objects.hash(value, timestamp.getMillis(), pane, windows);
-    }
-
-    @Override
-    public String toString() {
-      MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(getClass());
-      description.add("value", value);
-      description.add("timestamp", timestamp);
-      description.add("windows", windows);
-      description.add("pane", pane);
-      return description.toString();
-    }
-
-    /** Replaces {@code windows} with a {@link LinkedHashSet} copy unless it already is a set. */
-    private void ensureWindowsAreASet() {
-      if (windows instanceof Set) {
-        return;
-      }
-      windows = new LinkedHashSet<>(windows);
-    }
-  }
-
-
-  /////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * Returns the {@code Coder} to use for a {@code WindowedValue<T>},
-   * using the given valueCoder and windowCoder.
-   */
-  public static <T> FullWindowedValueCoder<T> getFullCoder(
-      Coder<T> valueCoder, Coder<? extends BoundedWindow> windowCoder) {
-    // Thin convenience wrapper over the coder's own static factory.
-    FullWindowedValueCoder<T> fullCoder = FullWindowedValueCoder.of(valueCoder, windowCoder);
-    return fullCoder;
-  }
-
-  /**
-   * Returns the {@code ValueOnlyCoder} from the given valueCoder.
-   */
-  public static <T> ValueOnlyWindowedValueCoder<T> getValueOnlyCoder(Coder<T> valueCoder) {
-    // Thin convenience wrapper over the coder's own static factory.
-    ValueOnlyWindowedValueCoder<T> valueOnly = ValueOnlyWindowedValueCoder.of(valueCoder);
-    return valueOnly;
-  }
-
-  /**
-   * Abstract class for {@code WindowedValue} coder.
-   */
-  public abstract static class WindowedValueCoder<T> extends StandardCoder<WindowedValue<T>> {
-    /** Coder for the wrapped element; never null. */
-    final Coder<T> valueCoder;
-
-    /** Stores the non-null coder used for the wrapped element. */
-    WindowedValueCoder(Coder<T> valueCoder) {
-      this.valueCoder = checkNotNull(valueCoder);
-    }
-
-    /**
-     * Returns the coder used for the wrapped element.
-     */
-    public Coder<T> getValueCoder() {
-      return this.valueCoder;
-    }
-
-    /**
-     * Returns a new {@code WindowedValueCoder} of the same kind as this one that encodes the
-     * wrapped element with {@code valueCoder} instead.
-     */
-    public abstract <NewT> WindowedValueCoder<NewT> withValueCoder(Coder<NewT> valueCoder);
-  }
-
-  /**
-   * Coder for {@code WindowedValue}.
-   */
-  public static class FullWindowedValueCoder<T> extends WindowedValueCoder<T> {
-    private final Coder<? extends BoundedWindow> windowCoder;
-    // Precompute and cache the coder for a list of windows.
-    private final Coder<Collection<? extends BoundedWindow>> windowsCoder;
-
-    /**
-     * Returns a {@code FullWindowedValueCoder} that encodes values with {@code valueCoder}
-     * and windows with {@code windowCoder}.
-     */
-    public static <T> FullWindowedValueCoder<T> of(
-        Coder<T> valueCoder,
-        Coder<? extends BoundedWindow> windowCoder) {
-      return new FullWindowedValueCoder<>(valueCoder, windowCoder);
-    }
-
-    /**
-     * JSON factory: expects exactly two component coders, the value coder followed by the
-     * window coder.
-     *
-     * @throws IllegalArgumentException if {@code components} does not have exactly 2 entries
-     */
-    @JsonCreator
-    public static FullWindowedValueCoder<?> of(
-        @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-        List<Coder<?>> components) {
-      checkArgument(components.size() == 2,
-                    "Expecting 2 components, got " + components.size());
-      @SuppressWarnings("unchecked")
-      Coder<? extends BoundedWindow> window = (Coder<? extends BoundedWindow>) components.get(1);
-      return of(components.get(0), window);
-    }
-
-    FullWindowedValueCoder(Coder<T> valueCoder,
-                           Coder<? extends BoundedWindow> windowCoder) {
-      super(valueCoder);
-      this.windowCoder = checkNotNull(windowCoder);
-      // It's not possible to statically type-check correct use of the
-      // windowCoder (we have to ensure externally that we only get
-      // windows of the class handled by windowCoder), so type
-      // windowsCoder in a way that makes encode() and decode() work
-      // right, and cast the window type away here.
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      Coder<Collection<? extends BoundedWindow>> collectionCoder =
-          (Coder) CollectionCoder.of(this.windowCoder);
-      this.windowsCoder = collectionCoder;
-    }
-
-    /** Returns the coder for a single window. */
-    public Coder<? extends BoundedWindow> getWindowCoder() {
-      return windowCoder;
-    }
-
-    /** Returns the cached coder for the whole collection of windows. */
-    public Coder<Collection<? extends BoundedWindow>> getWindowsCoder() {
-      return windowsCoder;
-    }
-
-    @Override
-    public <NewT> WindowedValueCoder<NewT> withValueCoder(Coder<NewT> valueCoder) {
-      return new FullWindowedValueCoder<>(valueCoder, windowCoder);
-    }
-
-    /**
-     * Writes, in order: the value, the timestamp and the windows (each in the nested
-     * context), then the pane in the outer {@code context}.
-     */
-    @Override
-    public void encode(WindowedValue<T> windowedElem,
-                       OutputStream outStream,
-                       Context context)
-        throws CoderException, IOException {
-      Context nestedContext = context.nested();
-      valueCoder.encode(windowedElem.getValue(), outStream, nestedContext);
-      InstantCoder.of().encode(
-          windowedElem.getTimestamp(), outStream, nestedContext);
-      windowsCoder.encode(windowedElem.getWindows(), outStream, nestedContext);
-      PaneInfoCoder.INSTANCE.encode(windowedElem.getPane(), outStream, context);
-    }
-
-    /**
-     * Reads the components in the order and with the contexts that {@link #encode} used to
-     * write them.
-     */
-    @Override
-    public WindowedValue<T> decode(InputStream inStream, Context context)
-        throws CoderException, IOException {
-      Context nestedContext = context.nested();
-      T value = valueCoder.decode(inStream, nestedContext);
-      Instant timestamp = InstantCoder.of().decode(inStream, nestedContext);
-      Collection<? extends BoundedWindow> windows =
-          windowsCoder.decode(inStream, nestedContext);
-      // encode() writes the pane with the outer context, so it must be read with the same
-      // context rather than the nested one.
-      PaneInfo pane = PaneInfoCoder.INSTANCE.decode(inStream, context);
-      return WindowedValue.of(value, timestamp, windows, pane);
-    }
-
-    /** Requires both the value coder and the window coder to be deterministic. */
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      verifyDeterministic(
-          "FullWindowedValueCoder requires a deterministic valueCoder",
-          valueCoder);
-      verifyDeterministic(
-          "FullWindowedValueCoder requires a deterministic windowCoder",
-          windowCoder);
-    }
-
-    /**
-     * Registers the value, timestamp and windows with {@code observer}.
-     *
-     * <p>NOTE(review): the pane is not registered and the outer {@code context} is used for
-     * every component, unlike {@link #encode} - confirm whether the size estimate is meant
-     * to ignore these.
-     */
-    @Override
-    public void registerByteSizeObserver(WindowedValue<T> value,
-                                         ElementByteSizeObserver observer,
-                                         Context context) throws Exception {
-      valueCoder.registerByteSizeObserver(value.getValue(), observer, context);
-      InstantCoder.of().registerByteSizeObserver(value.getTimestamp(), observer, context);
-      windowsCoder.registerByteSizeObserver(value.getWindows(), observer, context);
-    }
-
-    /** Adds the {@code IS_WRAPPER} flag to the cloud representation built by the superclass. */
-    @Override
-    public CloudObject asCloudObject() {
-      CloudObject result = super.asCloudObject();
-      addBoolean(result, PropertyNames.IS_WRAPPER, true);
-      return result;
-    }
-
-    /** Returns {@code null}: this coder reports no coder arguments. */
-    @Override
-    public List<? extends Coder<?>> getCoderArguments() {
-      return null;
-    }
-
-    /** Returns the value coder followed by the window coder. */
-    @Override
-    public List<? extends Coder<?>> getComponents() {
-      return Arrays.<Coder<?>>asList(valueCoder, windowCoder);
-    }
-  }
-
-  /**
-   * Coder for {@code WindowedValue}.
-   *
-   * <p>A {@code ValueOnlyWindowedValueCoder} only encodes and decodes the value. It drops the
-   * timestamp, windows and pane when encoding, and decodes into the global window with the
-   * default timestamp and pane.
-   */
-  public static class ValueOnlyWindowedValueCoder<T> extends WindowedValueCoder<T> {
-
-    /** Returns a {@code ValueOnlyWindowedValueCoder} wrapping {@code valueCoder}. */
-    public static <T> ValueOnlyWindowedValueCoder<T> of(Coder<T> valueCoder) {
-      return new ValueOnlyWindowedValueCoder<T>(valueCoder);
-    }
-
-    /**
-     * JSON factory: expects exactly one component coder, the value coder.
-     */
-    @JsonCreator
-    public static ValueOnlyWindowedValueCoder<?> of(
-        @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
-        List<Coder<?>> components) {
-      int componentCount = components.size();
-      checkArgument(componentCount == 1, "Expecting 1 component, got " + componentCount);
-      return of(components.get(0));
-    }
-
-    ValueOnlyWindowedValueCoder(Coder<T> valueCoder) {
-      super(valueCoder);
-    }
-
-    @Override
-    public <NewT> WindowedValueCoder<NewT> withValueCoder(Coder<NewT> valueCoder) {
-      return new ValueOnlyWindowedValueCoder<NewT>(valueCoder);
-    }
-
-    /** Writes only the wrapped value; timestamp, windows and pane are not encoded. */
-    @Override
-    public void encode(WindowedValue<T> windowedElem, OutputStream outStream, Context context)
-        throws CoderException, IOException {
-      T element = windowedElem.getValue();
-      valueCoder.encode(element, outStream, context);
-    }
-
-    /** Reads one value and places it in the global window. */
-    @Override
-    public WindowedValue<T> decode(InputStream inStream, Context context)
-        throws CoderException, IOException {
-      return WindowedValue.valueInGlobalWindow(valueCoder.decode(inStream, context));
-    }
-
-    /** Requires the value coder to be deterministic. */
-    @Override
-    public void verifyDeterministic() throws NonDeterministicException {
-      verifyDeterministic(
-          "ValueOnlyWindowedValueCoder requires a deterministic valueCoder",
-          valueCoder);
-    }
-
-    /** Registers only the wrapped value with {@code observer}. */
-    @Override
-    public void registerByteSizeObserver(
-        WindowedValue<T> value, ElementByteSizeObserver observer, Context context)
-        throws Exception {
-      T element = value.getValue();
-      valueCoder.registerByteSizeObserver(element, observer, context);
-    }
-
-    /** Adds the {@code IS_WRAPPER} flag to the cloud representation built by the superclass. */
-    @Override
-    public CloudObject asCloudObject() {
-      CloudObject cloudObject = super.asCloudObject();
-      addBoolean(cloudObject, PropertyNames.IS_WRAPPER, true);
-      return cloudObject;
-    }
-
-    /** Returns a one-element list holding the value coder. */
-    @Override
-    public List<? extends Coder<?>> getCoderArguments() {
-      return Arrays.<Coder<?>>asList(valueCoder);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingInternals.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingInternals.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingInternals.java
deleted file mode 100644
index 12fcd53..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingInternals.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.coders.Coder;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
-import com.google.cloud.dataflow.sdk.util.state.StateInternals;
-import com.google.cloud.dataflow.sdk.values.PCollectionView;
-import com.google.cloud.dataflow.sdk.values.TupleTag;
-
-import org.joda.time.Instant;
-
-import java.io.IOException;
-import java.util.Collection;
-
-/**
- * Interface that may be required by some (internal) {@code DoFn}s to implement windowing. It should
- * not be necessary for general user code to interact with this at all.
- *
- * <p>This interface should be provided by runner implementors to support windowing on their runner.
- *
- * @param <InputT> input type
- * @param <OutputT> output type
- */
-public interface WindowingInternals<InputT, OutputT> {
-
-  /**
-   * Unsupported state internals. The key type is unknown. It is up to the user to use the
-   * correct type of key.
-   *
-   * @return the state internals, typed with a wildcard key
-   */
-  StateInternals<?> stateInternals();
-
-  /**
-   * Output the value at the specified timestamp in the listed windows.
-   *
-   * @param output the value to emit
-   * @param timestamp the timestamp to attach to the emitted value
-   * @param windows the windows the emitted value is placed in
-   * @param pane the pane information to attach to the emitted value
-   */
-  void outputWindowedValue(OutputT output, Instant timestamp,
-      Collection<? extends BoundedWindow> windows, PaneInfo pane);
-
-  /**
-   * Return the timer manager provided by the underlying system, or null if Timers need
-   * to be emulated.
-   *
-   * <p>Callers must therefore handle a {@code null} result.
-   */
-  TimerInternals timerInternals();
-
-  /**
-   * Access the windows the element is being processed in without "exploding" it.
-   *
-   * <p>NOTE(review): "exploding" presumably means splitting the element into one copy per
-   * window - confirm against the runner implementations.
-   */
-  Collection<? extends BoundedWindow> windows();
-
-  /**
-   * Access the pane of the current window(s).
-   */
-  PaneInfo pane();
-
-  /**
-   * Write the given {@link PCollectionView} data to a location accessible by other workers.
-   *
-   * @param tag the tag identifying the view data being written
-   * @param data the windowed values that make up the view contents
-   * @param elemCoder the coder for the elements of {@code data}
-   * @throws IOException if the data cannot be written
-   */
-  <T> void writePCollectionViewData(
-      TupleTag<?> tag,
-      Iterable<WindowedValue<T>> data,
-      Coder<T> elemCoder) throws IOException;
-
-  /**
-   * Return the value of the side input for the window of a main input element.
-   *
-   * @param view the side input to read
-   * @param mainInputWindow the window of the main input element
-   */
-  <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow);
-}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/7bef2b7e/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingStrategy.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingStrategy.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingStrategy.java
deleted file mode 100644
index c167b8c..0000000
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/util/WindowingStrategy.java
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Copyright (C) 2015 Google Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may not
- * use this file except in compliance with the License. You may obtain a copy of
- * the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package com.google.cloud.dataflow.sdk.util;
-
-import com.google.cloud.dataflow.sdk.annotations.Experimental;
-import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.DefaultTrigger;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow;
-import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
-import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Trigger;
-import com.google.cloud.dataflow.sdk.transforms.windowing.Window.ClosingBehavior;
-import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
-import com.google.common.base.MoreObjects;
-
-import org.joda.time.Duration;
-
-import java.io.Serializable;
-import java.util.Objects;
-
-/**
- * A {@code WindowingStrategy} describes the windowing behavior for a specific collection of values.
- * It has both a {@link WindowFn} describing how elements are assigned to windows and a
- * {@link Trigger} that controls when output is produced for each window.
- *
- * @param <T> type of elements being windowed
- * @param <W> {@link BoundedWindow} subclass used to represent the
- *            windows used by this {@code WindowingStrategy}
- */
-public class WindowingStrategy<T, W extends BoundedWindow> implements Serializable {
-
-  /**
-   * The accumulation modes that can be used with windowing.
-   */
-  public enum AccumulationMode {
-    DISCARDING_FIRED_PANES,
-    ACCUMULATING_FIRED_PANES;
-  }
-
-  private static final Duration DEFAULT_ALLOWED_LATENESS = Duration.ZERO;
-  private static final WindowingStrategy<Object, GlobalWindow> DEFAULT = of(new GlobalWindows());
-
-  private final WindowFn<T, W> windowFn;
-  private final OutputTimeFn<? super W> outputTimeFn;
-  private final ExecutableTrigger<W> trigger;
-  private final AccumulationMode mode;
-  private final Duration allowedLateness;
-  private final ClosingBehavior closingBehavior;
-  // Each "*Specified" flag records whether the matching setting was set through a with*
-  // method (true) or is still the default chosen by of() (false).
-  private final boolean triggerSpecified;
-  private final boolean modeSpecified;
-  private final boolean allowedLatenessSpecified;
-  private final boolean outputTimeFnSpecified;
-
-  private WindowingStrategy(
-      WindowFn<T, W> windowFn,
-      ExecutableTrigger<W> trigger, boolean triggerSpecified,
-      AccumulationMode mode, boolean modeSpecified,
-      Duration allowedLateness, boolean allowedLatenessSpecified,
-      OutputTimeFn<? super W> outputTimeFn, boolean outputTimeFnSpecified,
-      ClosingBehavior closingBehavior) {
-    this.windowFn = windowFn;
-    this.trigger = trigger;
-    this.triggerSpecified = triggerSpecified;
-    this.mode = mode;
-    this.modeSpecified = modeSpecified;
-    this.allowedLateness = allowedLateness;
-    this.allowedLatenessSpecified = allowedLatenessSpecified;
-    this.closingBehavior = closingBehavior;
-    this.outputTimeFn = outputTimeFn;
-    this.outputTimeFnSpecified = outputTimeFnSpecified;
-  }
-
-  /**
-   * Return a fully specified, default windowing strategy.
-   */
-  public static WindowingStrategy<Object, GlobalWindow> globalDefault() {
-    return DEFAULT;
-  }
-
-  /**
-   * Returns a {@link WindowingStrategy} for {@code windowFn} with every other setting at its
-   * default: the default trigger, {@link AccumulationMode#DISCARDING_FIRED_PANES}, zero
-   * allowed lateness, the window function's own output time function and
-   * {@link ClosingBehavior#FIRE_IF_NON_EMPTY}. None of these is marked as specified.
-   */
-  public static <T, W extends BoundedWindow> WindowingStrategy<T, W> of(
-      WindowFn<T, W> windowFn) {
-    return new WindowingStrategy<>(windowFn,
-        ExecutableTrigger.create(DefaultTrigger.<W>of()), false,
-        AccumulationMode.DISCARDING_FIRED_PANES, false,
-        DEFAULT_ALLOWED_LATENESS, false,
-        windowFn.getOutputTimeFn(), false,
-        ClosingBehavior.FIRE_IF_NON_EMPTY);
-  }
-
-  public WindowFn<T, W> getWindowFn() {
-    return windowFn;
-  }
-
-  public ExecutableTrigger<W> getTrigger() {
-    return trigger;
-  }
-
-  public boolean isTriggerSpecified() {
-    return triggerSpecified;
-  }
-
-  public Duration getAllowedLateness() {
-    return allowedLateness;
-  }
-
-  public boolean isAllowedLatenessSpecified() {
-    return allowedLatenessSpecified;
-  }
-
-  public AccumulationMode getMode() {
-    return mode;
-  }
-
-  public boolean isModeSpecified() {
-    return modeSpecified;
-  }
-
-  public ClosingBehavior getClosingBehavior() {
-    return closingBehavior;
-  }
-
-  public OutputTimeFn<? super W> getOutputTimeFn() {
-    return outputTimeFn;
-  }
-
-  public boolean isOutputTimeFnSpecified() {
-    return outputTimeFnSpecified;
-  }
-
-  /**
-   * Returns a {@link WindowingStrategy} identical to {@code this} but with the trigger set to
-   * {@code wildcardTrigger}.
-   */
-  public WindowingStrategy<T, W> withTrigger(Trigger<?> wildcardTrigger) {
-    @SuppressWarnings("unchecked")
-    Trigger<W> typedTrigger = (Trigger<W>) wildcardTrigger;
-    return new WindowingStrategy<T, W>(
-        windowFn,
-        ExecutableTrigger.create(typedTrigger), true,
-        mode, modeSpecified,
-        allowedLateness, allowedLatenessSpecified,
-        outputTimeFn, outputTimeFnSpecified,
-        closingBehavior);
-  }
-
-  /**
-   * Returns a {@link WindowingStrategy} identical to {@code this} but with the accumulation mode
-   * set to {@code mode}.
-   */
-  public WindowingStrategy<T, W> withMode(AccumulationMode mode) {
-    return new WindowingStrategy<T, W>(
-        windowFn,
-        trigger, triggerSpecified,
-        mode, true,
-        allowedLateness, allowedLatenessSpecified,
-        outputTimeFn, outputTimeFnSpecified,
-        closingBehavior);
-  }
-
-  /**
-   * Returns a {@link WindowingStrategy} identical to {@code this} but with the window function
-   * set to {@code wildcardWindowFn}.
-   *
-   * <p>Unless an output time function was explicitly specified, the new window function's
-   * own output time function replaces the current one.
-   */
-  public WindowingStrategy<T, W> withWindowFn(WindowFn<?, ?> wildcardWindowFn) {
-    @SuppressWarnings("unchecked")
-    WindowFn<T, W> typedWindowFn = (WindowFn<T, W>) wildcardWindowFn;
-
-    // The onus of type correctness falls on the caller.
-    @SuppressWarnings("unchecked")
-    OutputTimeFn<? super W> newOutputTimeFn = (OutputTimeFn<? super W>)
-        (outputTimeFnSpecified ? outputTimeFn : typedWindowFn.getOutputTimeFn());
-
-    return new WindowingStrategy<T, W>(
-        typedWindowFn,
-        trigger, triggerSpecified,
-        mode, modeSpecified,
-        allowedLateness, allowedLatenessSpecified,
-        newOutputTimeFn, outputTimeFnSpecified,
-        closingBehavior);
-  }
-
-  /**
-   * Returns a {@link WindowingStrategy} identical to {@code this} but with the allowed lateness
-   * set to {@code allowedLateness}.
-   */
-  public WindowingStrategy<T, W> withAllowedLateness(Duration allowedLateness) {
-    return new WindowingStrategy<T, W>(
-        windowFn,
-        trigger, triggerSpecified,
-        mode, modeSpecified,
-        allowedLateness, true,
-        outputTimeFn, outputTimeFnSpecified,
-        closingBehavior);
-  }
-
-  /**
-   * Returns a {@link WindowingStrategy} identical to {@code this} but with the closing behavior
-   * set to {@code closingBehavior}.
-   */
-  public WindowingStrategy<T, W> withClosingBehavior(ClosingBehavior closingBehavior) {
-    return new WindowingStrategy<T, W>(
-        windowFn,
-        trigger, triggerSpecified,
-        mode, modeSpecified,
-        allowedLateness, allowedLatenessSpecified,
-        outputTimeFn, outputTimeFnSpecified,
-        closingBehavior);
-  }
-
-  /**
-   * Returns a {@link WindowingStrategy} identical to {@code this} but with the output time
-   * function set to {@code outputTimeFn} and marked as explicitly specified.
-   */
-  @Experimental(Experimental.Kind.OUTPUT_TIME)
-  public WindowingStrategy<T, W> withOutputTimeFn(OutputTimeFn<?> outputTimeFn) {
-
-    // The onus of type correctness falls on the caller.
-    @SuppressWarnings("unchecked")
-    OutputTimeFn<? super W> typedOutputTimeFn = (OutputTimeFn<? super W>) outputTimeFn;
-
-    return new WindowingStrategy<T, W>(
-        windowFn,
-        trigger, triggerSpecified,
-        mode, modeSpecified,
-        allowedLateness, allowedLatenessSpecified,
-        typedOutputTimeFn, true,
-        closingBehavior);
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(this)
-        .add("windowFn", windowFn)
-        .add("allowedLateness", allowedLateness)
-        .add("trigger", trigger)
-        .add("accumulationMode", mode)
-        .add("outputTimeFn", outputTimeFn)
-        .toString();
-  }
-
-  /**
-   * Two strategies are equal when all of their settings and all of their "specified" flags
-   * are equal, including the output time function.
-   */
-  @Override
-  public boolean equals(Object object) {
-    if (!(object instanceof WindowingStrategy)) {
-      return false;
-    }
-    WindowingStrategy<?, ?> other = (WindowingStrategy<?, ?>) object;
-    return
-        isTriggerSpecified() == other.isTriggerSpecified()
-        && isAllowedLatenessSpecified() == other.isAllowedLatenessSpecified()
-        && isModeSpecified() == other.isModeSpecified()
-        // The output time function takes part in equality like every other setting, so
-        // strategies that differ only through withOutputTimeFn do not compare equal.
-        && isOutputTimeFnSpecified() == other.isOutputTimeFnSpecified()
-        && getMode().equals(other.getMode())
-        && getAllowedLateness().equals(other.getAllowedLateness())
-        && getClosingBehavior().equals(other.getClosingBehavior())
-        && getTrigger().equals(other.getTrigger())
-        && Objects.equals(getOutputTimeFn(), other.getOutputTimeFn())
-        && getWindowFn().equals(other.getWindowFn());
-  }
-
-  /** Hashes exactly the fields that {@link #equals} compares. */
-  @Override
-  public int hashCode() {
-    return Objects.hash(triggerSpecified, allowedLatenessSpecified, modeSpecified,
-        outputTimeFnSpecified, windowFn, trigger, mode, allowedLateness, closingBehavior,
-        outputTimeFn);
-  }
-}