You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ke...@apache.org on 2017/03/27 20:17:02 UTC

[1/2] beam git commit: [BEAM-1778] Second clean up pass of dataflow references/URLs in Java SDK

Repository: beam
Updated Branches:
  refs/heads/master 1ea3b35ba -> ced1e5c3a


[BEAM-1778] Second clean up pass of dataflow references/URLs in Java SDK


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/07a50b48
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/07a50b48
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/07a50b48

Branch: refs/heads/master
Commit: 07a50b48af9c176732ff172e3d612052d4e15386
Parents: 87c8ef0
Author: melissa <me...@google.com>
Authored: Thu Mar 23 16:49:41 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Mar 27 13:15:48 2017 -0700

----------------------------------------------------------------------
 .../main/java/org/apache/beam/sdk/io/AvroSource.java |  2 +-
 .../java/org/apache/beam/sdk/io/BoundedSource.java   |  4 +---
 .../main/java/org/apache/beam/sdk/io/PubsubIO.java   |  6 ------
 .../org/apache/beam/sdk/io/PubsubUnboundedSink.java  |  2 --
 .../apache/beam/sdk/io/PubsubUnboundedSource.java    |  2 --
 .../main/java/org/apache/beam/sdk/io/TFRecordIO.java |  6 +++---
 .../main/java/org/apache/beam/sdk/io/XmlSink.java    |  4 ++--
 .../org/apache/beam/sdk/io/range/ByteKeyRange.java   |  4 +---
 .../java/org/apache/beam/sdk/options/GcpOptions.java |  2 +-
 .../beam/sdk/testing/SerializableMatchers.java       |  2 +-
 .../org/apache/beam/sdk/testing/StreamingIT.java     |  2 +-
 .../java/org/apache/beam/sdk/util/CoderUtils.java    | 15 +++++++--------
 .../beam/sdk/coders/protobuf/ProtobufUtilTest.java   |  1 -
 .../apache/beam/sdk/runners/PipelineRunnerTest.java  |  2 +-
 .../io/gcp/bigquery/BigQueryTableRowIterator.java    |  3 +--
 .../beam/sdk/io/gcp/datastore/DatastoreV1.java       |  2 +-
 .../org/apache/beam/sdk/io/hdfs/HDFSFileSink.java    |  3 +--
 .../java/org/apache/beam/sdk/io/jdbc/JdbcIO.java     |  4 ----
 .../java/org/apache/beam/sdk/io/kafka/KafkaIO.java   |  4 +---
 .../resources/META-INF/maven/archetype-metadata.xml  |  2 +-
 .../resources/META-INF/maven/archetype-metadata.xml  |  2 +-
 .../resources/META-INF/maven/archetype-metadata.xml  |  2 +-
 22 files changed, 26 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
index fe3ac5c..0c52dea 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
@@ -109,7 +109,7 @@ import org.apache.commons.compress.utils.CountingInputStream;
  * than the end offset of the source.
  *
  * <p>To use XZ-encoded Avro files, please include an explicit dependency on {@code xz-1.5.jar},
- * which has been marked as optional in the Maven {@code sdk/pom.xml} for Google Cloud Dataflow:
+ * which has been marked as optional in the Maven {@code sdk/pom.xml}.
  *
  * <pre>{@code
  * <dependency>

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java
index 8e5145c..8538e7f 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java
@@ -104,9 +104,7 @@ public abstract class BoundedSource<T> extends Source<T> {
    *
    * <p>Sources which support dynamic work rebalancing should use
    * {@link org.apache.beam.sdk.io.range.RangeTracker} to manage the (source-specific)
-   * range of positions that is being split. If your source supports dynamic work rebalancing,
-   * please use that class to implement it if possible; if not possible, please contact the team
-   * at <i>dataflow-feedback@google.com</i>.
+   * range of positions that is being split.
    */
   @Experimental(Experimental.Kind.SOURCE_SINK)
   public abstract static class BoundedReader<T> extends Source.Reader<T> {

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java
index 806b7da..c1ad353 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java
@@ -839,9 +839,6 @@ public class PubsubIO {
      * <p>TODO: Consider replacing with BoundedReadFromUnboundedSource on top
      * of PubsubUnboundedSource.
      *
-     * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow
-     * service in streaming mode.
-     *
      * <p>Public so that it can be suppressed by runners.
      */
     public class PubsubBoundedReader extends DoFn<Void, T> {
@@ -1132,9 +1129,6 @@ public class PubsubIO {
     /**
      * Writer to Pubsub which batches messages from bounded collections.
      *
-     * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow
-     * service in streaming mode.
-     *
      * <p>Public so that it can be suppressed by runners.
      */
     public class PubsubBoundedWriter extends DoFn<T, Void> {

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java
index f41b5b7..55605b3 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java
@@ -87,8 +87,6 @@ import org.joda.time.Duration;
  * <li>A failed bundle will cause messages to be resent. Thus we rely on the Pubsub consumer
  * to dedup messages.
  * </ul>
- *
- * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow service.
  */
 public class PubsubUnboundedSink<T> extends PTransform<PCollection<T>, PDone> {
   /**

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java
index 90bcc76..1184968 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java
@@ -106,8 +106,6 @@ import org.slf4j.LoggerFactory;
  * are blocking. We rely on the underlying runner to allow multiple
  * {@link UnboundedSource.UnboundedReader} instances to execute concurrently and thus hide latency.
  * </ul>
- *
- * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow service.
  */
 public class PubsubUnboundedSource<T> extends PTransform<PBegin, PCollection<T>> {
   private static final Logger LOG = LoggerFactory.getLogger(PubsubUnboundedSource.class);

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
index 243506c..0552236 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
@@ -68,8 +68,8 @@ public class TFRecordIO {
      * Returns a transform for reading TFRecord files that reads from the file(s)
      * with the given filename or filename pattern. This can be a local path (if running locally),
      * or a Google Cloud Storage filename or filename pattern of the form
-     * {@code "gs://<bucket>/<filepath>"} (if running locally or via the Google Cloud Dataflow
-     * service). Standard <a href="http://docs.oracle.com/javase/tutorial/essential/io/find.html"
+     * {@code "gs://<bucket>/<filepath>"} (if running locally or using remote
+     * execution). Standard <a href="http://docs.oracle.com/javase/tutorial/essential/io/find.html"
      * >Java Filesystem glob patterns</a> ("*", "?", "[..]") are supported.
      */
     public static Bound from(String filepattern) {
@@ -284,7 +284,7 @@ public class TFRecordIO {
      * with the given prefix. This can be a local filename
      * (if running locally), or a Google Cloud Storage filename of
      * the form {@code "gs://<bucket>/<filepath>"}
-     * (if running locally or via the Google Cloud Dataflow service).
+     * (if running locally or using remote execution).
      *
      * <p>The files written will begin with this prefix, followed by
      * a shard identifier (see {@link TFRecordIO.Write.Bound#withNumShards(int)}), and end

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
index 0f25aea..6937e93 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
@@ -146,7 +146,7 @@ public class XmlSink {
    * Returns an XmlSink that writes objects as XML entities.
    *
    * <p>Output files will have the name {@literal {baseOutputFilename}-0000i-of-0000n.xml} where n
-   * is the number of output bundles that the Dataflow service divides the output into.
+   * is the number of output bundles.
    *
    * @param klass the class of the elements to write.
    * @param rootElementName the enclosing root element.
@@ -183,7 +183,7 @@ public class XmlSink {
      * Returns an XmlSink that writes to files with the given prefix.
      *
      * <p>Output files will have the name {@literal {filenamePrefix}-0000i-of-0000n.xml} where n is
-     * the number of output bundles that the Dataflow service divides the output into.
+     * the number of output bundles.
      */
     public Bound<T> toFilenamePrefix(String baseOutputFilename) {
       return new Bound<>(classToBind, rootElementName, baseOutputFilename);

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java
index 0212e8a..d5b2919 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java
@@ -48,9 +48,7 @@ import org.slf4j.LoggerFactory;
  *
  * <p>The primary role of {@link ByteKeyRange} is to provide functionality for
  * {@link #estimateFractionForKey(ByteKey)}, {@link #interpolateKey(double)}, and
- * {@link #split(int)}, which are used for Google Cloud Dataflow's
- * <a href="https://cloud.google.com/dataflow/service/dataflow-service-desc#AutoScaling">Autoscaling
- * and Dynamic Work Rebalancing</a> features.
+ * {@link #split(int)}.
  *
  * <p>{@link ByteKeyRange} implements these features by treating a {@link ByteKey}'s underlying
  * {@code byte[]} as the binary expansion of floating point numbers in the range {@code [0.0, 1.0]}.

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java
index c04e4f0..d01406f 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java
@@ -55,7 +55,7 @@ public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions {
   /**
    * Project id to use when launching jobs.
    */
-  @Description("Project id. Required when running a Dataflow in the cloud. "
+  @Description("Project id. Required when using Google Cloud Platform services. "
       + "See https://cloud.google.com/storage/docs/projects for further details.")
   @Default.InstanceFactory(DefaultProjectFactory.class)
   String getProject();

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java
index 7f72805..ade146d 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java
@@ -41,7 +41,7 @@ import org.hamcrest.Matchers;
  * documentation there. Values retained by a {@link SerializableMatcher} are required to be
  * serializable, either via Java serialization or via a provided {@link Coder}.
  *
- * <p>The following matchers are novel to Dataflow:
+ * <p>The following matchers are novel to Apache Beam:
  * <ul>
  * <li>{@link #kvWithKey} for matching just the key of a {@link KV}.
  * <li>{@link #kvWithValue} for matching just the value of a {@link KV}.

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
index 4922d83..427b908 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
@@ -18,7 +18,7 @@
 package org.apache.beam.sdk.testing;
 
 /**
- * Category tag used to mark tests which execute using the Dataflow runner
+ * Category tag used to mark tests which execute
  * in streaming mode. Example usage:
  * <pre><code>
  *    {@literal @}Test

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java
index 5d03574..6d97868 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java
@@ -229,17 +229,16 @@ public final class CoderUtils {
 
   /**
    * A {@link com.fasterxml.jackson.databind.Module} that adds the type
-   * resolver needed for Coder definitions created by the Dataflow service.
+   * resolver needed for Coder definitions.
    */
   static final class Jackson2Module extends SimpleModule {
     /**
      * The Coder custom type resolver.
      *
-     * <p>This resolver resolves coders.  If the Coder ID is a particular
-     * well-known identifier supplied by the Dataflow service, it's replaced
-     * with the corresponding class.  All other Coder instances are resolved
-     * by class name, using the package org.apache.beam.sdk.coders
-     * if there are no "."s in the ID.
+     * <p>This resolver resolves coders. If the Coder ID is a particular
+     * well-known identifier, it's replaced with the corresponding class.
+     * All other Coder instances are resolved by class name, using the package
+     * org.apache.beam.sdk.coders if there are no "."s in the ID.
      */
     private static final class Resolver extends TypeIdResolverBase {
       @SuppressWarnings("unused") // Used via @JsonTypeIdResolver annotation on Mixin
@@ -307,14 +306,14 @@ public final class CoderUtils {
      * {@link ObjectMapper}.
      *
      * <p>This is done via a mixin so that this resolver is <i>only</i> used
-     * during deserialization requested by the Dataflow SDK.
+     * during deserialization requested by the Apache Beam SDK.
      */
     @JsonTypeIdResolver(Resolver.class)
     @JsonTypeInfo(use = Id.CUSTOM, include = As.PROPERTY, property = PropertyNames.OBJECT_TYPE_NAME)
     private static final class Mixin {}
 
     public Jackson2Module() {
-      super("DataflowCoders");
+      super("BeamCoders");
       setMixInAnnotation(Coder.class, Mixin.class);
     }
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java
index 9736824..1408048 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java
@@ -118,7 +118,6 @@ public class ProtobufUtilTest {
 
   @Test
   public void testVerifyProto2() {
-    // Everything in Dataflow's Proto2TestMessages uses Proto2 syntax.
     checkProto2Syntax(MessageA.class, ExtensionRegistry.getEmptyRegistry());
     checkProto2Syntax(MessageB.class, ExtensionRegistry.getEmptyRegistry());
     checkProto2Syntax(MessageC.class, ExtensionRegistry.getEmptyRegistry());

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java
index e980497..71ef311 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java
@@ -36,7 +36,7 @@ import org.mockito.Mock;
 import org.mockito.MockitoAnnotations;
 
 /**
- * Tests for DataflowRunner.
+ * Tests for PipelineRunner.
  */
 @RunWith(JUnit4.class)
 public class PipelineRunnerTest {

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java
index 5edc78c..59f2bb6 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java
@@ -139,8 +139,7 @@ class BigQueryTableRowIterator implements AutoCloseable {
     while (true) {
       if (iteratorOverCurrentBatch != null && iteratorOverCurrentBatch.hasNext()) {
         // Embed schema information into the raw row, so that values have an
-        // associated key.  This matches how rows are read when using the
-        // DataflowRunner.
+        // associated key.
         current = getTypedTableRow(schema.getFields(), iteratorOverCurrentBatch.next());
         return true;
       }

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
index 400860f..73ac8df 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
@@ -132,7 +132,7 @@ import org.slf4j.LoggerFactory;
  *         .withQuery(query));
  * } </pre>
  *
- * <p><b>Note:</b> Normally, a Cloud Dataflow job will read from Cloud Datastore in parallel across
+ * <p><b>Note:</b> A runner may read from Cloud Datastore in parallel across
  * many workers. However, when the {@link Query} is configured with a limit using
  * {@link com.google.datastore.v1.Query.Builder#setLimit(Int32Value)} or if the Query contains
  * inequality filters like {@code GREATER_THAN, LESS_THAN} etc., then all returned results

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java b/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java
index 0118249..9b085ca 100644
--- a/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java
+++ b/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java
@@ -408,8 +408,7 @@ public abstract class HDFSFileSink<T, K, V> extends Sink<T> {
       FileOutputFormat.setOutputPath(job, new Path(path));
 
       // Each Writer is responsible for writing one bundle of elements and is represented by one
-      // unique Hadoop task based on uId/hash. All tasks share the same job ID. Since Dataflow
-      // handles retrying of failed bundles, each task has one attempt only.
+      // unique Hadoop task based on uId/hash. All tasks share the same job ID.
       JobID jobId = job.getJobID();
       TaskID taskId = new TaskID(jobId, TaskType.REDUCE, hash);
       context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID(taskId, 0));

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java
index 1b83fe9..4754c98 100644
--- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java
+++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java
@@ -306,10 +306,6 @@ public class JdbcIO {
       return input
           .apply(Create.of(getQuery()))
           .apply(ParDo.of(new ReadFn<>(this))).setCoder(getCoder())
-          // generate a random key followed by a GroupByKey and then ungroup
-          // to prevent fusion
-          // see https://cloud.google.com/dataflow/service/dataflow-service-desc#preventing-fusion
-          // for details
           .apply(ParDo.of(new DoFn<T, KV<Integer, T>>() {
             private Random random;
             @Setup

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
index 890fb2b..83cef4b 100644
--- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
+++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
@@ -151,8 +151,7 @@ import org.slf4j.LoggerFactory;
  *
  * <h3>Partition Assignment and Checkpointing</h3>
  * The Kafka partitions are evenly distributed among splits (workers).
- * Dataflow checkpointing is fully supported and
- * each split can resume from previous checkpoint. See
+ * Checkpointing is fully supported and each split can resume from its previous checkpoint. See
  * {@link UnboundedKafkaSource#generateInitialSplits(int, PipelineOptions)} for more details on
  * splits and checkpoint support.
  *
@@ -818,7 +817,6 @@ public class KafkaIO {
 
         checkState(checkpointMark.getPartitions().size() == partitions.size(),
             "checkPointMark and assignedPartitions should match");
-        // we could consider allowing a mismatch, though it is not expected in current Dataflow
 
         for (int i = 0; i < partitions.size(); i++) {
           PartitionMark ckptMark = checkpointMark.getPartitions().get(i);

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml b/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml
index dbdd614..2781a43 100644
--- a/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ b/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml
@@ -17,7 +17,7 @@
 -->
 <archetype-descriptor
     xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
-    name="Google Cloud Dataflow Example Pipelines Archetype"
+    name="Apache Beam Example Pipelines Archetype"
     xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml b/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
index a130b65..7f0430a 100644
--- a/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
@@ -17,7 +17,7 @@
 -->
 <archetype-descriptor
     xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
-    name="Google Cloud Dataflow Example Pipelines Archetype"
+    name="Apache Beam Example Pipelines Archetype"
     xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 

http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml b/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
index 46c21c3..e550960 100644
--- a/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ b/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
@@ -17,7 +17,7 @@
 -->
 <archetype-descriptor
     xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
-    name="Google Cloud Dataflow Starter Pipeline Archetype"
+    name="Apache Beam Starter Pipeline Archetype"
     xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
   <requiredProperties>


[2/2] beam git commit: This closes #2311: Second clean up pass of dataflow references/URLs in Java SDK

Posted by ke...@apache.org.
This closes #2311: Second clean up pass of dataflow references/URLs in Java SDK

  [BEAM-1778] Second clean up pass of dataflow references/URLs in Java SDK


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/ced1e5c3
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/ced1e5c3
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/ced1e5c3

Branch: refs/heads/master
Commit: ced1e5c3a1ee1e90ffe5843f7b4e8daf2cb3d46e
Parents: 1ea3b35 07a50b4
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Mar 27 13:16:01 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Mar 27 13:16:01 2017 -0700

----------------------------------------------------------------------
 .../main/java/org/apache/beam/sdk/io/AvroSource.java |  2 +-
 .../java/org/apache/beam/sdk/io/BoundedSource.java   |  4 +---
 .../main/java/org/apache/beam/sdk/io/PubsubIO.java   |  6 ------
 .../org/apache/beam/sdk/io/PubsubUnboundedSink.java  |  2 --
 .../apache/beam/sdk/io/PubsubUnboundedSource.java    |  2 --
 .../main/java/org/apache/beam/sdk/io/TFRecordIO.java |  6 +++---
 .../main/java/org/apache/beam/sdk/io/XmlSink.java    |  4 ++--
 .../org/apache/beam/sdk/io/range/ByteKeyRange.java   |  4 +---
 .../java/org/apache/beam/sdk/options/GcpOptions.java |  2 +-
 .../beam/sdk/testing/SerializableMatchers.java       |  2 +-
 .../org/apache/beam/sdk/testing/StreamingIT.java     |  2 +-
 .../java/org/apache/beam/sdk/util/CoderUtils.java    | 15 +++++++--------
 .../beam/sdk/coders/protobuf/ProtobufUtilTest.java   |  1 -
 .../apache/beam/sdk/runners/PipelineRunnerTest.java  |  2 +-
 .../io/gcp/bigquery/BigQueryTableRowIterator.java    |  3 +--
 .../beam/sdk/io/gcp/datastore/DatastoreV1.java       |  2 +-
 .../org/apache/beam/sdk/io/hdfs/HDFSFileSink.java    |  3 +--
 .../java/org/apache/beam/sdk/io/jdbc/JdbcIO.java     |  4 ----
 .../java/org/apache/beam/sdk/io/kafka/KafkaIO.java   |  4 +---
 .../resources/META-INF/maven/archetype-metadata.xml  |  2 +-
 .../resources/META-INF/maven/archetype-metadata.xml  |  2 +-
 .../resources/META-INF/maven/archetype-metadata.xml  |  2 +-
 22 files changed, 26 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/ced1e5c3/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
----------------------------------------------------------------------