You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ke...@apache.org on 2017/03/27 20:17:02 UTC
[1/2] beam git commit: [BEAM-1778] Second clean up pass of dataflow
references/URLs in Java SDK
Repository: beam
Updated Branches:
refs/heads/master 1ea3b35ba -> ced1e5c3a
[BEAM-1778] Second clean up pass of dataflow references/URLs in Java SDK
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/07a50b48
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/07a50b48
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/07a50b48
Branch: refs/heads/master
Commit: 07a50b48af9c176732ff172e3d612052d4e15386
Parents: 87c8ef0
Author: melissa <me...@google.com>
Authored: Thu Mar 23 16:49:41 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Mar 27 13:15:48 2017 -0700
----------------------------------------------------------------------
.../main/java/org/apache/beam/sdk/io/AvroSource.java | 2 +-
.../java/org/apache/beam/sdk/io/BoundedSource.java | 4 +---
.../main/java/org/apache/beam/sdk/io/PubsubIO.java | 6 ------
.../org/apache/beam/sdk/io/PubsubUnboundedSink.java | 2 --
.../apache/beam/sdk/io/PubsubUnboundedSource.java | 2 --
.../main/java/org/apache/beam/sdk/io/TFRecordIO.java | 6 +++---
.../main/java/org/apache/beam/sdk/io/XmlSink.java | 4 ++--
.../org/apache/beam/sdk/io/range/ByteKeyRange.java | 4 +---
.../java/org/apache/beam/sdk/options/GcpOptions.java | 2 +-
.../beam/sdk/testing/SerializableMatchers.java | 2 +-
.../org/apache/beam/sdk/testing/StreamingIT.java | 2 +-
.../java/org/apache/beam/sdk/util/CoderUtils.java | 15 +++++++--------
.../beam/sdk/coders/protobuf/ProtobufUtilTest.java | 1 -
.../apache/beam/sdk/runners/PipelineRunnerTest.java | 2 +-
.../io/gcp/bigquery/BigQueryTableRowIterator.java | 3 +--
.../beam/sdk/io/gcp/datastore/DatastoreV1.java | 2 +-
.../org/apache/beam/sdk/io/hdfs/HDFSFileSink.java | 3 +--
.../java/org/apache/beam/sdk/io/jdbc/JdbcIO.java | 4 ----
.../java/org/apache/beam/sdk/io/kafka/KafkaIO.java | 4 +---
.../resources/META-INF/maven/archetype-metadata.xml | 2 +-
.../resources/META-INF/maven/archetype-metadata.xml | 2 +-
.../resources/META-INF/maven/archetype-metadata.xml | 2 +-
22 files changed, 26 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
index fe3ac5c..0c52dea 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
@@ -109,7 +109,7 @@ import org.apache.commons.compress.utils.CountingInputStream;
* than the end offset of the source.
*
* <p>To use XZ-encoded Avro files, please include an explicit dependency on {@code xz-1.5.jar},
- * which has been marked as optional in the Maven {@code sdk/pom.xml} for Google Cloud Dataflow:
+ * which has been marked as optional in the Maven {@code sdk/pom.xml}.
*
* <pre>{@code
* <dependency>
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java
index 8e5145c..8538e7f 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BoundedSource.java
@@ -104,9 +104,7 @@ public abstract class BoundedSource<T> extends Source<T> {
*
* <p>Sources which support dynamic work rebalancing should use
* {@link org.apache.beam.sdk.io.range.RangeTracker} to manage the (source-specific)
- * range of positions that is being split. If your source supports dynamic work rebalancing,
- * please use that class to implement it if possible; if not possible, please contact the team
- * at <i>dataflow-feedback@google.com</i>.
+ * range of positions that is being split.
*/
@Experimental(Experimental.Kind.SOURCE_SINK)
public abstract static class BoundedReader<T> extends Source.Reader<T> {
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java
index 806b7da..c1ad353 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubIO.java
@@ -839,9 +839,6 @@ public class PubsubIO {
* <p>TODO: Consider replacing with BoundedReadFromUnboundedSource on top
* of PubsubUnboundedSource.
*
- * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow
- * service in streaming mode.
- *
* <p>Public so can be suppressed by runners.
*/
public class PubsubBoundedReader extends DoFn<Void, T> {
@@ -1132,9 +1129,6 @@ public class PubsubIO {
/**
* Writer to Pubsub which batches messages from bounded collections.
*
- * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow
- * service in streaming mode.
- *
* <p>Public so can be suppressed by runners.
*/
public class PubsubBoundedWriter extends DoFn<T, Void> {
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java
index f41b5b7..55605b3 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSink.java
@@ -87,8 +87,6 @@ import org.joda.time.Duration;
* <li>A failed bundle will cause messages to be resent. Thus we rely on the Pubsub consumer
* to dedup messages.
* </ul>
- *
- * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow service.
*/
public class PubsubUnboundedSink<T> extends PTransform<PCollection<T>, PDone> {
/**
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java
index 90bcc76..1184968 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/PubsubUnboundedSource.java
@@ -106,8 +106,6 @@ import org.slf4j.LoggerFactory;
* are blocking. We rely on the underlying runner to allow multiple
* {@link UnboundedSource.UnboundedReader} instances to execute concurrently and thus hide latency.
* </ul>
- *
- * <p>NOTE: This is not the implementation used when running on the Google Cloud Dataflow service.
*/
public class PubsubUnboundedSource<T> extends PTransform<PBegin, PCollection<T>> {
private static final Logger LOG = LoggerFactory.getLogger(PubsubUnboundedSource.class);
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
index 243506c..0552236 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
@@ -68,8 +68,8 @@ public class TFRecordIO {
* Returns a transform for reading TFRecord files that reads from the file(s)
* with the given filename or filename pattern. This can be a local path (if running locally),
* or a Google Cloud Storage filename or filename pattern of the form
- * {@code "gs://<bucket>/<filepath>"} (if running locally or via the Google Cloud Dataflow
- * service). Standard <a href="http://docs.oracle.com/javase/tutorial/essential/io/find.html"
+ * {@code "gs://<bucket>/<filepath>"} (if running locally or using remote
+ * execution). Standard <a href="http://docs.oracle.com/javase/tutorial/essential/io/find.html"
* >Java Filesystem glob patterns</a> ("*", "?", "[..]") are supported.
*/
public static Bound from(String filepattern) {
@@ -284,7 +284,7 @@ public class TFRecordIO {
* with the given prefix. This can be a local filename
* (if running locally), or a Google Cloud Storage filename of
* the form {@code "gs://<bucket>/<filepath>"}
- * (if running locally or via the Google Cloud Dataflow service).
+ * (if running locally or using remote execution).
*
* <p>The files written will begin with this prefix, followed by
* a shard identifier (see {@link TFRecordIO.Write.Bound#withNumShards(int)}, and end
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
index 0f25aea..6937e93 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
@@ -146,7 +146,7 @@ public class XmlSink {
* Returns an XmlSink that writes objects as XML entities.
*
* <p>Output files will have the name {@literal {baseOutputFilename}-0000i-of-0000n.xml} where n
- * is the number of output bundles that the Dataflow service divides the output into.
+ * is the number of output bundles.
*
* @param klass the class of the elements to write.
* @param rootElementName the enclosing root element.
@@ -183,7 +183,7 @@ public class XmlSink {
* Returns an XmlSink that writes to files with the given prefix.
*
* <p>Output files will have the name {@literal {filenamePrefix}-0000i-of-0000n.xml} where n is
- * the number of output bundles that the Dataflow service divides the output into.
+ * the number of output bundles.
*/
public Bound<T> toFilenamePrefix(String baseOutputFilename) {
return new Bound<>(classToBind, rootElementName, baseOutputFilename);
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java
index 0212e8a..d5b2919 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/ByteKeyRange.java
@@ -48,9 +48,7 @@ import org.slf4j.LoggerFactory;
*
* <p>The primary role of {@link ByteKeyRange} is to provide functionality for
* {@link #estimateFractionForKey(ByteKey)}, {@link #interpolateKey(double)}, and
- * {@link #split(int)}, which are used for Google Cloud Dataflow's
- * <a href="https://cloud.google.com/dataflow/service/dataflow-service-desc#AutoScaling">Autoscaling
- * and Dynamic Work Rebalancing</a> features.
+ * {@link #split(int)}.
*
* <p>{@link ByteKeyRange} implements these features by treating a {@link ByteKey}'s underlying
* {@code byte[]} as the binary expansion of floating point numbers in the range {@code [0.0, 1.0]}.
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java
index c04e4f0..d01406f 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/GcpOptions.java
@@ -55,7 +55,7 @@ public interface GcpOptions extends GoogleApiDebugOptions, PipelineOptions {
/**
* Project id to use when launching jobs.
*/
- @Description("Project id. Required when running a Dataflow in the cloud. "
+ @Description("Project id. Required when using Google Cloud Platform services. "
+ "See https://cloud.google.com/storage/docs/projects for further details.")
@Default.InstanceFactory(DefaultProjectFactory.class)
String getProject();
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java
index 7f72805..ade146d 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/SerializableMatchers.java
@@ -41,7 +41,7 @@ import org.hamcrest.Matchers;
* documentation there. Values retained by a {@link SerializableMatcher} are required to be
* serializable, either via Java serialization or via a provided {@link Coder}.
*
- * <p>The following matchers are novel to Dataflow:
+ * <p>The following matchers are novel to Apache Beam:
* <ul>
* <li>{@link #kvWithKey} for matching just the key of a {@link KV}.
* <li>{@link #kvWithValue} for matching just the value of a {@link KV}.
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
index 4922d83..427b908 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
@@ -18,7 +18,7 @@
package org.apache.beam.sdk.testing;
/**
- * Category tag used to mark tests which execute using the Dataflow runner
+ * Category tag used to mark tests which execute
* in streaming mode. Example usage:
* <pre><code>
* {@literal @}Test
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java
index 5d03574..6d97868 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/CoderUtils.java
@@ -229,17 +229,16 @@ public final class CoderUtils {
/**
* A {@link com.fasterxml.jackson.databind.Module} that adds the type
- * resolver needed for Coder definitions created by the Dataflow service.
+ * resolver needed for Coder definitions.
*/
static final class Jackson2Module extends SimpleModule {
/**
* The Coder custom type resolver.
*
- * <p>This resolver resolves coders. If the Coder ID is a particular
- * well-known identifier supplied by the Dataflow service, it's replaced
- * with the corresponding class. All other Coder instances are resolved
- * by class name, using the package org.apache.beam.sdk.coders
- * if there are no "."s in the ID.
+ * <p>This resolver resolves coders. If the Coder ID is a particular
+ * well-known identifier, it's replaced with the corresponding class.
+ * All other Coder instances are resolved by class name, using the package
+ * org.apache.beam.sdk.coders if there are no "."s in the ID.
*/
private static final class Resolver extends TypeIdResolverBase {
@SuppressWarnings("unused") // Used via @JsonTypeIdResolver annotation on Mixin
@@ -307,14 +306,14 @@ public final class CoderUtils {
* {@link ObjectMapper}.
*
* <p>This is done via a mixin so that this resolver is <i>only</i> used
- * during deserialization requested by the Dataflow SDK.
+ * during deserialization requested by the Apache Beam SDK.
*/
@JsonTypeIdResolver(Resolver.class)
@JsonTypeInfo(use = Id.CUSTOM, include = As.PROPERTY, property = PropertyNames.OBJECT_TYPE_NAME)
private static final class Mixin {}
public Jackson2Module() {
- super("DataflowCoders");
+ super("BeamCoders");
setMixInAnnotation(Coder.class, Mixin.class);
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java
index 9736824..1408048 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/protobuf/ProtobufUtilTest.java
@@ -118,7 +118,6 @@ public class ProtobufUtilTest {
@Test
public void testVerifyProto2() {
- // Everything in Dataflow's Proto2TestMessages uses Proto2 syntax.
checkProto2Syntax(MessageA.class, ExtensionRegistry.getEmptyRegistry());
checkProto2Syntax(MessageB.class, ExtensionRegistry.getEmptyRegistry());
checkProto2Syntax(MessageC.class, ExtensionRegistry.getEmptyRegistry());
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java
index e980497..71ef311 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/runners/PipelineRunnerTest.java
@@ -36,7 +36,7 @@ import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
/**
- * Tests for DataflowRunner.
+ * Tests for PipelineRunner.
*/
@RunWith(JUnit4.class)
public class PipelineRunnerTest {
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java
index 5edc78c..59f2bb6 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableRowIterator.java
@@ -139,8 +139,7 @@ class BigQueryTableRowIterator implements AutoCloseable {
while (true) {
if (iteratorOverCurrentBatch != null && iteratorOverCurrentBatch.hasNext()) {
// Embed schema information into the raw row, so that values have an
- // associated key. This matches how rows are read when using the
- // DataflowRunner.
+ // associated key.
current = getTypedTableRow(schema.getFields(), iteratorOverCurrentBatch.next());
return true;
}
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
index 400860f..73ac8df 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
@@ -132,7 +132,7 @@ import org.slf4j.LoggerFactory;
* .withQuery(query));
* } </pre>
*
- * <p><b>Note:</b> Normally, a Cloud Dataflow job will read from Cloud Datastore in parallel across
+ * <p><b>Note:</b> A runner may read from Cloud Datastore in parallel across
* many workers. However, when the {@link Query} is configured with a limit using
* {@link com.google.datastore.v1.Query.Builder#setLimit(Int32Value)} or if the Query contains
* inequality filters like {@code GREATER_THAN, LESS_THAN} etc., then all returned results
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java b/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java
index 0118249..9b085ca 100644
--- a/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java
+++ b/sdks/java/io/hdfs/src/main/java/org/apache/beam/sdk/io/hdfs/HDFSFileSink.java
@@ -408,8 +408,7 @@ public abstract class HDFSFileSink<T, K, V> extends Sink<T> {
FileOutputFormat.setOutputPath(job, new Path(path));
// Each Writer is responsible for writing one bundle of elements and is represented by one
- // unique Hadoop task based on uId/hash. All tasks share the same job ID. Since Dataflow
- // handles retrying of failed bundles, each task has one attempt only.
+ // unique Hadoop task based on uId/hash. All tasks share the same job ID.
JobID jobId = job.getJobID();
TaskID taskId = new TaskID(jobId, TaskType.REDUCE, hash);
context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID(taskId, 0));
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java
index 1b83fe9..4754c98 100644
--- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java
+++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java
@@ -306,10 +306,6 @@ public class JdbcIO {
return input
.apply(Create.of(getQuery()))
.apply(ParDo.of(new ReadFn<>(this))).setCoder(getCoder())
- // generate a random key followed by a GroupByKey and then ungroup
- // to prevent fusion
- // see https://cloud.google.com/dataflow/service/dataflow-service-desc#preventing-fusion
- // for details
.apply(ParDo.of(new DoFn<T, KV<Integer, T>>() {
private Random random;
@Setup
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
index 890fb2b..83cef4b 100644
--- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
+++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
@@ -151,8 +151,7 @@ import org.slf4j.LoggerFactory;
*
* <h3>Partition Assignment and Checkpointing</h3>
* The Kafka partitions are evenly distributed among splits (workers).
- * Dataflow checkpointing is fully supported and
- * each split can resume from previous checkpoint. See
+ * Checkpointing is fully supported and each split can resume from the previous checkpoint. See
* {@link UnboundedKafkaSource#generateInitialSplits(int, PipelineOptions)} for more details on
* splits and checkpoint support.
*
@@ -818,7 +817,6 @@ public class KafkaIO {
checkState(checkpointMark.getPartitions().size() == partitions.size(),
"checkPointMark and assignedPartitions should match");
- // we could consider allowing a mismatch, though it is not expected in current Dataflow
for (int i = 0; i < partitions.size(); i++) {
PartitionMark ckptMark = checkpointMark.getPartitions().get(i);
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml b/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml
index dbdd614..2781a43 100644
--- a/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ b/sdks/java/maven-archetypes/examples-java8/src/main/resources/META-INF/maven/archetype-metadata.xml
@@ -17,7 +17,7 @@
-->
<archetype-descriptor
xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
- name="Google Cloud Dataflow Example Pipelines Archetype"
+ name="Apache Beam Example Pipelines Archetype"
xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml b/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
index a130b65..7f0430a 100644
--- a/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/META-INF/maven/archetype-metadata.xml
@@ -17,7 +17,7 @@
-->
<archetype-descriptor
xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
- name="Google Cloud Dataflow Example Pipelines Archetype"
+ name="Apache Beam Example Pipelines Archetype"
xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
http://git-wip-us.apache.org/repos/asf/beam/blob/07a50b48/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml b/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
index 46c21c3..e550960 100644
--- a/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
+++ b/sdks/java/maven-archetypes/starter/src/main/resources/META-INF/maven/archetype-metadata.xml
@@ -17,7 +17,7 @@
-->
<archetype-descriptor
xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
- name="Google Cloud Dataflow Starter Pipeline Archetype"
+ name="Apache Beam Starter Pipeline Archetype"
xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<requiredProperties>
[2/2] beam git commit: This closes #2311: Second clean up pass of
dataflow references/URLs in Java SDK
Posted by ke...@apache.org.
This closes #2311: Second clean up pass of dataflow references/URLs in Java SDK
[BEAM-1778] Second clean up pass of dataflow references/URLs in Java SDK
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/ced1e5c3
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/ced1e5c3
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/ced1e5c3
Branch: refs/heads/master
Commit: ced1e5c3a1ee1e90ffe5843f7b4e8daf2cb3d46e
Parents: 1ea3b35 07a50b4
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Mar 27 13:16:01 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Mar 27 13:16:01 2017 -0700
----------------------------------------------------------------------
.../main/java/org/apache/beam/sdk/io/AvroSource.java | 2 +-
.../java/org/apache/beam/sdk/io/BoundedSource.java | 4 +---
.../main/java/org/apache/beam/sdk/io/PubsubIO.java | 6 ------
.../org/apache/beam/sdk/io/PubsubUnboundedSink.java | 2 --
.../apache/beam/sdk/io/PubsubUnboundedSource.java | 2 --
.../main/java/org/apache/beam/sdk/io/TFRecordIO.java | 6 +++---
.../main/java/org/apache/beam/sdk/io/XmlSink.java | 4 ++--
.../org/apache/beam/sdk/io/range/ByteKeyRange.java | 4 +---
.../java/org/apache/beam/sdk/options/GcpOptions.java | 2 +-
.../beam/sdk/testing/SerializableMatchers.java | 2 +-
.../org/apache/beam/sdk/testing/StreamingIT.java | 2 +-
.../java/org/apache/beam/sdk/util/CoderUtils.java | 15 +++++++--------
.../beam/sdk/coders/protobuf/ProtobufUtilTest.java | 1 -
.../apache/beam/sdk/runners/PipelineRunnerTest.java | 2 +-
.../io/gcp/bigquery/BigQueryTableRowIterator.java | 3 +--
.../beam/sdk/io/gcp/datastore/DatastoreV1.java | 2 +-
.../org/apache/beam/sdk/io/hdfs/HDFSFileSink.java | 3 +--
.../java/org/apache/beam/sdk/io/jdbc/JdbcIO.java | 4 ----
.../java/org/apache/beam/sdk/io/kafka/KafkaIO.java | 4 +---
.../resources/META-INF/maven/archetype-metadata.xml | 2 +-
.../resources/META-INF/maven/archetype-metadata.xml | 2 +-
.../resources/META-INF/maven/archetype-metadata.xml | 2 +-
22 files changed, 26 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/ced1e5c3/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
----------------------------------------------------------------------