You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ke...@apache.org on 2016/03/24 21:42:45 UTC
[1/7] incubator-beam git commit: Move tempLocation to PipelineOptions.
Repository: incubator-beam
Updated Branches:
refs/heads/master 1c21aa2d5 -> c1de175bd
Move tempLocation to PipelineOptions.
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/8bc0659a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/8bc0659a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/8bc0659a
Branch: refs/heads/master
Commit: 8bc0659af754786677446f2f9941702f9e9ee5be
Parents: 45309ca
Author: Pei He <pe...@gmail.com>
Authored: Mon Mar 14 16:02:32 2016 -0700
Committer: Pei He <pe...@gmail.com>
Committed: Mon Mar 14 16:53:49 2016 -0700
----------------------------------------------------------------------
.../FlinkGroupAlsoByWindowWrapper.java | 11 ++++++++-
.../sdk/options/DataflowPipelineOptions.java | 26 +++-----------------
.../dataflow/sdk/options/PipelineOptions.java | 14 +++++++++++
.../sdk/runners/DataflowPipelineRunner.java | 4 +++
.../sdk/runners/DataflowPipelineRunnerTest.java | 6 ++---
5 files changed, 33 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/8bc0659a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupAlsoByWindowWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupAlsoByWindowWrapper.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupAlsoByWindowWrapper.java
index e115a15..b413d7a 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupAlsoByWindowWrapper.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupAlsoByWindowWrapper.java
@@ -476,6 +476,15 @@ public class FlinkGroupAlsoByWindowWrapper<K, VIN, VACC, VOUT>
@Override
public void setStableUniqueNames(CheckEnabled enabled) {
}
+
+ @Override
+ public String getTempLocation() {
+ return null;
+ }
+
+ @Override
+ public void setTempLocation(String tempLocation) {
+ }
};
}
return options;
@@ -628,4 +637,4 @@ public class FlinkGroupAlsoByWindowWrapper<K, VIN, VACC, VOUT>
// restore the timerInternals.
this.timerInternals.restoreTimerInternals(reader, inputKvCoder, windowCoder);
}
-}
\ No newline at end of file
+}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/8bc0659a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java
index 1aa4342..6794032 100644
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java
@@ -34,8 +34,6 @@ public interface DataflowPipelineOptions extends
GcsOptions, StreamingOptions, CloudDebuggerOptions, DataflowWorkerLoggingOptions,
DataflowProfilingOptions {
- static final String DATAFLOW_STORAGE_LOCATION = "Dataflow Storage Location";
-
@Description("Project id. Required when running a Dataflow in the cloud. "
+ "See https://cloud.google.com/storage/docs/projects for further details.")
@Override
@@ -46,36 +44,18 @@ public interface DataflowPipelineOptions extends
void setProject(String value);
/**
- * GCS path for temporary files, e.g. gs://bucket/object
- *
- * <p>Must be a valid Cloud Storage URL, beginning with the prefix "gs://"
- *
- * <p>At least one of {@link #getTempLocation()} or {@link #getStagingLocation()} must be set. If
- * {@link #getTempLocation()} is not set, then the Dataflow pipeline defaults to using
- * {@link #getStagingLocation()}.
- */
- @Description("GCS path for temporary files, eg \"gs://bucket/object\". "
- + "Must be a valid Cloud Storage URL, beginning with the prefix \"gs://\". "
- + "At least one of tempLocation or stagingLocation must be set. If tempLocation is unset, "
- + "defaults to using stagingLocation.")
- @Validation.Required(groups = {DATAFLOW_STORAGE_LOCATION})
- String getTempLocation();
- void setTempLocation(String value);
-
- /**
* GCS path for staging local files, e.g. gs://bucket/object
*
* <p>Must be a valid Cloud Storage URL, beginning with the prefix "gs://"
*
- * <p>At least one of {@link #getTempLocation()} or {@link #getStagingLocation()} must be set. If
- * {@link #getTempLocation()} is not set, then the Dataflow pipeline defaults to using
- * {@link #getStagingLocation()}.
+ * <p>At least one of {@link PipelineOptions#getTempLocation()} or {@link #getStagingLocation()}
+ * must be set. If {@link #getStagingLocation()} is not set, then the Dataflow
+ * pipeline defaults to using {@link PipelineOptions#getTempLocation()}.
*/
@Description("GCS path for staging local files, e.g. \"gs://bucket/object\". "
+ "Must be a valid Cloud Storage URL, beginning with the prefix \"gs://\". "
+ "At least one of stagingLocation or tempLocation must be set. If stagingLocation is unset, "
+ "defaults to using tempLocation.")
- @Validation.Required(groups = {DATAFLOW_STORAGE_LOCATION})
String getStagingLocation();
void setStagingLocation(String value);
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/8bc0659a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
index 923033d..4c33a22 100644
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
@@ -245,4 +245,18 @@ public interface PipelineOptions {
@Default.Enum("WARNING")
CheckEnabled getStableUniqueNames();
void setStableUniqueNames(CheckEnabled enabled);
+
+ /**
+ * A pipeline level default location for storing temporary files.
+ *
+ * <p>This can be a path of any file system.
+ *
+ * <p>{@link #getTempLocation()} can be used as a default location in other
+ * {@link PipelineOptions}.
+ *
+ * <p>If it is unset, {@link PipelineRunner} can override it.
+ */
+ @Description("A pipeline level default location for storing temporary files.")
+ String getTempLocation();
+ void setTempLocation(String value);
}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/8bc0659a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
----------------------------------------------------------------------
diff --git a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
index c90b904..d716b95 100644
--- a/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
+++ b/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
@@ -251,6 +251,10 @@ public class DataflowPipelineRunner extends PipelineRunner<DataflowPipelineJob>
}
PathValidator validator = dataflowOptions.getPathValidator();
+ Preconditions.checkArgument(!(Strings.isNullOrEmpty(dataflowOptions.getTempLocation())
+ && Strings.isNullOrEmpty(dataflowOptions.getStagingLocation())),
+ "Missing required value: at least one of tempLocation or stagingLocation must be set.");
+
if (dataflowOptions.getStagingLocation() != null) {
validator.validateOutputFilePrefixSupported(dataflowOptions.getStagingLocation());
}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/8bc0659a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java
index c5f2d3f..300d5d5 100644
--- a/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java
+++ b/sdk/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java
@@ -649,10 +649,8 @@ public class DataflowPipelineRunnerTest {
options.setProject("foo-project");
thrown.expect(IllegalArgumentException.class);
- thrown.expectMessage("Missing required value for group");
- thrown.expectMessage(DataflowPipelineOptions.DATAFLOW_STORAGE_LOCATION);
- thrown.expectMessage("getStagingLocation");
- thrown.expectMessage("getTempLocation");
+ thrown.expectMessage(
+ "Missing required value: at least one of tempLocation or stagingLocation must be set.");
DataflowPipelineRunner.fromOptions(options);
}
[5/7] incubator-beam git commit: Merge branch 'master' into
temp-option
Posted by ke...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c4515687/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java
----------------------------------------------------------------------
diff --cc sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java
index 0000000,1aa4342..6794032
mode 000000,100644..100644
--- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java
+++ b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/DataflowPipelineOptions.java
@@@ -1,0 -1,134 +1,114 @@@
+ /*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+ package com.google.cloud.dataflow.sdk.options;
+
+ import com.google.cloud.dataflow.sdk.runners.DataflowPipeline;
+ import com.google.common.base.MoreObjects;
+
+ import org.joda.time.DateTimeUtils;
+ import org.joda.time.DateTimeZone;
+ import org.joda.time.format.DateTimeFormat;
+ import org.joda.time.format.DateTimeFormatter;
+
+ /**
+ * Options that can be used to configure the {@link DataflowPipeline}.
+ */
+ @Description("Options that configure the Dataflow pipeline.")
+ public interface DataflowPipelineOptions extends
+ PipelineOptions, GcpOptions, ApplicationNameOptions, DataflowPipelineDebugOptions,
+ DataflowPipelineWorkerPoolOptions, BigQueryOptions,
+ GcsOptions, StreamingOptions, CloudDebuggerOptions, DataflowWorkerLoggingOptions,
+ DataflowProfilingOptions {
+
- static final String DATAFLOW_STORAGE_LOCATION = "Dataflow Storage Location";
-
+ @Description("Project id. Required when running a Dataflow in the cloud. "
+ + "See https://cloud.google.com/storage/docs/projects for further details.")
+ @Override
+ @Validation.Required
+ @Default.InstanceFactory(DefaultProjectFactory.class)
+ String getProject();
+ @Override
+ void setProject(String value);
+
+ /**
- * GCS path for temporary files, e.g. gs://bucket/object
- *
- * <p>Must be a valid Cloud Storage URL, beginning with the prefix "gs://"
- *
- * <p>At least one of {@link #getTempLocation()} or {@link #getStagingLocation()} must be set. If
- * {@link #getTempLocation()} is not set, then the Dataflow pipeline defaults to using
- * {@link #getStagingLocation()}.
- */
- @Description("GCS path for temporary files, eg \"gs://bucket/object\". "
- + "Must be a valid Cloud Storage URL, beginning with the prefix \"gs://\". "
- + "At least one of tempLocation or stagingLocation must be set. If tempLocation is unset, "
- + "defaults to using stagingLocation.")
- @Validation.Required(groups = {DATAFLOW_STORAGE_LOCATION})
- String getTempLocation();
- void setTempLocation(String value);
-
- /**
+ * GCS path for staging local files, e.g. gs://bucket/object
+ *
+ * <p>Must be a valid Cloud Storage URL, beginning with the prefix "gs://"
+ *
- * <p>At least one of {@link #getTempLocation()} or {@link #getStagingLocation()} must be set. If
- * {@link #getTempLocation()} is not set, then the Dataflow pipeline defaults to using
- * {@link #getStagingLocation()}.
++ * <p>At least one of {@link PipelineOptions#getTempLocation()} or {@link #getStagingLocation()}
++ * must be set. If {@link #getStagingLocation()} is not set, then the Dataflow
++ * pipeline defaults to using {@link PipelineOptions#getTempLocation()}.
+ */
+ @Description("GCS path for staging local files, e.g. \"gs://bucket/object\". "
+ + "Must be a valid Cloud Storage URL, beginning with the prefix \"gs://\". "
+ + "At least one of stagingLocation or tempLocation must be set. If stagingLocation is unset, "
+ + "defaults to using tempLocation.")
- @Validation.Required(groups = {DATAFLOW_STORAGE_LOCATION})
+ String getStagingLocation();
+ void setStagingLocation(String value);
+
+ /**
+ * The Dataflow job name is used as an idempotence key within the Dataflow service.
+ * If there is an existing job that is currently active, another active job with the same
+ * name will not be able to be created. Defaults to using the ApplicationName-UserName-Date.
+ */
+ @Description("The Dataflow job name is used as an idempotence key within the Dataflow service. "
+ + "If there is an existing job that is currently active, another active job with the same "
+ + "name will not be able to be created. Defaults to using the ApplicationName-UserName-Date.")
+ @Default.InstanceFactory(JobNameFactory.class)
+ String getJobName();
+ void setJobName(String value);
+
+ /**
+ * Whether to update the currently running pipeline with the same name as this one.
+ */
+ @Override
+ @SuppressWarnings("deprecation") // base class member deprecated in favor of this one.
+ @Description(
+ "If set, replace the existing pipeline with the name specified by --jobName with "
+ + "this pipeline, preserving state.")
+ boolean getUpdate();
+ @Override
+ @SuppressWarnings("deprecation") // base class member deprecated in favor of this one.
+ void setUpdate(boolean value);
+
+ /**
+ * Returns a normalized job name constructed from {@link ApplicationNameOptions#getAppName()}, the
+ * local system user name (if available), and the current time. The normalization makes sure that
+ * the job name matches the required pattern of [a-z]([-a-z0-9]*[a-z0-9])? and length limit of 40
+ * characters.
+ *
+ * <p>This job name factory is only able to generate one unique name per second per application
+ * and user combination.
+ */
+ public static class JobNameFactory implements DefaultValueFactory<String> {
+ private static final DateTimeFormatter FORMATTER =
+ DateTimeFormat.forPattern("MMddHHmmss").withZone(DateTimeZone.UTC);
+
+ @Override
+ public String create(PipelineOptions options) {
+ String appName = options.as(ApplicationNameOptions.class).getAppName();
+ String normalizedAppName = appName == null || appName.length() == 0 ? "dataflow"
+ : appName.toLowerCase()
+ .replaceAll("[^a-z0-9]", "0")
+ .replaceAll("^[^a-z]", "a");
+ String userName = MoreObjects.firstNonNull(System.getProperty("user.name"), "");
+ String normalizedUserName = userName.toLowerCase()
+ .replaceAll("[^a-z0-9]", "0");
+ String datePart = FORMATTER.print(DateTimeUtils.currentTimeMillis());
+ return normalizedAppName + "-" + normalizedUserName + "-" + datePart;
+ }
+ }
+ }
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c4515687/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
----------------------------------------------------------------------
diff --cc sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
index 0000000,8ff1fa9..41fa45c
mode 000000,100644..100644
--- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
+++ b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
@@@ -1,0 -1,249 +1,263 @@@
+ /*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+ package com.google.cloud.dataflow.sdk.options;
+
+ import com.google.auto.service.AutoService;
+ import com.google.cloud.dataflow.sdk.Pipeline;
+ import com.google.cloud.dataflow.sdk.options.GoogleApiDebugOptions.GoogleApiTracer;
+ import com.google.cloud.dataflow.sdk.options.ProxyInvocationHandler.Deserializer;
+ import com.google.cloud.dataflow.sdk.options.ProxyInvocationHandler.Serializer;
+ import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
+ import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
+ import com.google.cloud.dataflow.sdk.transforms.DoFn;
+ import com.google.cloud.dataflow.sdk.transforms.DoFn.Context;
+
+ import com.fasterxml.jackson.annotation.JsonIgnore;
+ import com.fasterxml.jackson.databind.ObjectMapper;
+ import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+ import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+
+ import java.lang.reflect.Proxy;
+ import java.util.ServiceLoader;
+
+ import javax.annotation.concurrent.ThreadSafe;
+
+ /**
+ * PipelineOptions are used to configure Pipelines. You can extend {@link PipelineOptions}
+ * to create custom configuration options specific to your {@link Pipeline},
+ * for both local execution and execution via a {@link PipelineRunner}.
+ *
+ * <p>{@link PipelineOptions} and their subinterfaces represent a collection of properties
+ * which can be manipulated in a type safe manner. {@link PipelineOptions} is backed by a
+ * dynamic {@link Proxy} which allows for type safe manipulation of properties in an extensible
+ * fashion through plain old Java interfaces.
+ *
+ * <p>{@link PipelineOptions} can be created with {@link PipelineOptionsFactory#create()}
+ * and {@link PipelineOptionsFactory#as(Class)}. They can be created
+ * from command-line arguments with {@link PipelineOptionsFactory#fromArgs(String[])}.
+ * They can be converted to another type by invoking {@link PipelineOptions#as(Class)} and
+ * can be accessed from within a {@link DoFn} by invoking
+ * {@link Context#getPipelineOptions()}.
+ *
+ * <p>For example:
+ * <pre>{@code
+ * // The most common way to construct PipelineOptions is via command-line argument parsing:
+ * public static void main(String[] args) {
+ * // Will parse the arguments passed into the application and construct a PipelineOptions
+ * // Note that --help will print registered options, and --help=PipelineOptionsClassName
+ * // will print out usage for the specific class.
+ * PipelineOptions options =
+ * PipelineOptionsFactory.fromArgs(args).create();
+ *
+ * Pipeline p = Pipeline.create(options);
+ * ...
+ * p.run();
+ * }
+ *
+ * // To create options for the DirectPipeline:
+ * DirectPipelineOptions directPipelineOptions =
+ * PipelineOptionsFactory.as(DirectPipelineOptions.class);
+ * directPipelineOptions.setStreaming(true);
+ *
+ * // To cast from one type to another using the as(Class) method:
+ * DataflowPipelineOptions dataflowPipelineOptions =
+ * directPipelineOptions.as(DataflowPipelineOptions.class);
+ *
+ * // Options for the same property are shared between types
+ * // The statement below will print out "true"
+ * System.out.println(dataflowPipelineOptions.isStreaming());
+ *
+ * // Prints out registered options.
+ * PipelineOptionsFactory.printHelp(System.out);
+ *
+ * // Prints out options which are available to be set on DataflowPipelineOptions
+ * PipelineOptionsFactory.printHelp(System.out, DataflowPipelineOptions.class);
+ * }</pre>
+ *
+ * <h2>Defining Your Own PipelineOptions</h2>
+ *
+ * Defining your own {@link PipelineOptions} is the way for you to make configuration
+ * options available for both local execution and execution via a {@link PipelineRunner}.
+ * By having PipelineOptionsFactory as your command-line interpreter, you will provide
+ * a standardized way for users to interact with your application via the command-line.
+ *
+ * <p>To define your own {@link PipelineOptions}, you create an interface which
+ * extends {@link PipelineOptions} and define getter/setter pairs. These
+ * getter/setter pairs define a collection of
+ * <a href="https://docs.oracle.com/javase/tutorial/javabeans/writing/properties.html">
+ * JavaBean properties</a>.
+ *
+ * <p>For example:
+ * <pre>{@code
+ * // Creates a user defined property called "myProperty"
+ * public interface MyOptions extends PipelineOptions {
+ * String getMyProperty();
+ * void setMyProperty(String value);
+ * }
+ * }</pre>
+ *
+ * <p>Note: Please see the section on Registration below when using custom property types.
+ *
+ * <h3>Restrictions</h3>
+ *
+ * Since PipelineOptions can be "cast" to multiple types dynamically using
+ * {@link PipelineOptions#as(Class)}, a property must conform to the following set of restrictions:
+ * <ul>
+ * <li>Any property with the same name must have the same return type for all derived
+ * interfaces of {@link PipelineOptions}.
+ * <li>Every bean property of any interface derived from {@link PipelineOptions} must have a
+ * getter and setter method.
+ * <li>Every method must conform to being a getter or setter for a JavaBean.
+ * <li>The derived interface of {@link PipelineOptions} must be composable with every interface
+ * part registered with the PipelineOptionsFactory.
+ * <li>Only getters may be annotated with {@link JsonIgnore @JsonIgnore}.
+ * <li>If any getter is annotated with {@link JsonIgnore @JsonIgnore}, then all getters for
+ * this property must be annotated with {@link JsonIgnore @JsonIgnore}.
+ * </ul>
+ *
+ * <h3>Annotations For PipelineOptions</h3>
+ *
+ * {@link Description @Description} can be used to annotate an interface or a getter
+ * with useful information which is output when {@code --help}
+ * is invoked via {@link PipelineOptionsFactory#fromArgs(String[])}.
+ *
+ * <p>{@link Default @Default} represents a set of annotations that can be used to annotate getter
+ * properties on {@link PipelineOptions} with information representing the default value to be
+ * returned if no value is specified. Any default implementation (using the {@code default} keyword)
+ * is ignored.
+ *
+ * <p>{@link Hidden @Hidden} hides an option from being listed when {@code --help}
+ * is invoked via {@link PipelineOptionsFactory#fromArgs(String[])}.
+ *
+ * <p>{@link Validation @Validation} represents a set of annotations that can be used to annotate
+ * getter properties on {@link PipelineOptions} with information representing the validation
+ * criteria to be used when validating with the {@link PipelineOptionsValidator}. Validation
+ * will be performed if during construction of the {@link PipelineOptions},
+ * {@link PipelineOptionsFactory#withValidation()} is invoked.
+ *
+ * <p>{@link JsonIgnore @JsonIgnore} is used to prevent a property from being serialized and
+ * available during execution of {@link DoFn}. See the Serialization section below for more
+ * details.
+ *
+ * <h2>Registration Of PipelineOptions</h2>
+ *
+ * Registration of {@link PipelineOptions} by an application guarantees that the
+ * {@link PipelineOptions} is composable during execution of their {@link Pipeline} and
+ * meets the restrictions listed above or will fail during registration. Registration
+ * also lists the registered {@link PipelineOptions} when {@code --help}
+ * is invoked via {@link PipelineOptionsFactory#fromArgs(String[])}.
+ *
+ * <p>Registration can be performed by invoking {@link PipelineOptionsFactory#register} within
+ * a users application or via automatic registration by creating a {@link ServiceLoader} entry
+ * and a concrete implementation of the {@link PipelineOptionsRegistrar} interface.
+ *
+ * <p>It is optional but recommended to use one of the many build time tools such as
+ * {@link AutoService} to generate the necessary META-INF files automatically.
+ *
+ * <p>A list of registered options can be fetched from
+ * {@link PipelineOptionsFactory#getRegisteredOptions()}.
+ *
+ * <h2>Serialization Of PipelineOptions</h2>
+ *
+ * {@link PipelineRunner}s require support for options to be serialized. Each property
+ * within {@link PipelineOptions} must be able to be serialized using Jackson's
+ * {@link ObjectMapper} or the getter method for the property annotated with
+ * {@link JsonIgnore @JsonIgnore}.
+ *
+ * <p>Jackson supports serialization of many types and supports a useful set of
+ * <a href="https://github.com/FasterXML/jackson-annotations">annotations</a> to aid in
+ * serialization of custom types. We point you to the public
+ * <a href="https://github.com/FasterXML/jackson">Jackson documentation</a> when attempting
+ * to add serialization support for your custom types. See {@link GoogleApiTracer} for an
+ * example using the Jackson annotations to serialize and deserialize a custom type.
+ *
+ * <p>Note: It is an error to have the same property available in multiple interfaces with only
+ * some of them being annotated with {@link JsonIgnore @JsonIgnore}. It is also an error to mark a
+ * setter for a property with {@link JsonIgnore @JsonIgnore}.
+ */
+ @JsonSerialize(using = Serializer.class)
+ @JsonDeserialize(using = Deserializer.class)
+ @ThreadSafe
+ public interface PipelineOptions {
+ /**
+ * Transforms this object into an object of type {@code <T>} saving each property
+ * that has been manipulated. {@code <T>} must extend {@link PipelineOptions}.
+ *
+ * <p>If {@code <T>} is not registered with the {@link PipelineOptionsFactory}, then we
+ * attempt to verify that {@code <T>} is composable with every interface that this
+ * instance of the {@code PipelineOptions} has seen.
+ *
+ * @param kls The class of the type to transform to.
+ * @return An object of type kls.
+ */
+ <T extends PipelineOptions> T as(Class<T> kls);
+
+ /**
+ * Makes a deep clone of this object, and transforms the cloned object into the specified
+ * type {@code kls}. See {@link #as} for more information about the conversion.
+ *
+ * <p>Properties that are marked with {@code @JsonIgnore} will not be cloned.
+ */
+ <T extends PipelineOptions> T cloneAs(Class<T> kls);
+
+ /**
+ * The pipeline runner that will be used to execute the pipeline.
+ * For registered runners, the class name can be specified, otherwise the fully
+ * qualified name needs to be specified.
+ */
+ @Validation.Required
+ @Description("The pipeline runner that will be used to execute the pipeline. "
+ + "For registered runners, the class name can be specified, otherwise the fully "
+ + "qualified name needs to be specified.")
+ @Default.Class(DirectPipelineRunner.class)
+ Class<? extends PipelineRunner<?>> getRunner();
+ void setRunner(Class<? extends PipelineRunner<?>> kls);
+
+ /**
+ * Enumeration of the possible states for a given check.
+ */
+ public static enum CheckEnabled {
+ OFF,
+ WARNING,
+ ERROR;
+ }
+
+ /**
+ * Whether to check for stable unique names on each transform. This is necessary to
+ * support updating of pipelines.
+ */
+ @Validation.Required
+ @Description("Whether to check for stable unique names on each transform. This is necessary to "
+ + "support updating of pipelines.")
+ @Default.Enum("WARNING")
+ CheckEnabled getStableUniqueNames();
+ void setStableUniqueNames(CheckEnabled enabled);
++
++ /**
++ * A pipeline level default location for storing temporary files.
++ *
++ * <p>This can be a path of any file system.
++ *
++ * <p>{@link #getTempLocation()} can be used as a default location in other
++ * {@link PipelineOptions}.
++ *
++ * <p>If it is unset, {@link PipelineRunner} can override it.
++ */
++ @Description("A pipeline level default location for storing temporary files.")
++ String getTempLocation();
++ void setTempLocation(String value);
+ }
[6/7] incubator-beam git commit: Merge branch 'master' into
temp-option
Posted by ke...@apache.org.
Merge branch 'master' into temp-option
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/c4515687
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/c4515687
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/c4515687
Branch: refs/heads/master
Commit: c45156870d018aee2bae4ea4d26a2dd1f64417cc
Parents: 911d295 9c61dd2
Author: Pei He <pe...@gmail.com>
Authored: Thu Mar 24 11:12:06 2016 -0700
Committer: Pei He <pe...@gmail.com>
Committed: Thu Mar 24 11:12:06 2016 -0700
----------------------------------------------------------------------
examples/README.md | 95 -
examples/java/README.md | 95 +
examples/java/pom.xml | 394 +++
.../dataflow/examples/DebuggingWordCount.java | 182 ++
.../dataflow/examples/MinimalWordCount.java | 117 +
.../dataflow/examples/WindowedWordCount.java | 269 ++
.../cloud/dataflow/examples/WordCount.java | 206 ++
.../examples/common/DataflowExampleOptions.java | 34 +
.../examples/common/DataflowExampleUtils.java | 485 +++
.../common/ExampleBigQueryTableOptions.java | 53 +
...xamplePubsubTopicAndSubscriptionOptions.java | 44 +
.../common/ExamplePubsubTopicOptions.java | 44 +
.../examples/common/PubsubFileInjector.java | 153 +
.../examples/complete/AutoComplete.java | 516 +++
.../cloud/dataflow/examples/complete/README.md | 44 +
.../examples/complete/StreamingWordExtract.java | 163 +
.../cloud/dataflow/examples/complete/TfIdf.java | 431 +++
.../examples/complete/TopWikipediaSessions.java | 223 ++
.../examples/complete/TrafficMaxLaneFlow.java | 425 +++
.../examples/complete/TrafficRoutes.java | 459 +++
.../examples/cookbook/BigQueryTornadoes.java | 179 ++
.../cookbook/CombinePerKeyExamples.java | 223 ++
.../examples/cookbook/DatastoreWordCount.java | 269 ++
.../examples/cookbook/DeDupExample.java | 100 +
.../examples/cookbook/FilterExamples.java | 266 ++
.../examples/cookbook/JoinExamples.java | 185 ++
.../examples/cookbook/MaxPerKeyExamples.java | 173 +
.../cloud/dataflow/examples/cookbook/README.md | 55 +
.../examples/cookbook/TriggerExample.java | 564 ++++
.../examples/DebuggingWordCountTest.java | 45 +
.../cloud/dataflow/examples/WordCountTest.java | 85 +
.../examples/complete/AutoCompleteTest.java | 181 ++
.../dataflow/examples/complete/TfIdfTest.java | 67 +
.../complete/TopWikipediaSessionsTest.java | 62 +
.../cookbook/BigQueryTornadoesTest.java | 80 +
.../cookbook/CombinePerKeyExamplesTest.java | 90 +
.../examples/cookbook/DeDupExampleTest.java | 83 +
.../examples/cookbook/FilterExamplesTest.java | 85 +
.../examples/cookbook/JoinExamplesTest.java | 114 +
.../cookbook/MaxPerKeyExamplesTest.java | 85 +
.../examples/cookbook/TriggerExampleTest.java | 139 +
examples/java8/pom.xml | 279 ++
.../examples/MinimalWordCountJava8.java | 68 +
.../examples/complete/game/GameStats.java | 339 ++
.../examples/complete/game/HourlyTeamScore.java | 193 ++
.../examples/complete/game/LeaderBoard.java | 237 ++
.../dataflow/examples/complete/game/README.md | 113 +
.../examples/complete/game/UserScore.java | 239 ++
.../complete/game/injector/Injector.java | 415 +++
.../complete/game/injector/InjectorUtils.java | 101 +
.../injector/RetryHttpInitializerWrapper.java | 126 +
.../complete/game/utils/WriteToBigQuery.java | 134 +
.../game/utils/WriteWindowedToBigQuery.java | 76 +
.../examples/MinimalWordCountJava8Test.java | 103 +
.../examples/complete/game/GameStatsTest.java | 76 +
.../complete/game/HourlyTeamScoreTest.java | 111 +
.../examples/complete/game/UserScoreTest.java | 154 +
examples/pom.xml | 394 ---
.../dataflow/examples/DebuggingWordCount.java | 182 --
.../dataflow/examples/MinimalWordCount.java | 117 -
.../dataflow/examples/WindowedWordCount.java | 269 --
.../cloud/dataflow/examples/WordCount.java | 206 --
.../examples/common/DataflowExampleOptions.java | 34 -
.../examples/common/DataflowExampleUtils.java | 485 ---
.../common/ExampleBigQueryTableOptions.java | 53 -
...xamplePubsubTopicAndSubscriptionOptions.java | 44 -
.../common/ExamplePubsubTopicOptions.java | 44 -
.../examples/common/PubsubFileInjector.java | 153 -
.../examples/complete/AutoComplete.java | 516 ---
.../cloud/dataflow/examples/complete/README.md | 44 -
.../examples/complete/StreamingWordExtract.java | 163 -
.../cloud/dataflow/examples/complete/TfIdf.java | 431 ---
.../examples/complete/TopWikipediaSessions.java | 223 --
.../examples/complete/TrafficMaxLaneFlow.java | 425 ---
.../examples/complete/TrafficRoutes.java | 459 ---
.../examples/cookbook/BigQueryTornadoes.java | 179 --
.../cookbook/CombinePerKeyExamples.java | 223 --
.../examples/cookbook/DatastoreWordCount.java | 269 --
.../examples/cookbook/DeDupExample.java | 100 -
.../examples/cookbook/FilterExamples.java | 266 --
.../examples/cookbook/JoinExamples.java | 185 --
.../examples/cookbook/MaxPerKeyExamples.java | 173 -
.../cloud/dataflow/examples/cookbook/README.md | 55 -
.../examples/cookbook/TriggerExample.java | 564 ----
.../examples/DebuggingWordCountTest.java | 45 -
.../cloud/dataflow/examples/WordCountTest.java | 85 -
.../examples/complete/AutoCompleteTest.java | 181 --
.../dataflow/examples/complete/TfIdfTest.java | 67 -
.../complete/TopWikipediaSessionsTest.java | 62 -
.../cookbook/BigQueryTornadoesTest.java | 80 -
.../cookbook/CombinePerKeyExamplesTest.java | 90 -
.../examples/cookbook/DeDupExampleTest.java | 83 -
.../examples/cookbook/FilterExamplesTest.java | 85 -
.../examples/cookbook/JoinExamplesTest.java | 114 -
.../cookbook/MaxPerKeyExamplesTest.java | 85 -
.../examples/cookbook/TriggerExampleTest.java | 139 -
java8examples/pom.xml | 278 --
.../examples/MinimalWordCountJava8.java | 68 -
.../examples/complete/game/GameStats.java | 339 --
.../examples/complete/game/HourlyTeamScore.java | 193 --
.../examples/complete/game/LeaderBoard.java | 237 --
.../dataflow/examples/complete/game/README.md | 113 -
.../examples/complete/game/UserScore.java | 239 --
.../complete/game/injector/Injector.java | 415 ---
.../complete/game/injector/InjectorUtils.java | 101 -
.../injector/RetryHttpInitializerWrapper.java | 126 -
.../complete/game/utils/WriteToBigQuery.java | 134 -
.../game/utils/WriteWindowedToBigQuery.java | 76 -
.../examples/MinimalWordCountJava8Test.java | 103 -
.../examples/complete/game/GameStatsTest.java | 76 -
.../complete/game/HourlyTeamScoreTest.java | 111 -
.../examples/complete/game/UserScoreTest.java | 154 -
java8tests/pom.xml | 183 --
.../sdk/transforms/CombineJava8Test.java | 133 -
.../sdk/transforms/FilterJava8Test.java | 118 -
.../transforms/FlatMapElementsJava8Test.java | 84 -
.../sdk/transforms/MapElementsJava8Test.java | 77 -
.../sdk/transforms/PartitionJava8Test.java | 74 -
.../transforms/RemoveDuplicatesJava8Test.java | 98 -
.../sdk/transforms/WithKeysJava8Test.java | 73 -
.../sdk/transforms/WithTimestampsJava8Test.java | 65 -
javadoc/README.md | 4 -
javadoc/apiclient-docs/package-list | 34 -
javadoc/avro-docs/package-list | 30 -
javadoc/bq-docs/package-list | 2 -
javadoc/dataflow-sdk-docs/package-list | 11 -
javadoc/datastore-docs/package-list | 2 -
javadoc/guava-docs/package-list | 15 -
javadoc/hamcrest-docs/package-list | 10 -
javadoc/jackson-annotations-docs/package-list | 1 -
javadoc/jackson-databind-docs/package-list | 20 -
javadoc/joda-docs/package-list | 7 -
javadoc/junit-docs/package-list | 7 -
javadoc/oauth-docs/package-list | 11 -
javadoc/overview.html | 31 -
maven-archetypes/examples/pom.xml | 56 -
.../META-INF/maven/archetype-metadata.xml | 29 -
.../main/resources/archetype-resources/pom.xml | 204 --
.../src/main/java/DebuggingWordCount.java | 182 --
.../src/main/java/MinimalWordCount.java | 115 -
.../src/main/java/WindowedWordCount.java | 262 --
.../src/main/java/WordCount.java | 204 --
.../java/common/DataflowExampleOptions.java | 29 -
.../main/java/common/DataflowExampleUtils.java | 398 ---
.../common/ExampleBigQueryTableOptions.java | 53 -
.../java/common/ExamplePubsubTopicOptions.java | 49 -
.../main/java/common/PubsubFileInjector.java | 153 -
.../src/test/java/DebuggingWordCountTest.java | 44 -
.../src/test/java/WordCountTest.java | 85 -
.../projects/basic/archetype.properties | 5 -
.../src/test/resources/projects/basic/goal.txt | 1 -
maven-archetypes/starter/pom.xml | 56 -
.../META-INF/maven/archetype-metadata.xml | 21 -
.../main/resources/archetype-resources/pom.xml | 43 -
.../src/main/java/StarterPipeline.java | 67 -
.../projects/basic/archetype.properties | 5 -
.../src/test/resources/projects/basic/goal.txt | 1 -
.../resources/projects/basic/reference/pom.xml | 43 -
.../src/main/java/it/pkg/StarterPipeline.java | 67 -
pom.xml | 117 +-
runners/flink/examples/pom.xml | 52 +-
runners/flink/pom.xml | 55 +-
runners/flink/runner/pom.xml | 57 +-
runners/pom.xml | 45 +-
runners/spark/pom.xml | 101 +-
sdk/pom.xml | 770 -----
.../com/google/cloud/dataflow/sdk/Pipeline.java | 502 ---
.../cloud/dataflow/sdk/PipelineResult.java | 95 -
.../dataflow/sdk/annotations/Experimental.java | 80 -
.../dataflow/sdk/annotations/package-info.java | 20 -
.../cloud/dataflow/sdk/coders/AtomicCoder.java | 51 -
.../cloud/dataflow/sdk/coders/AvroCoder.java | 714 -----
.../sdk/coders/BigEndianIntegerCoder.java | 99 -
.../dataflow/sdk/coders/BigEndianLongCoder.java | 99 -
.../dataflow/sdk/coders/ByteArrayCoder.java | 138 -
.../cloud/dataflow/sdk/coders/ByteCoder.java | 111 -
.../dataflow/sdk/coders/ByteStringCoder.java | 106 -
.../sdk/coders/CannotProvideCoderException.java | 95 -
.../google/cloud/dataflow/sdk/coders/Coder.java | 298 --
.../dataflow/sdk/coders/CoderException.java | 36 -
.../dataflow/sdk/coders/CoderFactories.java | 274 --
.../cloud/dataflow/sdk/coders/CoderFactory.java | 43 -
.../dataflow/sdk/coders/CoderProvider.java | 33 -
.../dataflow/sdk/coders/CoderProviders.java | 164 -
.../dataflow/sdk/coders/CoderRegistry.java | 843 -----
.../dataflow/sdk/coders/CollectionCoder.java | 73 -
.../cloud/dataflow/sdk/coders/CustomCoder.java | 137 -
.../cloud/dataflow/sdk/coders/DefaultCoder.java | 66 -
.../dataflow/sdk/coders/DelegateCoder.java | 164 -
.../sdk/coders/DeterministicStandardCoder.java | 38 -
.../cloud/dataflow/sdk/coders/DoubleCoder.java | 113 -
.../dataflow/sdk/coders/DurationCoder.java | 97 -
.../cloud/dataflow/sdk/coders/EntityCoder.java | 86 -
.../cloud/dataflow/sdk/coders/InstantCoder.java | 113 -
.../dataflow/sdk/coders/IterableCoder.java | 78 -
.../dataflow/sdk/coders/IterableLikeCoder.java | 278 --
.../cloud/dataflow/sdk/coders/JAXBCoder.java | 135 -
.../cloud/dataflow/sdk/coders/KvCoder.java | 162 -
.../cloud/dataflow/sdk/coders/KvCoderBase.java | 61 -
.../cloud/dataflow/sdk/coders/ListCoder.java | 77 -
.../cloud/dataflow/sdk/coders/MapCoder.java | 160 -
.../cloud/dataflow/sdk/coders/MapCoderBase.java | 54 -
.../dataflow/sdk/coders/NullableCoder.java | 175 -
.../cloud/dataflow/sdk/coders/Proto2Coder.java | 361 ---
.../dataflow/sdk/coders/SerializableCoder.java | 183 --
.../cloud/dataflow/sdk/coders/SetCoder.java | 94 -
.../dataflow/sdk/coders/StandardCoder.java | 229 --
.../sdk/coders/StringDelegateCoder.java | 86 -
.../dataflow/sdk/coders/StringUtf8Coder.java | 139 -
.../sdk/coders/StructuralByteArray.java | 56 -
.../dataflow/sdk/coders/TableRowJsonCoder.java | 82 -
.../sdk/coders/TextualIntegerCoder.java | 69 -
.../cloud/dataflow/sdk/coders/VarIntCoder.java | 97 -
.../cloud/dataflow/sdk/coders/VarLongCoder.java | 96 -
.../cloud/dataflow/sdk/coders/VoidCoder.java | 76 -
.../cloud/dataflow/sdk/coders/package-info.java | 44 -
.../sdk/coders/protobuf/ProtoCoder.java | 404 ---
.../sdk/coders/protobuf/ProtobufUtil.java | 171 -
.../sdk/coders/protobuf/package-info.java | 23 -
.../google/cloud/dataflow/sdk/io/AvroIO.java | 810 -----
.../cloud/dataflow/sdk/io/AvroSource.java | 647 ----
.../cloud/dataflow/sdk/io/BigQueryIO.java | 1499 ---------
.../cloud/dataflow/sdk/io/BlockBasedSource.java | 237 --
.../sdk/io/BoundedReadFromUnboundedSource.java | 271 --
.../cloud/dataflow/sdk/io/BoundedSource.java | 277 --
.../cloud/dataflow/sdk/io/CompressedSource.java | 413 ---
.../cloud/dataflow/sdk/io/CountingInput.java | 191 --
.../cloud/dataflow/sdk/io/CountingSource.java | 397 ---
.../cloud/dataflow/sdk/io/DatastoreIO.java | 957 ------
.../cloud/dataflow/sdk/io/FileBasedSink.java | 864 -----
.../cloud/dataflow/sdk/io/FileBasedSource.java | 648 ----
.../dataflow/sdk/io/OffsetBasedSource.java | 326 --
.../google/cloud/dataflow/sdk/io/PubsubIO.java | 1044 ------
.../com/google/cloud/dataflow/sdk/io/Read.java | 253 --
.../dataflow/sdk/io/ShardNameTemplate.java | 75 -
.../com/google/cloud/dataflow/sdk/io/Sink.java | 252 --
.../google/cloud/dataflow/sdk/io/Source.java | 193 --
.../google/cloud/dataflow/sdk/io/TextIO.java | 992 ------
.../cloud/dataflow/sdk/io/UnboundedSource.java | 253 --
.../com/google/cloud/dataflow/sdk/io/Write.java | 213 --
.../google/cloud/dataflow/sdk/io/XmlSink.java | 310 --
.../google/cloud/dataflow/sdk/io/XmlSource.java | 541 ----
.../dataflow/sdk/io/bigtable/BigtableIO.java | 987 ------
.../sdk/io/bigtable/BigtableService.java | 108 -
.../sdk/io/bigtable/BigtableServiceImpl.java | 241 --
.../dataflow/sdk/io/bigtable/package-info.java | 22 -
.../cloud/dataflow/sdk/io/package-info.java | 37 -
.../cloud/dataflow/sdk/io/range/ByteKey.java | 173 -
.../dataflow/sdk/io/range/ByteKeyRange.java | 376 ---
.../sdk/io/range/ByteKeyRangeTracker.java | 117 -
.../sdk/io/range/OffsetRangeTracker.java | 182 --
.../dataflow/sdk/io/range/RangeTracker.java | 220 --
.../dataflow/sdk/io/range/package-info.java | 23 -
.../sdk/options/ApplicationNameOptions.java | 33 -
.../dataflow/sdk/options/BigQueryOptions.java | 31 -
.../BlockingDataflowPipelineOptions.java | 49 -
.../sdk/options/CloudDebuggerOptions.java | 43 -
.../options/DataflowPipelineDebugOptions.java | 259 --
.../sdk/options/DataflowPipelineOptions.java | 114 -
.../DataflowPipelineWorkerPoolOptions.java | 254 --
.../sdk/options/DataflowProfilingOptions.java | 46 -
.../options/DataflowWorkerHarnessOptions.java | 50 -
.../options/DataflowWorkerLoggingOptions.java | 153 -
.../cloud/dataflow/sdk/options/Default.java | 153 -
.../sdk/options/DefaultValueFactory.java | 38 -
.../cloud/dataflow/sdk/options/Description.java | 35 -
.../sdk/options/DirectPipelineOptions.java | 75 -
.../cloud/dataflow/sdk/options/GcpOptions.java | 291 --
.../cloud/dataflow/sdk/options/GcsOptions.java | 113 -
.../sdk/options/GoogleApiDebugOptions.java | 87 -
.../cloud/dataflow/sdk/options/Hidden.java | 33 -
.../dataflow/sdk/options/PipelineOptions.java | 263 --
.../sdk/options/PipelineOptionsFactory.java | 1537 ---------
.../sdk/options/PipelineOptionsRegistrar.java | 36 -
.../sdk/options/PipelineOptionsValidator.java | 102 -
.../sdk/options/ProxyInvocationHandler.java | 441 ---
.../dataflow/sdk/options/StreamingOptions.java | 30 -
.../cloud/dataflow/sdk/options/Validation.java | 46 -
.../dataflow/sdk/options/package-info.java | 25 -
.../google/cloud/dataflow/sdk/package-info.java | 33 -
.../runners/AggregatorPipelineExtractor.java | 97 -
.../runners/AggregatorRetrievalException.java | 32 -
.../dataflow/sdk/runners/AggregatorValues.java | 52 -
.../runners/BlockingDataflowPipelineRunner.java | 181 --
.../DataflowJobAlreadyExistsException.java | 34 -
.../DataflowJobAlreadyUpdatedException.java | 33 -
.../runners/DataflowJobCancelledException.java | 38 -
.../sdk/runners/DataflowJobException.java | 40 -
.../runners/DataflowJobExecutionException.java | 34 -
.../runners/DataflowJobUpdatedException.java | 51 -
.../dataflow/sdk/runners/DataflowPipeline.java | 59 -
.../sdk/runners/DataflowPipelineJob.java | 389 ---
.../sdk/runners/DataflowPipelineRegistrar.java | 58 -
.../sdk/runners/DataflowPipelineRunner.java | 3007 ------------------
.../runners/DataflowPipelineRunnerHooks.java | 37 -
.../sdk/runners/DataflowPipelineTranslator.java | 1104 -------
.../sdk/runners/DataflowServiceException.java | 32 -
.../dataflow/sdk/runners/DirectPipeline.java | 55 -
.../sdk/runners/DirectPipelineRegistrar.java | 53 -
.../sdk/runners/DirectPipelineRunner.java | 1156 -------
.../dataflow/sdk/runners/PipelineRunner.java | 76 -
.../sdk/runners/PipelineRunnerRegistrar.java | 40 -
.../sdk/runners/RecordingPipelineVisitor.java | 54 -
.../sdk/runners/TransformHierarchy.java | 104 -
.../dataflow/sdk/runners/TransformTreeNode.java | 252 --
.../sdk/runners/dataflow/AssignWindows.java | 88 -
.../runners/dataflow/BigQueryIOTranslator.java | 125 -
.../sdk/runners/dataflow/CustomSources.java | 118 -
.../dataflow/DataflowAggregatorTransforms.java | 79 -
.../dataflow/DataflowMetricUpdateExtractor.java | 110 -
.../runners/dataflow/PubsubIOTranslator.java | 107 -
.../sdk/runners/dataflow/ReadTranslator.java | 103 -
.../sdk/runners/dataflow/package-info.java | 20 -
.../inprocess/BoundedReadEvaluatorFactory.java | 152 -
.../CachedThreadPoolExecutorServiceFactory.java | 42 -
.../dataflow/sdk/runners/inprocess/Clock.java | 29 -
.../runners/inprocess/CompletionCallback.java | 33 -
.../ConsumerTrackingPipelineVisitor.java | 173 -
.../inprocess/EmptyTransformEvaluator.java | 49 -
.../sdk/runners/inprocess/EvaluatorKey.java | 55 -
.../inprocess/ExecutorServiceFactory.java | 32 -
.../ExecutorServiceParallelExecutor.java | 432 ---
.../inprocess/FlattenEvaluatorFactory.java | 83 -
.../runners/inprocess/ForwardingPTransform.java | 54 -
.../inprocess/GroupByKeyEvaluatorFactory.java | 252 --
.../inprocess/InMemoryWatermarkManager.java | 1310 --------
.../sdk/runners/inprocess/InProcessBundle.java | 121 -
.../inprocess/InProcessBundleOutputManager.java | 50 -
.../sdk/runners/inprocess/InProcessCreate.java | 209 --
.../inprocess/InProcessEvaluationContext.java | 405 ---
.../inprocess/InProcessExecutionContext.java | 106 -
.../runners/inprocess/InProcessExecutor.java | 46 -
.../inprocess/InProcessPipelineOptions.java | 90 -
.../inprocess/InProcessPipelineRunner.java | 343 --
.../inprocess/InProcessSideInputContainer.java | 230 --
.../inprocess/InProcessTimerInternals.java | 84 -
.../inprocess/InProcessTransformResult.java | 75 -
.../inprocess/KeyedPValueTrackingVisitor.java | 95 -
.../sdk/runners/inprocess/NanosOffsetClock.java | 58 -
.../inprocess/ParDoInProcessEvaluator.java | 109 -
.../inprocess/ParDoMultiEvaluatorFactory.java | 90 -
.../inprocess/ParDoSingleEvaluatorFactory.java | 87 -
.../sdk/runners/inprocess/StepAndKey.java | 68 -
.../runners/inprocess/StepTransformResult.java | 157 -
.../runners/inprocess/TransformEvaluator.java | 45 -
.../inprocess/TransformEvaluatorFactory.java | 42 -
.../inprocess/TransformEvaluatorRegistry.java | 72 -
.../runners/inprocess/TransformExecutor.java | 114 -
.../inprocess/TransformExecutorService.java | 34 -
.../inprocess/TransformExecutorServices.java | 153 -
.../UnboundedReadEvaluatorFactory.java | 168 -
.../runners/inprocess/ViewEvaluatorFactory.java | 121 -
.../inprocess/WatermarkCallbackExecutor.java | 143 -
.../dataflow/sdk/runners/package-info.java | 33 -
.../dataflow/sdk/runners/worker/IsmFormat.java | 946 ------
.../sdk/runners/worker/package-info.java | 24 -
.../dataflow/sdk/testing/CoderProperties.java | 349 --
.../dataflow/sdk/testing/DataflowAssert.java | 825 -----
.../dataflow/sdk/testing/RunnableOnService.java | 30 -
.../sdk/testing/SerializableMatcher.java | 36 -
.../sdk/testing/SerializableMatchers.java | 1180 -------
.../dataflow/sdk/testing/SourceTestUtils.java | 642 ----
.../testing/TestDataflowPipelineOptions.java | 26 -
.../sdk/testing/TestDataflowPipelineRunner.java | 220 --
.../dataflow/sdk/testing/TestPipeline.java | 193 --
.../dataflow/sdk/testing/WindowFnTestUtils.java | 325 --
.../dataflow/sdk/testing/package-info.java | 21 -
.../dataflow/sdk/transforms/Aggregator.java | 78 -
.../sdk/transforms/AggregatorRetriever.java | 36 -
.../sdk/transforms/AppliedPTransform.java | 100 -
.../sdk/transforms/ApproximateQuantiles.java | 766 -----
.../sdk/transforms/ApproximateUnique.java | 419 ---
.../cloud/dataflow/sdk/transforms/Combine.java | 2240 -------------
.../dataflow/sdk/transforms/CombineFnBase.java | 283 --
.../dataflow/sdk/transforms/CombineFns.java | 1100 -------
.../sdk/transforms/CombineWithContext.java | 277 --
.../cloud/dataflow/sdk/transforms/Count.java | 135 -
.../cloud/dataflow/sdk/transforms/Create.java | 426 ---
.../cloud/dataflow/sdk/transforms/DoFn.java | 563 ----
.../dataflow/sdk/transforms/DoFnReflector.java | 668 ----
.../dataflow/sdk/transforms/DoFnTester.java | 495 ---
.../sdk/transforms/DoFnWithContext.java | 416 ---
.../cloud/dataflow/sdk/transforms/Filter.java | 234 --
.../sdk/transforms/FlatMapElements.java | 145 -
.../cloud/dataflow/sdk/transforms/Flatten.java | 219 --
.../dataflow/sdk/transforms/GroupByKey.java | 575 ----
.../transforms/IntraBundleParallelization.java | 346 --
.../cloud/dataflow/sdk/transforms/Keys.java | 68 -
.../cloud/dataflow/sdk/transforms/KvSwap.java | 73 -
.../dataflow/sdk/transforms/MapElements.java | 112 -
.../cloud/dataflow/sdk/transforms/Max.java | 255 --
.../cloud/dataflow/sdk/transforms/Mean.java | 202 --
.../cloud/dataflow/sdk/transforms/Min.java | 255 --
.../dataflow/sdk/transforms/PTransform.java | 324 --
.../cloud/dataflow/sdk/transforms/ParDo.java | 1321 --------
.../dataflow/sdk/transforms/Partition.java | 173 -
.../sdk/transforms/RemoveDuplicates.java | 158 -
.../cloud/dataflow/sdk/transforms/Sample.java | 246 --
.../sdk/transforms/SerializableComparator.java | 28 -
.../sdk/transforms/SerializableFunction.java | 31 -
.../dataflow/sdk/transforms/SimpleFunction.java | 54 -
.../cloud/dataflow/sdk/transforms/Sum.java | 188 --
.../cloud/dataflow/sdk/transforms/Top.java | 559 ----
.../cloud/dataflow/sdk/transforms/Values.java | 68 -
.../cloud/dataflow/sdk/transforms/View.java | 470 ---
.../cloud/dataflow/sdk/transforms/WithKeys.java | 140 -
.../dataflow/sdk/transforms/WithTimestamps.java | 129 -
.../cloud/dataflow/sdk/transforms/Write.java | 27 -
.../sdk/transforms/display/DisplayData.java | 530 ---
.../sdk/transforms/display/HasDisplayData.java | 53 -
.../sdk/transforms/join/CoGbkResult.java | 463 ---
.../sdk/transforms/join/CoGbkResultSchema.java | 134 -
.../sdk/transforms/join/CoGroupByKey.java | 211 --
.../transforms/join/KeyedPCollectionTuple.java | 247 --
.../sdk/transforms/join/RawUnionValue.java | 51 -
.../sdk/transforms/join/UnionCoder.java | 147 -
.../sdk/transforms/join/package-info.java | 21 -
.../dataflow/sdk/transforms/package-info.java | 43 -
.../sdk/transforms/windowing/AfterAll.java | 117 -
.../windowing/AfterDelayFromFirstElement.java | 322 --
.../sdk/transforms/windowing/AfterEach.java | 135 -
.../sdk/transforms/windowing/AfterFirst.java | 119 -
.../sdk/transforms/windowing/AfterPane.java | 145 -
.../windowing/AfterProcessingTime.java | 97 -
.../AfterSynchronizedProcessingTime.java | 75 -
.../transforms/windowing/AfterWatermark.java | 397 ---
.../sdk/transforms/windowing/BoundedWindow.java | 46 -
.../transforms/windowing/CalendarWindows.java | 348 --
.../transforms/windowing/DefaultTrigger.java | 95 -
.../sdk/transforms/windowing/FixedWindows.java | 116 -
.../sdk/transforms/windowing/GlobalWindow.java | 68 -
.../sdk/transforms/windowing/GlobalWindows.java | 63 -
.../transforms/windowing/IntervalWindow.java | 201 --
.../transforms/windowing/InvalidWindows.java | 87 -
.../MergeOverlappingIntervalWindows.java | 86 -
.../windowing/NonMergingWindowFn.java | 35 -
.../transforms/windowing/OrFinallyTrigger.java | 100 -
.../sdk/transforms/windowing/OutputTimeFn.java | 319 --
.../sdk/transforms/windowing/OutputTimeFns.java | 168 -
.../sdk/transforms/windowing/PaneInfo.java | 384 ---
.../windowing/PartitioningWindowFn.java | 61 -
.../sdk/transforms/windowing/Repeatedly.java | 100 -
.../sdk/transforms/windowing/Sessions.java | 112 -
.../transforms/windowing/SlidingWindows.java | 214 --
.../sdk/transforms/windowing/Trigger.java | 544 ----
.../transforms/windowing/TriggerBuilder.java | 29 -
.../sdk/transforms/windowing/Window.java | 662 ----
.../sdk/transforms/windowing/WindowFn.java | 221 --
.../sdk/transforms/windowing/package-info.java | 49 -
.../dataflow/sdk/util/ActiveWindowSet.java | 171 -
.../cloud/dataflow/sdk/util/ApiSurface.java | 642 ----
.../dataflow/sdk/util/AppEngineEnvironment.java | 61 -
.../dataflow/sdk/util/AppliedCombineFn.java | 130 -
.../dataflow/sdk/util/AssignWindowsDoFn.java | 67 -
...AttemptAndTimeBoundedExponentialBackOff.java | 168 -
.../util/AttemptBoundedExponentialBackOff.java | 83 -
.../cloud/dataflow/sdk/util/AvroUtils.java | 345 --
.../dataflow/sdk/util/BaseExecutionContext.java | 155 -
.../dataflow/sdk/util/BatchTimerInternals.java | 138 -
.../sdk/util/BigQueryTableInserter.java | 434 ---
.../sdk/util/BigQueryTableRowIterator.java | 469 ---
.../cloud/dataflow/sdk/util/BitSetCoder.java | 59 -
.../BufferedElementCountingOutputStream.java | 184 --
.../cloud/dataflow/sdk/util/CloudKnownType.java | 138 -
.../cloud/dataflow/sdk/util/CloudObject.java | 184 --
.../cloud/dataflow/sdk/util/CoderUtils.java | 327 --
.../sdk/util/CombineContextFactory.java | 107 -
.../cloud/dataflow/sdk/util/CombineFnUtil.java | 154 -
.../dataflow/sdk/util/CounterAggregator.java | 96 -
.../dataflow/sdk/util/CredentialFactory.java | 29 -
.../cloud/dataflow/sdk/util/Credentials.java | 192 --
.../sdk/util/DataflowPathValidator.java | 97 -
.../dataflow/sdk/util/DataflowReleaseInfo.java | 87 -
.../sdk/util/DirectModeExecutionContext.java | 130 -
.../sdk/util/DirectSideInputReader.java | 73 -
.../cloud/dataflow/sdk/util/DoFnInfo.java | 67 -
.../cloud/dataflow/sdk/util/DoFnRunner.java | 60 -
.../cloud/dataflow/sdk/util/DoFnRunnerBase.java | 558 ----
.../cloud/dataflow/sdk/util/DoFnRunners.java | 142 -
.../dataflow/sdk/util/ExecutableTrigger.java | 159 -
.../dataflow/sdk/util/ExecutionContext.java | 102 -
.../sdk/util/ExposedByteArrayInputStream.java | 51 -
.../sdk/util/ExposedByteArrayOutputStream.java | 115 -
.../dataflow/sdk/util/FileIOChannelFactory.java | 135 -
.../dataflow/sdk/util/FinishedTriggers.java | 42 -
.../sdk/util/FinishedTriggersBitSet.java | 68 -
.../dataflow/sdk/util/FinishedTriggersSet.java | 74 -
.../dataflow/sdk/util/GcpCredentialFactory.java | 45 -
.../dataflow/sdk/util/GcsIOChannelFactory.java | 86 -
.../cloud/dataflow/sdk/util/GcsStager.java | 53 -
.../google/cloud/dataflow/sdk/util/GcsUtil.java | 406 ---
.../util/GroupAlsoByWindowViaWindowSetDoFn.java | 104 -
.../sdk/util/GroupAlsoByWindowsDoFn.java | 58 -
.../GroupAlsoByWindowsViaOutputBufferDoFn.java | 98 -
.../dataflow/sdk/util/IOChannelFactory.java | 101 -
.../cloud/dataflow/sdk/util/IOChannelUtils.java | 204 --
.../sdk/util/IllegalMutationException.java | 52 -
.../dataflow/sdk/util/InstanceBuilder.java | 269 --
.../util/IntervalBoundedExponentialBackOff.java | 87 -
.../cloud/dataflow/sdk/util/KeyedWorkItem.java | 41 -
.../dataflow/sdk/util/KeyedWorkItemCoder.java | 120 -
.../cloud/dataflow/sdk/util/KeyedWorkItems.java | 120 -
.../sdk/util/LateDataDroppingDoFnRunner.java | 145 -
.../dataflow/sdk/util/MapAggregatorValues.java | 48 -
.../sdk/util/MergingActiveWindowSet.java | 544 ----
.../cloud/dataflow/sdk/util/MimeTypes.java | 23 -
.../cloud/dataflow/sdk/util/MonitoringUtil.java | 233 --
.../dataflow/sdk/util/MutationDetector.java | 31 -
.../dataflow/sdk/util/MutationDetectors.java | 182 --
.../cloud/dataflow/sdk/util/NonEmptyPanes.java | 148 -
.../sdk/util/NonMergingActiveWindowSet.java | 85 -
.../sdk/util/NoopCredentialFactory.java | 38 -
.../dataflow/sdk/util/NoopPathValidator.java | 48 -
.../dataflow/sdk/util/NullSideInputReader.java | 61 -
.../dataflow/sdk/util/OutputReference.java | 42 -
.../sdk/util/PCollectionViewWindow.java | 67 -
.../dataflow/sdk/util/PCollectionViews.java | 426 ---
.../google/cloud/dataflow/sdk/util/PTuple.java | 160 -
.../cloud/dataflow/sdk/util/PackageUtil.java | 327 --
.../dataflow/sdk/util/PaneInfoTracker.java | 151 -
.../cloud/dataflow/sdk/util/PathValidator.java | 47 -
.../sdk/util/PerKeyCombineFnRunner.java | 147 -
.../sdk/util/PerKeyCombineFnRunners.java | 257 --
.../cloud/dataflow/sdk/util/PropertyNames.java | 107 -
.../dataflow/sdk/util/RandomAccessData.java | 352 --
.../cloud/dataflow/sdk/util/ReduceFn.java | 128 -
.../sdk/util/ReduceFnContextFactory.java | 495 ---
.../cloud/dataflow/sdk/util/ReduceFnRunner.java | 843 -----
.../sdk/util/ReifyTimestampAndWindowsDoFn.java | 46 -
.../cloud/dataflow/sdk/util/Reshuffle.java | 145 -
.../dataflow/sdk/util/ReshuffleTrigger.java | 61 -
.../sdk/util/RetryHttpRequestInitializer.java | 250 --
.../dataflow/sdk/util/SerializableUtils.java | 159 -
.../cloud/dataflow/sdk/util/Serializer.java | 145 -
.../sdk/util/ShardingWritableByteChannel.java | 118 -
.../dataflow/sdk/util/SideInputReader.java | 48 -
.../dataflow/sdk/util/SimpleDoFnRunner.java | 55 -
.../google/cloud/dataflow/sdk/util/Stager.java | 29 -
.../cloud/dataflow/sdk/util/StreamUtils.java | 68 -
.../cloud/dataflow/sdk/util/StringUtils.java | 242 --
.../google/cloud/dataflow/sdk/util/Structs.java | 384 ---
.../dataflow/sdk/util/SystemDoFnInternal.java | 37 -
.../cloud/dataflow/sdk/util/SystemReduceFn.java | 133 -
.../cloud/dataflow/sdk/util/TestCredential.java | 51 -
.../cloud/dataflow/sdk/util/TimeDomain.java | 41 -
.../cloud/dataflow/sdk/util/TimeUtil.java | 164 -
.../cloud/dataflow/sdk/util/TimerInternals.java | 269 --
.../google/cloud/dataflow/sdk/util/Timers.java | 60 -
.../cloud/dataflow/sdk/util/Transport.java | 205 --
.../sdk/util/TriggerContextFactory.java | 522 ---
.../cloud/dataflow/sdk/util/TriggerRunner.java | 223 --
.../dataflow/sdk/util/UnownedInputStream.java | 76 -
.../dataflow/sdk/util/UnownedOutputStream.java | 56 -
.../sdk/util/UploadIdResponseInterceptor.java | 61 -
.../dataflow/sdk/util/UserCodeException.java | 94 -
.../dataflow/sdk/util/ValueWithRecordId.java | 154 -
.../google/cloud/dataflow/sdk/util/Values.java | 88 -
.../google/cloud/dataflow/sdk/util/VarInt.java | 115 -
.../cloud/dataflow/sdk/util/WatermarkHold.java | 450 ---
.../cloud/dataflow/sdk/util/Weighted.java | 27 -
.../cloud/dataflow/sdk/util/WeightedValue.java | 45 -
.../cloud/dataflow/sdk/util/WindowTracing.java | 36 -
.../cloud/dataflow/sdk/util/WindowedValue.java | 720 -----
.../dataflow/sdk/util/WindowingInternals.java | 82 -
.../dataflow/sdk/util/WindowingStrategy.java | 268 --
.../cloud/dataflow/sdk/util/ZipFiles.java | 294 --
.../cloud/dataflow/sdk/util/common/Counter.java | 1103 -------
.../sdk/util/common/CounterProvider.java | 26 -
.../dataflow/sdk/util/common/CounterSet.java | 177 --
.../util/common/ElementByteSizeObservable.java | 41 -
.../ElementByteSizeObservableIterable.java | 63 -
.../ElementByteSizeObservableIterator.java | 36 -
.../util/common/ElementByteSizeObserver.java | 92 -
.../sdk/util/common/PeekingReiterator.java | 98 -
.../sdk/util/common/ReflectHelpers.java | 209 --
.../dataflow/sdk/util/common/Reiterable.java | 27 -
.../dataflow/sdk/util/common/Reiterator.java | 39 -
.../dataflow/sdk/util/common/package-info.java | 18 -
.../sdk/util/common/worker/StateSampler.java | 365 ---
.../sdk/util/common/worker/package-info.java | 18 -
.../cloud/dataflow/sdk/util/gcsfs/GcsPath.java | 619 ----
.../dataflow/sdk/util/gcsfs/package-info.java | 18 -
.../cloud/dataflow/sdk/util/package-info.java | 18 -
.../util/state/AccumulatorCombiningState.java | 51 -
.../cloud/dataflow/sdk/util/state/BagState.java | 26 -
.../dataflow/sdk/util/state/CombiningState.java | 40 -
.../CopyOnAccessInMemoryStateInternals.java | 454 ---
.../sdk/util/state/InMemoryStateInternals.java | 414 ---
.../sdk/util/state/MergingStateAccessor.java | 40 -
.../dataflow/sdk/util/state/ReadableState.java | 53 -
.../cloud/dataflow/sdk/util/state/State.java | 30 -
.../dataflow/sdk/util/state/StateAccessor.java | 36 -
.../dataflow/sdk/util/state/StateContext.java | 41 -
.../dataflow/sdk/util/state/StateContexts.java | 107 -
.../dataflow/sdk/util/state/StateInternals.java | 55 -
.../dataflow/sdk/util/state/StateMerging.java | 254 --
.../dataflow/sdk/util/state/StateNamespace.java | 54 -
.../sdk/util/state/StateNamespaceForTest.java | 63 -
.../sdk/util/state/StateNamespaces.java | 277 --
.../dataflow/sdk/util/state/StateTable.java | 89 -
.../cloud/dataflow/sdk/util/state/StateTag.java | 96 -
.../dataflow/sdk/util/state/StateTags.java | 579 ----
.../dataflow/sdk/util/state/ValueState.java | 35 -
.../sdk/util/state/WatermarkHoldState.java | 42 -
.../google/cloud/dataflow/sdk/values/KV.java | 130 -
.../cloud/dataflow/sdk/values/PBegin.java | 87 -
.../cloud/dataflow/sdk/values/PCollection.java | 250 --
.../dataflow/sdk/values/PCollectionList.java | 238 --
.../dataflow/sdk/values/PCollectionTuple.java | 264 --
.../dataflow/sdk/values/PCollectionView.java | 64 -
.../google/cloud/dataflow/sdk/values/PDone.java | 47 -
.../cloud/dataflow/sdk/values/PInput.java | 56 -
.../cloud/dataflow/sdk/values/POutput.java | 76 -
.../dataflow/sdk/values/POutputValueBase.java | 102 -
.../cloud/dataflow/sdk/values/PValue.java | 38 -
.../cloud/dataflow/sdk/values/PValueBase.java | 155 -
.../dataflow/sdk/values/TimestampedValue.java | 155 -
.../cloud/dataflow/sdk/values/TupleTag.java | 196 --
.../cloud/dataflow/sdk/values/TupleTagList.java | 148 -
.../dataflow/sdk/values/TypeDescriptor.java | 351 --
.../cloud/dataflow/sdk/values/TypedPValue.java | 197 --
.../cloud/dataflow/sdk/values/package-info.java | 52 -
sdk/src/main/proto/README.md | 27 -
.../main/proto/proto2_coder_test_messages.proto | 51 -
.../google/cloud/dataflow/sdk/sdk.properties | 5 -
.../cloud/dataflow/sdk/DataflowMatchers.java | 65 -
.../google/cloud/dataflow/sdk/PipelineTest.java | 296 --
.../google/cloud/dataflow/sdk/TestUtils.java | 213 --
.../cloud/dataflow/sdk/WindowMatchers.java | 137 -
.../dataflow/sdk/coders/AvroCoderTest.java | 754 -----
.../sdk/coders/BigEndianIntegerCoderTest.java | 90 -
.../sdk/coders/BigEndianLongCoderTest.java | 94 -
.../dataflow/sdk/coders/ByteArrayCoderTest.java | 144 -
.../dataflow/sdk/coders/ByteCoderTest.java | 91 -
.../sdk/coders/ByteStringCoderTest.java | 121 -
.../dataflow/sdk/coders/CoderFactoriesTest.java | 100 -
.../dataflow/sdk/coders/CoderProvidersTest.java | 71 -
.../dataflow/sdk/coders/CoderRegistryTest.java | 521 ---
.../cloud/dataflow/sdk/coders/CoderTest.java | 78 -
.../sdk/coders/CollectionCoderTest.java | 93 -
.../dataflow/sdk/coders/CustomCoderTest.java | 135 -
.../dataflow/sdk/coders/DefaultCoderTest.java | 128 -
.../dataflow/sdk/coders/DelegateCoderTest.java | 141 -
.../dataflow/sdk/coders/DoubleCoderTest.java | 96 -
.../dataflow/sdk/coders/DurationCoderTest.java | 86 -
.../dataflow/sdk/coders/EntityCoderTest.java | 108 -
.../dataflow/sdk/coders/InstantCoderTest.java | 116 -
.../dataflow/sdk/coders/IterableCoderTest.java | 109 -
.../dataflow/sdk/coders/JAXBCoderTest.java | 99 -
.../cloud/dataflow/sdk/coders/KvCoderTest.java | 118 -
.../dataflow/sdk/coders/ListCoderTest.java | 134 -
.../cloud/dataflow/sdk/coders/MapCoderTest.java | 106 -
.../dataflow/sdk/coders/NullableCoderTest.java | 132 -
.../sdk/coders/PrintBase64Encodings.java | 81 -
.../dataflow/sdk/coders/Proto2CoderTest.java | 145 -
.../sdk/coders/SerializableCoderTest.java | 222 --
.../cloud/dataflow/sdk/coders/SetCoderTest.java | 86 -
.../dataflow/sdk/coders/StandardCoderTest.java | 176 -
.../sdk/coders/StringDelegateCoderTest.java | 72 -
.../sdk/coders/StringUtf8CoderTest.java | 80 -
.../sdk/coders/StructuralByteArrayTest.java | 39 -
.../sdk/coders/TableRowJsonCoderTest.java | 86 -
.../sdk/coders/TextualIntegerCoderTest.java | 90 -
.../dataflow/sdk/coders/VarIntCoderTest.java | 91 -
.../dataflow/sdk/coders/VarLongCoderTest.java | 94 -
.../sdk/coders/protobuf/ProtoCoderTest.java | 182 --
.../sdk/coders/protobuf/ProtobufUtilTest.java | 195 --
.../sdk/io/AvroIOGeneratedClassTest.java | 374 ---
.../cloud/dataflow/sdk/io/AvroIOTest.java | 226 --
.../cloud/dataflow/sdk/io/AvroSourceTest.java | 692 ----
.../cloud/dataflow/sdk/io/BigQueryIOTest.java | 445 ---
.../io/BoundedReadFromUnboundedSourceTest.java | 132 -
.../dataflow/sdk/io/CompressedSourceTest.java | 430 ---
.../dataflow/sdk/io/CountingInputTest.java | 125 -
.../dataflow/sdk/io/CountingSourceTest.java | 216 --
.../cloud/dataflow/sdk/io/DatastoreIOTest.java | 631 ----
.../dataflow/sdk/io/FileBasedSinkTest.java | 512 ---
.../dataflow/sdk/io/FileBasedSourceTest.java | 914 ------
.../dataflow/sdk/io/OffsetBasedSourceTest.java | 278 --
.../cloud/dataflow/sdk/io/PubsubIOTest.java | 233 --
.../google/cloud/dataflow/sdk/io/ReadTest.java | 144 -
.../cloud/dataflow/sdk/io/TextIOTest.java | 562 ----
.../google/cloud/dataflow/sdk/io/WriteTest.java | 341 --
.../cloud/dataflow/sdk/io/XmlSinkTest.java | 235 --
.../cloud/dataflow/sdk/io/XmlSourceTest.java | 822 -----
.../sdk/io/bigtable/BigtableIOTest.java | 688 ----
.../range/ByteKeyRangeEstimateFractionTest.java | 69 -
.../range/ByteKeyRangeInterpolateKeyTest.java | 73 -
.../dataflow/sdk/io/range/ByteKeyRangeTest.java | 396 ---
.../sdk/io/range/ByteKeyRangeTrackerTest.java | 118 -
.../dataflow/sdk/io/range/ByteKeyTest.java | 178 --
.../sdk/io/range/OffsetRangeTrackerTest.java | 186 --
.../com/google/cloud/dataflow/sdk/io/user.avsc | 10 -
.../DataflowPipelineDebugOptionsTest.java | 40 -
.../options/DataflowPipelineOptionsTest.java | 91 -
.../options/DataflowProfilingOptionsTest.java | 47 -
.../DataflowWorkerLoggingOptionsTest.java | 73 -
.../dataflow/sdk/options/GcpOptionsTest.java | 123 -
.../sdk/options/GoogleApiDebugOptionsTest.java | 147 -
.../sdk/options/PipelineOptionsFactoryTest.java | 1154 -------
.../sdk/options/PipelineOptionsTest.java | 126 -
.../options/PipelineOptionsValidatorTest.java | 310 --
.../sdk/options/ProxyInvocationHandlerTest.java | 691 ----
.../AggregatorPipelineExtractorTest.java | 228 --
.../BlockingDataflowPipelineRunnerTest.java | 301 --
.../sdk/runners/DataflowPipelineJobTest.java | 603 ----
.../runners/DataflowPipelineRegistrarTest.java | 72 -
.../sdk/runners/DataflowPipelineRunnerTest.java | 1368 --------
.../sdk/runners/DataflowPipelineTest.java | 44 -
.../runners/DataflowPipelineTranslatorTest.java | 889 ------
.../runners/DirectPipelineRegistrarTest.java | 69 -
.../sdk/runners/DirectPipelineRunnerTest.java | 210 --
.../sdk/runners/DirectPipelineTest.java | 34 -
.../sdk/runners/PipelineRunnerTest.java | 82 -
.../dataflow/sdk/runners/TransformTreeTest.java | 194 --
.../sdk/runners/dataflow/CustomSourcesTest.java | 273 --
.../runners/dataflow/TestCountingSource.java | 235 --
.../BoundedReadEvaluatorFactoryTest.java | 287 --
.../ConsumerTrackingPipelineVisitorTest.java | 233 --
.../inprocess/FlattenEvaluatorFactoryTest.java | 136 -
.../inprocess/ForwardingPTransformTest.java | 100 -
.../GroupByKeyEvaluatorFactoryTest.java | 178 --
.../inprocess/InMemoryWatermarkManagerTest.java | 1111 -------
.../runners/inprocess/InProcessBundleTest.java | 143 -
.../runners/inprocess/InProcessCreateTest.java | 199 --
.../InProcessEvaluationContextTest.java | 544 ----
.../inprocess/InProcessPipelineRunnerTest.java | 77 -
.../InProcessSideInputContainerTest.java | 370 ---
.../inprocess/InProcessTimerInternalsTest.java | 131 -
.../KeyedPValueTrackingVisitorTest.java | 189 --
.../sdk/runners/inprocess/MockClock.java | 60 -
.../ParDoMultiEvaluatorFactoryTest.java | 412 ---
.../ParDoSingleEvaluatorFactoryTest.java | 310 --
.../TransformExecutorServicesTest.java | 134 -
.../inprocess/TransformExecutorTest.java | 312 --
.../UnboundedReadEvaluatorFactoryTest.java | 327 --
.../inprocess/ViewEvaluatorFactoryTest.java | 96 -
.../WatermarkCallbackExecutorTest.java | 126 -
.../sdk/testing/CoderPropertiesTest.java | 214 --
.../sdk/testing/DataflowAssertTest.java | 326 --
.../sdk/testing/DataflowJUnitTestRunner.java | 129 -
.../dataflow/sdk/testing/ExpectedLogs.java | 306 --
.../dataflow/sdk/testing/ExpectedLogsTest.java | 153 -
.../sdk/testing/FastNanoClockAndSleeper.java | 47 -
.../testing/FastNanoClockAndSleeperTest.java | 47 -
.../sdk/testing/PCollectionViewTesting.java | 295 --
.../sdk/testing/ResetDateTimeProvider.java | 41 -
.../sdk/testing/ResetDateTimeProviderTest.java | 55 -
.../sdk/testing/RestoreSystemProperties.java | 51 -
.../testing/RestoreSystemPropertiesTest.java | 50 -
.../sdk/testing/SerializableMatchersTest.java | 165 -
.../sdk/testing/SystemNanoTimeSleeper.java | 68 -
.../sdk/testing/SystemNanoTimeSleeperTest.java | 53 -
.../testing/TestDataflowPipelineRunnerTest.java | 317 --
.../dataflow/sdk/testing/TestPipelineTest.java | 93 -
.../transforms/ApproximateQuantilesTest.java | 299 --
.../sdk/transforms/ApproximateUniqueTest.java | 291 --
.../dataflow/sdk/transforms/CombineFnsTest.java | 413 ---
.../dataflow/sdk/transforms/CombineTest.java | 1137 -------
.../dataflow/sdk/transforms/CountTest.java | 121 -
.../dataflow/sdk/transforms/CreateTest.java | 240 --
.../sdk/transforms/DoFnContextTest.java | 68 -
.../DoFnDelegatingAggregatorTest.java | 143 -
.../sdk/transforms/DoFnReflectorTest.java | 493 ---
.../cloud/dataflow/sdk/transforms/DoFnTest.java | 206 --
.../dataflow/sdk/transforms/DoFnTesterTest.java | 253 --
.../sdk/transforms/DoFnWithContextTest.java | 225 --
.../dataflow/sdk/transforms/FilterTest.java | 160 -
.../sdk/transforms/FlatMapElementsTest.java | 124 -
.../dataflow/sdk/transforms/FlattenTest.java | 369 ---
.../dataflow/sdk/transforms/GroupByKeyTest.java | 438 ---
.../IntraBundleParallelizationTest.java | 250 --
.../cloud/dataflow/sdk/transforms/KeysTest.java | 83 -
.../dataflow/sdk/transforms/KvSwapTest.java | 91 -
.../sdk/transforms/MapElementsTest.java | 134 -
.../cloud/dataflow/sdk/transforms/MaxTest.java | 66 -
.../cloud/dataflow/sdk/transforms/MeanTest.java | 72 -
.../cloud/dataflow/sdk/transforms/MinTest.java | 66 -
.../cloud/dataflow/sdk/transforms/NoOpDoFn.java | 143 -
.../dataflow/sdk/transforms/PTransformTest.java | 41 -
.../dataflow/sdk/transforms/ParDoTest.java | 1541 ---------
.../dataflow/sdk/transforms/PartitionTest.java | 140 -
.../sdk/transforms/RemoveDuplicatesTest.java | 131 -
.../dataflow/sdk/transforms/SampleTest.java | 260 --
.../sdk/transforms/SimpleStatsFnsTest.java | 129 -
.../cloud/dataflow/sdk/transforms/SumTest.java | 66 -
.../cloud/dataflow/sdk/transforms/TopTest.java | 259 --
.../dataflow/sdk/transforms/ValuesTest.java | 93 -
.../cloud/dataflow/sdk/transforms/ViewTest.java | 1548 ---------
.../dataflow/sdk/transforms/WithKeysTest.java | 127 -
.../sdk/transforms/WithTimestampsTest.java | 210 --
.../transforms/display/DisplayDataMatchers.java | 98 -
.../display/DisplayDataMatchersTest.java | 81 -
.../sdk/transforms/display/DisplayDataTest.java | 633 ----
.../transforms/join/CoGbkResultCoderTest.java | 85 -
.../sdk/transforms/join/CoGbkResultTest.java | 124 -
.../sdk/transforms/join/CoGroupByKeyTest.java | 507 ---
.../sdk/transforms/join/UnionCoderTest.java | 48 -
.../sdk/transforms/windowing/AfterAllTest.java | 151 -
.../sdk/transforms/windowing/AfterEachTest.java | 122 -
.../transforms/windowing/AfterFirstTest.java | 175 -
.../sdk/transforms/windowing/AfterPaneTest.java | 126 -
.../windowing/AfterProcessingTimeTest.java | 157 -
.../AfterSynchronizedProcessingTimeTest.java | 121 -
.../windowing/AfterWatermarkTest.java | 338 --
.../windowing/CalendarWindowsTest.java | 260 --
.../windowing/DefaultTriggerTest.java | 176 -
.../transforms/windowing/FixedWindowsTest.java | 124 -
.../windowing/IntervalWindowTest.java | 94 -
.../windowing/OrFinallyTriggerTest.java | 209 --
.../sdk/transforms/windowing/PaneInfoTest.java | 75 -
.../transforms/windowing/RepeatedlyTest.java | 128 -
.../sdk/transforms/windowing/SessionsTest.java | 156 -
.../windowing/SlidingWindowsTest.java | 193 --
.../sdk/transforms/windowing/TriggerTest.java | 117 -
.../sdk/transforms/windowing/WindowTest.java | 226 --
.../sdk/transforms/windowing/WindowingTest.java | 244 --
.../cloud/dataflow/sdk/util/ApiSurfaceTest.java | 187 --
...mptAndTimeBoundedExponentialBackOffTest.java | 212 --
.../AttemptBoundedExponentialBackOffTest.java | 85 -
.../cloud/dataflow/sdk/util/AvroUtilsTest.java | 225 --
.../sdk/util/BatchTimerInternalsTest.java | 116 -
.../sdk/util/BigQueryTableInserterTest.java | 239 --
.../sdk/util/BigQueryTableRowIteratorTest.java | 255 --
.../dataflow/sdk/util/BigQueryUtilTest.java | 479 ---
...BufferedElementCountingOutputStreamTest.java | 205 --
.../cloud/dataflow/sdk/util/CoderUtilsTest.java | 229 --
.../dataflow/sdk/util/CombineFnUtilTest.java | 62 -
.../sdk/util/CounterAggregatorTest.java | 253 --
.../sdk/util/DataflowPathValidatorTest.java | 92 -
.../sdk/util/ExecutableTriggerTest.java | 130 -
.../util/ExposedByteArrayInputStreamTest.java | 78 -
.../util/ExposedByteArrayOutputStreamTest.java | 245 --
.../sdk/util/FileIOChannelFactoryTest.java | 226 --
.../sdk/util/FinishedTriggersBitSetTest.java | 54 -
.../sdk/util/FinishedTriggersProperties.java | 109 -
.../sdk/util/FinishedTriggersSetTest.java | 60 -
.../sdk/util/GcsIOChannelFactoryTest.java | 43 -
.../cloud/dataflow/sdk/util/GcsUtilTest.java | 490 ---
.../sdk/util/GroupAlsoByWindowsProperties.java | 718 -----
...oupAlsoByWindowsViaOutputBufferDoFnTest.java | 111 -
.../dataflow/sdk/util/IOChannelUtilsTest.java | 94 -
.../dataflow/sdk/util/InstanceBuilderTest.java | 115 -
.../IntervalBoundedExponentialBackOffTest.java | 99 -
.../sdk/util/KeyedWorkItemCoderTest.java | 61 -
.../util/LateDataDroppingDoFnRunnerTest.java | 115 -
.../sdk/util/MergingActiveWindowSetTest.java | 175 -
.../dataflow/sdk/util/MonitoringUtilTest.java | 146 -
.../sdk/util/MutationDetectorsTest.java | 148 -
.../cloud/dataflow/sdk/util/PTupleTest.java | 40 -
.../dataflow/sdk/util/PackageUtilTest.java | 482 ---
.../dataflow/sdk/util/RandomAccessDataTest.java | 205 --
.../dataflow/sdk/util/ReduceFnRunnerTest.java | 1049 ------
.../cloud/dataflow/sdk/util/ReduceFnTester.java | 776 -----
.../cloud/dataflow/sdk/util/ReshuffleTest.java | 208 --
.../dataflow/sdk/util/ReshuffleTriggerTest.java | 58 -
.../util/RetryHttpRequestInitializerTest.java | 296 --
.../sdk/util/SerializableUtilsTest.java | 165 -
.../cloud/dataflow/sdk/util/SerializerTest.java | 162 -
.../dataflow/sdk/util/SimpleDoFnRunnerTest.java | 86 -
.../dataflow/sdk/util/StreamUtilsTest.java | 71 -
.../dataflow/sdk/util/StringUtilsTest.java | 145 -
.../cloud/dataflow/sdk/util/StructsTest.java | 206 --
.../cloud/dataflow/sdk/util/TimeUtilTest.java | 73 -
.../dataflow/sdk/util/TimerInternalsTest.java | 52 -
.../cloud/dataflow/sdk/util/TriggerTester.java | 585 ----
.../sdk/util/UnownedInputStreamTest.java | 76 -
.../sdk/util/UnownedOutputStreamTest.java | 57 -
.../util/UploadIdResponseInterceptorTest.java | 99 -
.../sdk/util/UserCodeExceptionTest.java | 176 -
.../cloud/dataflow/sdk/util/VarIntTest.java | 277 --
.../dataflow/sdk/util/WindowedValueTest.java | 57 -
.../cloud/dataflow/sdk/util/ZipFilesTest.java | 311 --
.../sdk/util/common/CounterSetTest.java | 225 --
.../dataflow/sdk/util/common/CounterTest.java | 589 ----
.../sdk/util/common/CounterTestUtils.java | 56 -
.../sdk/util/common/ReflectHelpersTest.java | 126 -
.../dataflow/sdk/util/gcsfs/GcsPathTest.java | 333 --
.../CopyOnAccessInMemoryStateInternalsTest.java | 553 ----
.../util/state/InMemoryStateInternalsTest.java | 348 --
.../sdk/util/state/StateNamespacesTest.java | 129 -
.../dataflow/sdk/util/state/StateTagTest.java | 173 -
.../cloud/dataflow/sdk/values/KVTest.java | 112 -
.../sdk/values/PCollectionListTest.java | 47 -
.../sdk/values/PCollectionTupleTest.java | 93 -
.../cloud/dataflow/sdk/values/PDoneTest.java | 102 -
.../cloud/dataflow/sdk/values/TupleTagTest.java | 87 -
.../dataflow/sdk/values/TypeDescriptorTest.java | 193 --
.../dataflow/sdk/values/TypedPValueTest.java | 164 -
.../PipelineOptionsFactoryJava8Test.java | 90 -
sdks/java/core/pom.xml | 772 +++++
.../com/google/cloud/dataflow/sdk/Pipeline.java | 502 +++
.../cloud/dataflow/sdk/PipelineResult.java | 95 +
.../dataflow/sdk/annotations/Experimental.java | 80 +
.../dataflow/sdk/annotations/package-info.java | 20 +
.../cloud/dataflow/sdk/coders/AtomicCoder.java | 51 +
.../cloud/dataflow/sdk/coders/AvroCoder.java | 714 +++++
.../sdk/coders/BigEndianIntegerCoder.java | 99 +
.../dataflow/sdk/coders/BigEndianLongCoder.java | 99 +
.../dataflow/sdk/coders/ByteArrayCoder.java | 138 +
.../cloud/dataflow/sdk/coders/ByteCoder.java | 111 +
.../dataflow/sdk/coders/ByteStringCoder.java | 106 +
.../sdk/coders/CannotProvideCoderException.java | 95 +
.../google/cloud/dataflow/sdk/coders/Coder.java | 298 ++
.../dataflow/sdk/coders/CoderException.java | 36 +
.../dataflow/sdk/coders/CoderFactories.java | 274 ++
.../cloud/dataflow/sdk/coders/CoderFactory.java | 43 +
.../dataflow/sdk/coders/CoderProvider.java | 33 +
.../dataflow/sdk/coders/CoderProviders.java | 164 +
.../dataflow/sdk/coders/CoderRegistry.java | 843 +++++
.../dataflow/sdk/coders/CollectionCoder.java | 73 +
.../cloud/dataflow/sdk/coders/CustomCoder.java | 137 +
.../cloud/dataflow/sdk/coders/DefaultCoder.java | 66 +
.../dataflow/sdk/coders/DelegateCoder.java | 164 +
.../sdk/coders/DeterministicStandardCoder.java | 38 +
.../cloud/dataflow/sdk/coders/DoubleCoder.java | 113 +
.../dataflow/sdk/coders/DurationCoder.java | 97 +
.../cloud/dataflow/sdk/coders/EntityCoder.java | 86 +
.../cloud/dataflow/sdk/coders/InstantCoder.java | 113 +
.../dataflow/sdk/coders/IterableCoder.java | 78 +
.../dataflow/sdk/coders/IterableLikeCoder.java | 278 ++
.../cloud/dataflow/sdk/coders/JAXBCoder.java | 135 +
.../cloud/dataflow/sdk/coders/KvCoder.java | 162 +
.../cloud/dataflow/sdk/coders/KvCoderBase.java | 61 +
.../cloud/dataflow/sdk/coders/ListCoder.java | 77 +
.../cloud/dataflow/sdk/coders/MapCoder.java | 160 +
.../cloud/dataflow/sdk/coders/MapCoderBase.java | 54 +
.../dataflow/sdk/coders/NullableCoder.java | 175 +
.../cloud/dataflow/sdk/coders/Proto2Coder.java | 361 +++
.../dataflow/sdk/coders/SerializableCoder.java | 183 ++
.../cloud/dataflow/sdk/coders/SetCoder.java | 94 +
.../dataflow/sdk/coders/StandardCoder.java | 229 ++
.../sdk/coders/StringDelegateCoder.java | 86 +
.../dataflow/sdk/coders/StringUtf8Coder.java | 139 +
.../sdk/coders/StructuralByteArray.java | 56 +
.../dataflow/sdk/coders/TableRowJsonCoder.java | 82 +
.../sdk/coders/TextualIntegerCoder.java | 69 +
.../cloud/dataflow/sdk/coders/VarIntCoder.java | 97 +
.../cloud/dataflow/sdk/coders/VarLongCoder.java | 96 +
.../cloud/dataflow/sdk/coders/VoidCoder.java | 76 +
.../cloud/dataflow/sdk/coders/package-info.java | 44 +
.../sdk/coders/protobuf/ProtoCoder.java | 404 +++
.../sdk/coders/protobuf/ProtobufUtil.java | 171 +
.../sdk/coders/protobuf/package-info.java | 23 +
.../google/cloud/dataflow/sdk/io/AvroIO.java | 810 +++++
.../cloud/dataflow/sdk/io/AvroSource.java | 647 ++++
.../cloud/dataflow/sdk/io/BigQueryIO.java | 1499 +++++++++
.../cloud/dataflow/sdk/io/BlockBasedSource.java | 237 ++
.../sdk/io/BoundedReadFromUnboundedSource.java | 271 ++
.../cloud/dataflow/sdk/io/BoundedSource.java | 277 ++
.../cloud/dataflow/sdk/io/CompressedSource.java | 413 +++
.../cloud/dataflow/sdk/io/CountingInput.java | 191 ++
.../cloud/dataflow/sdk/io/CountingSource.java | 397 +++
.../cloud/dataflow/sdk/io/DatastoreIO.java | 957 ++++++
.../cloud/dataflow/sdk/io/FileBasedSink.java | 864 +++++
.../cloud/dataflow/sdk/io/FileBasedSource.java | 648 ++++
.../dataflow/sdk/io/OffsetBasedSource.java | 326 ++
.../google/cloud/dataflow/sdk/io/PubsubIO.java | 1044 ++++++
.../com/google/cloud/dataflow/sdk/io/Read.java | 253 ++
.../dataflow/sdk/io/ShardNameTemplate.java | 75 +
.../com/google/cloud/dataflow/sdk/io/Sink.java | 252 ++
.../google/cloud/dataflow/sdk/io/Source.java | 193 ++
.../google/cloud/dataflow/sdk/io/TextIO.java | 992 ++++++
.../cloud/dataflow/sdk/io/UnboundedSource.java | 253 ++
.../com/google/cloud/dataflow/sdk/io/Write.java | 213 ++
.../google/cloud/dataflow/sdk/io/XmlSink.java | 310 ++
.../google/cloud/dataflow/sdk/io/XmlSource.java | 541 ++++
.../dataflow/sdk/io/bigtable/BigtableIO.java | 987 ++++++
.../sdk/io/bigtable/BigtableService.java | 108 +
.../sdk/io/bigtable/BigtableServiceImpl.java | 241 ++
.../dataflow/sdk/io/bigtable/package-info.java | 22 +
.../cloud/dataflow/sdk/io/package-info.java | 37 +
.../cloud/dataflow/sdk/io/range/ByteKey.java | 173 +
.../dataflow/sdk/io/range/ByteKeyRange.java | 376 +++
.../sdk/io/range/ByteKeyRangeTracker.java | 117 +
.../sdk/io/range/OffsetRangeTracker.java | 182 ++
.../dataflow/sdk/io/range/RangeTracker.java | 220 ++
.../dataflow/sdk/io/range/package-info.java | 23 +
.../sdk/options/ApplicationNameOptions.java | 33 +
.../dataflow/sdk/options/BigQueryOptions.java | 31 +
.../BlockingDataflowPipelineOptions.java | 49 +
.../sdk/options/CloudDebuggerOptions.java | 43 +
.../options/DataflowPipelineDebugOptions.java | 259 ++
.../sdk/options/DataflowPipelineOptions.java | 114 +
.../DataflowPipelineWorkerPoolOptions.java | 254 ++
.../sdk/options/DataflowProfilingOptions.java | 46 +
.../options/DataflowWorkerHarnessOptions.java | 50 +
.../options/DataflowWorkerLoggingOptions.java | 153 +
.../cloud/dataflow/sdk/options/Default.java | 153 +
.../sdk/options/DefaultValueFactory.java | 38 +
.../cloud/dataflow/sdk/options/Description.java | 35 +
.../sdk/options/DirectPipelineOptions.java | 75 +
.../cloud/dataflow/sdk/options/GcpOptions.java | 291 ++
.../cloud/dataflow/sdk/options/GcsOptions.java | 113 +
.../sdk/options/GoogleApiDebugOptions.java | 87 +
.../cloud/dataflow/sdk/options/Hidden.java | 33 +
.../dataflow/sdk/options/PipelineOptions.java | 263 ++
.../sdk/options/PipelineOptionsFactory.java | 1537 +++++++++
.../sdk/options/PipelineOptionsRegistrar.java | 36 +
.../sdk/options/PipelineOptionsValidator.java | 102 +
.../sdk/options/ProxyInvocationHandler.java | 441 +++
.../dataflow/sdk/options/StreamingOptions.java | 30 +
.../cloud/dataflow/sdk/options/Validation.java | 46 +
.../dataflow/sdk/options/package-info.java | 25 +
.../google/cloud/dataflow/sdk/package-info.java | 33 +
.../runners/AggregatorPipelineExtractor.java | 97 +
.../runners/AggregatorRetrievalException.java | 32 +
.../dataflow/sdk/runners/AggregatorValues.java | 52 +
.../runners/BlockingDataflowPipelineRunner.java | 181 ++
.../DataflowJobAlreadyExistsException.java | 34 +
.../DataflowJobAlreadyUpdatedException.java | 33 +
.../runners/DataflowJobCancelledException.java | 38 +
.../sdk/runners/DataflowJobException.java | 40 +
.../runners/DataflowJobExecutionException.java | 34 +
.../runners/DataflowJobUpdatedException.java | 51 +
.../dataflow/sdk/runners/DataflowPipeline.java | 59 +
.../sdk/runners/DataflowPipelineJob.java | 389 +++
.../sdk/runners/DataflowPipelineRegistrar.java | 58 +
.../sdk/runners/DataflowPipelineRunner.java | 3007 ++++++++++++++++++
.../runners/DataflowPipelineRunnerHooks.java | 37 +
.../sdk/runners/DataflowPipelineTranslator.java | 1104 +++++++
.../sdk/runners/DataflowServiceException.java | 32 +
.../dataflow/sdk/runners/DirectPipeline.java | 55 +
.../sdk/runners/DirectPipelineRegistrar.java | 53 +
.../sdk/runners/DirectPipelineRunner.java | 1156 +++++++
.../dataflow/sdk/runners/PipelineRunner.java | 76 +
.../sdk/runners/PipelineRunnerRegistrar.java | 40 +
.../sdk/runners/RecordingPipelineVisitor.java | 54 +
.../sdk/runners/TransformHierarchy.java | 104 +
.../dataflow/sdk/runners/TransformTreeNode.java | 252 ++
.../sdk/runners/dataflow/AssignWindows.java | 88 +
.../runners/dataflow/BigQueryIOTranslator.java | 125 +
.../sdk/runners/dataflow/CustomSources.java | 118 +
.../dataflow/DataflowAggregatorTransforms.java | 79 +
.../dataflow/DataflowMetricUpdateExtractor.java | 110 +
.../runners/dataflow/PubsubIOTranslator.java | 107 +
.../sdk/runners/dataflow/ReadTranslator.java | 103 +
.../sdk/runners/dataflow/package-info.java | 20 +
.../inprocess/BoundedReadEvaluatorFactory.java | 152 +
.../CachedThreadPoolExecutorServiceFactory.java | 42 +
.../dataflow/sdk/runners/inprocess/Clock.java | 29 +
.../runners/inprocess/CompletionCallback.java | 33 +
.../ConsumerTrackingPipelineVisitor.java | 173 +
.../inprocess/EmptyTransformEvaluator.java | 49 +
.../sdk/runners/inprocess/EvaluatorKey.java | 55 +
.../inprocess/ExecutorServiceFactory.java | 32 +
.../ExecutorServiceParallelExecutor.java | 432 +++
.../inprocess/FlattenEvaluatorFactory.java | 83 +
.../runners/inprocess/ForwardingPTransform.java | 54 +
.../inprocess/GroupByKeyEvaluatorFactory.java | 252 ++
.../inprocess/InMemoryWatermarkManager.java | 1310 ++++++++
.../sdk/runners/inprocess/InProcessBundle.java | 121 +
.../inprocess/InProcessBundleOutputManager.java | 50 +
.../sdk/runners/inprocess/InProcessCreate.java | 209 ++
.../inprocess/InProcessEvaluationContext.java | 405 +++
.../inprocess/InProcessExecutionContext.java | 106 +
.../runners/inprocess/InProcessExecutor.java | 46 +
.../inprocess/InProcessPipelineOptions.java | 90 +
.../inprocess/InProcessPipelineRunner.java | 343 ++
.../inprocess/InProcessSideInputContainer.java | 230 ++
.../inprocess/InProcessTimerInternals.java | 84 +
.../inprocess/InProcessTransformResult.java | 75 +
.../inprocess/KeyedPValueTrackingVisitor.java | 95 +
.../sdk/runners/inprocess/NanosOffsetClock.java | 58 +
.../inprocess/ParDoInProcessEvaluator.java | 109 +
.../inprocess/ParDoMultiEvaluatorFactory.java | 90 +
.../inprocess/ParDoSingleEvaluatorFactory.java | 87 +
.../sdk/runners/inprocess/StepAndKey.java | 68 +
.../runners/inprocess/StepTransformResult.java | 157 +
.../runners/inprocess/TransformEvaluator.java | 45 +
.../inprocess/TransformEvaluatorFactory.java | 42 +
.../inprocess/TransformEvaluatorRegistry.java | 72 +
.../runners/inprocess/TransformExecutor.java | 114 +
.../inprocess/TransformExecutorService.java | 34 +
.../inprocess/TransformExecutorServices.java | 153 +
.../UnboundedReadEvaluatorFactory.java | 168 +
.../runners/inprocess/ViewEvaluatorFactory.java | 121 +
.../inprocess/WatermarkCallbackExecutor.java | 143 +
.../dataflow/sdk/runners/package-info.java | 33 +
.../dataflow/sdk/runners/worker/IsmFormat.java | 946 ++++++
.../sdk/runners/worker/package-info.java | 24 +
.../dataflow/sdk/testing/CoderProperties.java | 349 ++
.../dataflow/sdk/testing/DataflowAssert.java | 825 +++++
.../dataflow/sdk/testing/RunnableOnService.java | 30 +
.../sdk/testing/SerializableMatcher.java | 36 +
.../sdk/testing/SerializableMatchers.java | 1180 +++++++
.../dataflow/sdk/testing/SourceTestUtils.java | 642 ++++
.../testing/TestDataflowPipelineOptions.java | 26 +
.../sdk/testing/TestDataflowPipelineRunner.java | 220 ++
.../dataflow/sdk/testing/TestPipeline.java | 193 ++
.../dataflow/sdk/testing/WindowFnTestUtils.java | 325 ++
.../dataflow/sdk/testing/package-info.java | 21 +
.../dataflow/sdk/transforms/Aggregator.java | 78 +
.../sdk/transforms/AggregatorRetriever.java | 36 +
.../sdk/transforms/AppliedPTransform.java | 100 +
.../sdk/transforms/ApproximateQuantiles.java | 766 +++++
.../sdk/transforms/ApproximateUnique.java | 419 +++
.../cloud/dataflow/sdk/transforms/Combine.java | 2240 +++++++++++++
.../dataflow/sdk/transforms/CombineFnBase.java | 283 ++
.../dataflow/sdk/transforms/CombineFns.java | 1100 +++++++
.../sdk/transforms/CombineWithContext.java | 277 ++
.../cloud/dataflow/sdk/transforms/Count.java | 135 +
.../cloud/dataflow/sdk/transforms/Create.java | 426 +++
.../cloud/dataflow/sdk/transforms/DoFn.java | 563 ++++
.../dataflow/sdk/transforms/DoFnReflector.java | 668 ++++
.../dataflow/sdk/transforms/DoFnTester.java | 495 +++
.../sdk/transforms/DoFnWithContext.java | 416 +++
.../cloud/dataflow/sdk/transforms/Filter.java | 234 ++
.../sdk/transforms/FlatMapElements.java | 145 +
.../cloud/dataflow/sdk/transforms/Flatten.java | 219 ++
.../dataflow/sdk/transforms/GroupByKey.java | 575 ++++
.../transforms/IntraBundleParallelization.java | 346 ++
.../cloud/dataflow/sdk/transforms/Keys.java | 68 +
.../cloud/dataflow/sdk/transforms/KvSwap.java | 73 +
.../dataflow/sdk/transforms/MapElements.java | 112 +
.../cloud/dataflow/sdk/transforms/Max.java | 255 ++
.../cloud/dataflow/sdk/transforms/Mean.java | 202 ++
.../cloud/dataflow/sdk/transforms/Min.java | 255 ++
.../dataflow/sdk/transforms/PTransform.java | 324 ++
.../cloud/dataflow/sdk/transforms/ParDo.java | 1321 ++++++++
.../dataflow/sdk/transforms/Partition.java | 173 +
.../sdk/transforms/RemoveDuplicates.java | 158 +
.../cloud/dataflow/sdk/transforms/Sample.java | 246 ++
.../sdk/transforms/SerializableComparator.java | 28 +
.../sdk/transforms/SerializableFunction.java | 31 +
.../dataflow/sdk/transforms/SimpleFunction.java | 54 +
.../cloud/dataflow/sdk/transforms/Sum.java | 188 ++
.../cloud/dataflow/sdk/transforms/Top.java | 559 ++++
.../cloud/dataflow/sdk/transforms/Values.java | 68 +
.../cloud/dataflow/sdk/transforms/View.java | 470 +++
.../cloud/dataflow/sdk/transforms/WithKeys.java | 140 +
.../dataflow/sdk/transforms/WithTimestamps.java | 129 +
.../cloud/dataflow/sdk/transforms/Write.java | 27 +
.../sdk/transforms/display/DisplayData.java | 530 +++
.../sdk/transforms/display/HasDisplayData.java | 53 +
.../sdk/transforms/join/CoGbkResult.java | 463 +++
.../sdk/transforms/join/CoGbkResultSchema.java | 134 +
.../sdk/transforms/join/CoGroupByKey.java | 211 ++
.../transforms/join/KeyedPCollectionTuple.java | 247 ++
.../sdk/transforms/join/RawUnionValue.java | 51 +
.../sdk/transforms/join/UnionCoder.java | 147 +
.../sdk/transforms/join/package-info.java | 21 +
.../dataflow/sdk/transforms/package-info.java | 43 +
.../sdk/transforms/windowing/AfterAll.java | 117 +
.../windowing/AfterDelayFromFirstElement.java | 322 ++
.../sdk/transforms/windowing/AfterEach.java | 135 +
.../sdk/transforms/windowing/AfterFirst.java | 119 +
.../sdk/transforms/windowing/AfterPane.java | 145 +
.../windowing/AfterProcessingTime.java | 97 +
.../AfterSynchronizedProcessingTime.java | 75 +
.../transforms/windowing/AfterWatermark.java | 397 +++
.../sdk/transforms/windowing/BoundedWindow.java | 46 +
.../transforms/windowing/CalendarWindows.java | 348 ++
.../transforms/windowing/DefaultTrigger.java | 95 +
.../sdk/transforms/windowing/FixedWindows.java | 116 +
.../sdk/transforms/windowing/GlobalWindow.java | 68 +
.../sdk/transforms/windowing/GlobalWindows.java | 63 +
.../transforms/windowing/IntervalWindow.java | 201 ++
.../transforms/windowing/InvalidWindows.java | 87 +
.../MergeOverlappingIntervalWindows.java | 86 +
.../windowing/NonMergingWindowFn.java | 35 +
.../transforms/windowing/OrFinallyTrigger.java | 100 +
.../sdk/transforms/windowing/OutputTimeFn.java | 319 ++
.../sdk/transforms/windowing/OutputTimeFns.java | 168 +
.../sdk/transforms/windowing/PaneInfo.java | 384 +++
.../windowing/PartitioningWindowFn.java | 61 +
.../sdk/transforms/windowing/Repeatedly.java | 100 +
.../sdk/transforms/windowing/Sessions.java | 112 +
.../transforms/windowing/SlidingWindows.java | 214 ++
.../sdk/transforms/windowing/Trigger.java | 544 ++++
.../transforms/windowing/TriggerBuilder.java | 29 +
.../sdk/transforms/windowing/Window.java | 662 ++++
.../sdk/transforms/windowing/WindowFn.java | 221 ++
.../sdk/transforms/windowing/package-info.java | 49 +
.../dataflow/sdk/util/ActiveWindowSet.java | 171 +
.../cloud/dataflow/sdk/util/ApiSurface.java | 642 ++++
.../dataflow/sdk/util/AppEngineEnvironment.java | 61 +
.../dataflow/sdk/util/AppliedCombineFn.java | 130 +
.../dataflow/sdk/util/AssignWindowsDoFn.java | 67 +
...AttemptAndTimeBoundedExponentialBackOff.java | 168 +
.../util/AttemptBoundedExponentialBackOff.java | 83 +
.../cloud/dataflow/sdk/util/AvroUtils.java | 345 ++
.../dataflow/sdk/util/BaseExecutionContext.java | 155 +
.../dataflow/sdk/util/BatchTimerInternals.java | 138 +
.../sdk/util/BigQueryTableInserter.java | 434 +++
.../sdk/util/BigQueryTableRowIterator.java | 469 +++
.../cloud/dataflow/sdk/util/BitSetCoder.java | 59 +
.../BufferedElementCountingOutputStream.java | 184 ++
.../cloud/dataflow/sdk/util/CloudKnownType.java | 138 +
.../cloud/dataflow/sdk/util/CloudObject.java | 184 ++
.../cloud/dataflow/sdk/util/CoderUtils.java | 327 ++
.../sdk/util/CombineContextFactory.java | 107 +
.../cloud/dataflow/sdk/util/CombineFnUtil.java | 154 +
.../dataflow/sdk/util/CounterAggregator.java | 96 +
.../dataflow/sdk/util/CredentialFactory.java | 29 +
.../cloud/dataflow/sdk/util/Credentials.java | 192 ++
.../sdk/util/DataflowPathValidator.java | 97 +
.../dataflow/sdk/util/DataflowReleaseInfo.java | 87 +
.../sdk/util/DirectModeExecutionContext.java | 130 +
.../sdk/util/DirectSideInputReader.java | 73 +
.../cloud/dataflow/sdk/util/DoFnInfo.java | 67 +
.../cloud/dataflow/sdk/util/DoFnRunner.java | 60 +
.../cloud/dataflow/sdk/util/DoFnRunnerBase.java | 558 ++++
.../cloud/dataflow/sdk/util/DoFnRunners.java | 142 +
.../dataflow/sdk/util/ExecutableTrigger.java | 159 +
.../dataflow/sdk/util/ExecutionContext.java | 102 +
.../sdk/util/ExposedByteArrayInputStream.java | 51 +
.../sdk/util/ExposedByteArrayOutputStream.java | 115 +
.../dataflow/sdk/util/FileIOChannelFactory.java | 135 +
.../dataflow/sdk/util/FinishedTriggers.java | 42 +
.../sdk/util/FinishedTriggersBitSet.java | 68 +
.../dataflow/sdk/util/FinishedTriggersSet.java | 74 +
.../dataflow/sdk/util/GcpCredentialFactory.java | 45 +
.../dataflow/sdk/util/GcsIOChannelFactory.java | 86 +
.../cloud/dataflow/sdk/util/GcsStager.java | 53 +
.../google/cloud/dataflow/sdk/util/GcsUtil.java | 406 +++
.../util/GroupAlsoByWindowViaWindowSetDoFn.java | 104 +
.../sdk/util/GroupAlsoByWindowsDoFn.java | 58 +
.../GroupAlsoByWindowsViaOutputBufferDoFn.java | 98 +
.../dataflow/sdk/util/IOChannelFactory.java | 101 +
.../cloud/dataflow/sdk/util/IOChannelUtils.java | 204 ++
.../sdk/util/IllegalMutationException.java | 52 +
.../dataflow/sdk/util/InstanceBuilder.java | 269 ++
.../util/IntervalBoundedExponentialBackOff.java | 87 +
.../cloud/dataflow/sdk/util/KeyedWorkItem.java | 41 +
.../dataflow/sdk/util/KeyedWorkItemCoder.java | 120 +
.../cloud/dataflow/sdk/util/KeyedWorkItems.java | 120 +
.../sdk/util/LateDataDroppingDoFnRunner.java | 145 +
.../dataflow/sdk/util/MapAggregatorValues.java | 48 +
.../sdk/util/MergingActiveWindowSet.java | 544 ++++
.../cloud/dataflow/sdk/util/MimeTypes.java | 23 +
.../cloud/dataflow/sdk/util/MonitoringUtil.java | 233 ++
.../dataflow/sdk/util/MutationDetector.java | 31 +
.../dataflow/sdk/util/MutationDetectors.java | 182 ++
.../cloud/dataflow/sdk/util/NonEmptyPanes.java | 148 +
.../sdk/util/NonMergingActiveWindowSet.java | 85 +
.../sdk/util/NoopCredentialFactory.java | 38 +
.../dataflow/sdk/util/NoopPathValidator.java | 48 +
.../dataflow/sdk/util/NullSideInputReader.java | 61 +
.../dataflow/sdk/util/OutputReference.java | 42 +
.../sdk/util/PCollectionViewWindow.java | 67 +
.../dataflow/sdk/util/PCollectionViews.java | 426 +++
.../google/cloud/dataflow/sdk/util/PTuple.java | 160 +
.../cloud/dataflow/sdk/util/PackageUtil.java | 327 ++
.../dataflow/sdk/util/PaneInfoTracker.java | 151 +
.../cloud/dataflow/sdk/util/PathValidator.java | 47 +
.../sdk/util/PerKeyCombineFnRunner.java | 147 +
.../sdk/util/PerKeyCombineFnRunners.java | 257 ++
.../cloud/dataflow/sdk/util/PropertyNames.java | 107 +
.../dataflow/sdk/util/RandomAccessData.java | 352 ++
.../cloud/dataflow/sdk/util/ReduceFn.java | 128 +
.../sdk/util/ReduceFnContextFactory.java | 495 +++
.../cloud/dataflow/sdk/util/ReduceFnRunner.java | 843 +++++
.../sdk/util/ReifyTimestampAndWindowsDoFn.java | 46 +
.../cloud/dataflow/sdk/util/Reshuffle.java | 145 +
.../dataflow/sdk/util/ReshuffleTrigger.java | 61 +
.../sdk/util/RetryHttpRequestInitializer.java | 250 ++
.../dataflow/sdk/util/SerializableUtils.java | 159 +
.../cloud/dataflow/sdk/util/Serializer.java | 145 +
.../sdk/util/ShardingWritableByteChannel.java | 118 +
.../dataflow/sdk/util/SideInputReader.java | 48 +
.../dataflow/sdk/util/SimpleDoFnRunner.java | 55 +
.../google/cloud/dataflow/sdk/util/Stager.java | 29 +
.../cloud/dataflow/sdk/util/StreamUtils.java | 68 +
.../cloud/dataflow/sdk/util/StringUtils.java | 242 ++
.../google/cloud/dataflow/sdk/util/Structs.java | 384 +++
.../dataflow/sdk/util/SystemDoFnInternal.java | 37 +
.../cloud/dataflow/sdk/util/SystemReduceFn.java | 133 +
.../cloud/dataflow/sdk/util/TestCredential.java | 51 +
.../cloud/dataflow/sdk/util/TimeDomain.java | 41 +
.../cloud/dataflow/sdk/util/TimeUtil.java | 164 +
.../cloud/dataflow/sdk/util/TimerInternals.java | 269 ++
.../google/cloud/dataflow/sdk/util/Timers.java | 60 +
.../cloud/dataflow/sdk/util/Transport.java | 205 ++
.../sdk/util/TriggerContextFactory.java | 522 +++
.../cloud/dataflow/sdk/util/TriggerRunner.java | 223 ++
.../dataflow/sdk/util/UnownedInputStream.java | 76 +
.../dataflow/sdk/util/UnownedOutputStream.java | 56 +
.../sdk/util/UploadIdResponseInterceptor.java | 61 +
.../dataflow/sdk/util/UserCodeException.java | 94 +
.../dataflow/sdk/util/ValueWithRecordId.java | 154 +
.../google/cloud/dataflow/sdk/util/Values.java | 88 +
.../google/cloud/dataflow/sdk/util/VarInt.java | 115 +
.../cloud/dataflow/sdk/util/WatermarkHold.java | 450 +++
.../cloud/dataflow/sdk/util/Weighted.java | 27 +
.../cloud/dataflow/sdk/util/WeightedValue.java | 45 +
.../cloud/dataflow/sdk/util/WindowTracing.java | 36 +
.../cloud/dataflow/sdk/util/WindowedValue.java | 720 +++++
.../dataflow/sdk/util/WindowingInternals.java | 82 +
.../dataflow/sdk/util/WindowingStrategy.java | 268 ++
.../cloud/dataflow/sdk/util/ZipFiles.java | 294 ++
.../cloud/dataflow/sdk/util/common/Counter.java | 1103 +++++++
.../sdk/util/common/CounterProvider.java | 26 +
.../dataflow/sdk/util/common/CounterSet.java | 177 ++
.../util/common/ElementByteSizeObservable.java | 41 +
.../ElementByteSizeObservableIterable.java | 63 +
.../ElementByteSizeObservableIterator.java | 36 +
.../util/common/ElementByteSizeObserver.java | 92 +
.../sdk/util/common/PeekingReiterator.java | 98 +
.../sdk/util/common/ReflectHelpers.java | 209 ++
.../dataflow/sdk/util/common/Reiterable.java | 27 +
.../dataflow/sdk/util/common/Reiterator.java | 39 +
.../dataflow/sdk/util/common/package-info.java | 18 +
.../sdk/util/common/worker/StateSampler.java | 365 +++
.../sdk/util/common/worker/package-info.java | 18 +
.../cloud/dataflow/sdk/util/gcsfs/GcsPath.java | 619 ++++
.../dataflow/sdk/util/gcsfs/package-info.java | 18 +
.../cloud/dataflow/sdk/util/package-info.java | 18 +
.../util/state/AccumulatorCombiningState.java | 51 +
.../cloud/dataflow/sdk/util/state/BagState.java | 26 +
.../dataflow/sdk/util/state/CombiningState.java | 40 +
.../CopyOnAccessInMemoryStateInternals.java | 454 +++
.../sdk/util/state/InMemoryStateInternals.java | 414 +++
.../sdk/util/state/MergingStateAccessor.java | 40 +
.../dataflow/sdk/util/state/ReadableState.java | 53 +
.../cloud/dataflow/sdk/util/state/State.java | 30 +
.../dataflow/sdk/util/state/StateAccessor.java | 36 +
.../dataflow/sdk/util/state/StateContext.java | 41 +
.../dataflow/sdk/util/state/StateContexts.java | 107 +
.../dataflow/sdk/util/state/StateInternals.java | 55 +
.../dataflow/sdk/util/state/StateMerging.java | 254 ++
.../dataflow/sdk/util/state/StateNamespace.java | 54 +
.../sdk/util/state/StateNamespaceForTest.java | 63 +
.../sdk/util/state/StateNamespaces.java | 277 ++
.../dataflow/sdk/util/state/StateTable.java | 89 +
.../cloud/dataflow/sdk/util/state/StateTag.java | 96 +
.../dataflow/sdk/util/state/StateTags.java | 579 ++++
.../dataflow/sdk/util/state/ValueState.java | 35 +
.../sdk/util/state/WatermarkHoldState.java | 42 +
.../google/cloud/dataflow/sdk/values/KV.java | 130 +
.../cloud/dataflow/sdk/values/PBegin.java | 87 +
.../cloud/dataflow/sdk/values/PCollection.java | 250 ++
.../dataflow/sdk/values/PCollectionList.java | 238 ++
.../dataflow/sdk/values/PCollectionTuple.java | 264 ++
.../dataflow/sdk/values/PCollectionView.java | 64 +
.../google/cloud/dataflow/sdk/values/PDone.java | 47 +
.../cloud/dataflow/sdk/values/PInput.java | 56 +
.../cloud/dataflow/sdk/values/POutput.java | 76 +
.../dataflow/sdk/values/POutputValueBase.java | 102 +
.../cloud/dataflow/sdk/values/PValue.java | 38 +
.../cloud/dataflow/sdk/values/PValueBase.java | 155 +
.../dataflow/sdk/values/TimestampedValue.java | 155 +
.../cloud/dataflow/sdk/values/TupleTag.java | 196 ++
.../cloud/dataflow/sdk/values/TupleTagList.java | 148 +
.../dataflow/sdk/values/TypeDescriptor.java | 351 ++
.../cloud/dataflow/sdk/values/TypedPValue.java | 197 ++
.../cloud/dataflow/sdk/values/package-info.java | 52 +
sdks/java/core/src/main/proto/README.md | 27 +
.../main/proto/proto2_coder_test_messages.proto | 51 +
.../google/cloud/dataflow/sdk/sdk.properties | 5 +
.../cloud/dataflow/sdk/DataflowMatchers.java | 65 +
.../google/cloud/dataflow/sdk/PipelineTest.java | 296 ++
.../google/cloud/dataflow/sdk/TestUtils.java | 213 ++
.../cloud/dataflow/sdk/WindowMatchers.java | 137 +
.../dataflow/sdk/coders/AvroCoderTest.java | 754 +++++
.../sdk/coders/BigEndianIntegerCoderTest.java | 90 +
.../sdk/coders/BigEndianLongCoderTest.java | 94 +
.../dataflow/sdk/coders/ByteArrayCoderTest.java | 144 +
.../dataflow/sdk/coders/ByteCoderTest.java | 91 +
.../sdk/coders/ByteStringCoderTest.java | 121 +
.../dataflow/sdk/coders/CoderFactoriesTest.java | 100 +
.../dataflow/sdk/coders/CoderProvidersTest.java | 71 +
.../dataflow/sdk/coders/CoderRegistryTest.java | 521 +++
.../cloud/dataflow/sdk/coders/CoderTest.java | 78 +
.../sdk/coders/CollectionCoderTest.java | 93 +
.../dataflow/sdk/coders/CustomCoderTest.java | 135 +
.../dataflow/sdk/coders/DefaultCoderTest.java | 128 +
.../dataflow/sdk/coders/DelegateCoderTest.java | 141 +
.../dataflow/sdk/coders/DoubleCoderTest.java | 96 +
.../dataflow/sdk/coders/DurationCoderTest.java | 86 +
.../dataflow/sdk/coders/EntityCoderTest.java | 108 +
.../dataflow/sdk/coders/InstantCoderTest.java | 116 +
.../dataflow/sdk/coders/IterableCoderTest.java | 109 +
.../dataflow/sdk/coders/JAXBCoderTest.java | 99 +
.../cloud/dataflow/sdk/coders/KvCoderTest.java | 118 +
.../dataflow/sdk/coders/ListCoderTest.java | 134 +
.../cloud/dataflow/sdk/coders/MapCoderTest.java | 106 +
.../dataflow/sdk/coders/NullableCoderTest.java | 132 +
.../sdk/coders/PrintBase64Encodings.java | 81 +
.../dataflow/sdk/coders/Proto2CoderTest.java | 145 +
.../sdk/coders/SerializableCoderTest.java | 222 ++
.../cloud/dataflow/sdk/coders/SetCoderTest.java | 86 +
.../dataflow/sdk/coders/StandardCoderTest.java | 176 +
.../sdk/coders/StringDelegateCoderTest.java | 72 +
.../sdk/coders/StringUtf8CoderTest.java | 80 +
.../sdk/coders/StructuralByteArrayTest.java | 39 +
.../sdk/coders/TableRowJsonCoderTest.java | 86 +
.../sdk/coders/TextualIntegerCoderTest.java | 90 +
.../dataflow/sdk/coders/VarIntCoderTest.java | 91 +
.../dataflow/sdk/coders/VarLongCoderTest.java | 94 +
.../sdk/coders/protobuf/ProtoCoderTest.java | 182 ++
.../sdk/coders/protobuf/ProtobufUtilTest.java | 195 ++
.../sdk/io/AvroIOGeneratedClassTest.java | 374 +++
.../cloud/dataflow/sdk/io/AvroIOTest.java | 226 ++
.../cloud/dataflow/sdk/io/AvroSourceTest.java | 692 ++++
.../cloud/dataflow/sdk/io/BigQueryIOTest.java | 445 +++
.../io/BoundedReadFromUnboundedSourceTest.java | 132 +
.../dataflow/sdk/io/CompressedSourceTest.java | 430 +++
.../dataflow/sdk/io/CountingInputTest.java | 125 +
.../dataflow/sdk/io/CountingSourceTest.java | 216 ++
.../cloud/dataflow/sdk/io/DatastoreIOTest.java | 631 ++++
.../dataflow/sdk/io/FileBasedSinkTest.java | 512 +++
.../dataflow/sdk/io/FileBasedSourceTest.java | 914 ++++++
.../dataflow/sdk/io/OffsetBasedSourceTest.java | 278 ++
.../cloud/dataflow/sdk/io/PubsubIOTest.java | 233 ++
.../google/cloud/dataflow/sdk/io/ReadTest.java | 144 +
.../cloud/dataflow/sdk/io/TextIOTest.java | 562 ++++
.../google/cloud/dataflow/sdk/io/WriteTest.java | 341 ++
.../cloud/dataflow/sdk/io/XmlSinkTest.java | 235 ++
.../cloud/dataflow/sdk/io/XmlSourceTest.java | 822 +++++
.../sdk/io/bigtable/BigtableIOTest.java | 688 ++++
.../range/ByteKeyRangeEstimateFractionTest.java | 69 +
.../range/ByteKeyRangeInterpolateKeyTest.java | 73 +
.../dataflow/sdk/io/range/ByteKeyRangeTest.java | 396 +++
.../sdk/io/range/ByteKeyRangeTrackerTest.java | 118 +
.../dataflow/sdk/io/range/ByteKeyTest.java | 178 ++
.../sdk/io/range/OffsetRangeTrackerTest.java | 186 ++
.../com/google/cloud/dataflow/sdk/io/user.avsc | 10 +
.../DataflowPipelineDebugOptionsTest.java | 40 +
.../options/DataflowPipelineOptionsTest.java | 91 +
.../options/DataflowProfilingOptionsTest.java | 47 +
.../DataflowWorkerLoggingOptionsTest.java | 73 +
.../dataflow/sdk/options/GcpOptionsTest.java | 123 +
.../sdk/options/GoogleApiDebugOptionsTest.java | 147 +
.../sdk/options/PipelineOptionsFactoryTest.java | 1154 +++++++
.../sdk/options/PipelineOptionsTest.java | 126 +
.../options/PipelineOptionsValidatorTest.java | 310 ++
.../sdk/options/ProxyInvocationHandlerTest.java | 691 ++++
.../AggregatorPipelineExtractorTest.java | 228 ++
.../BlockingDataflowPipelineRunnerTest.java | 301 ++
.../sdk/runners/DataflowPipelineJobTest.java | 603 ++++
.../runners/DataflowPipelineRegistrarTest.java | 72 +
.../sdk/runners/DataflowPipelineRunnerTest.java | 1368 ++++++++
.../sdk/runners/DataflowPipelineTest.java | 44 +
.../runners/DataflowPipelineTranslatorTest.java | 889 ++++++
.../runners/DirectPipelineRegistrarTest.java | 69 +
.../sdk/runners/DirectPipelineRunnerTest.java | 210 ++
.../sdk/runners/DirectPipelineTest.java | 34 +
.../sdk/runners/PipelineRunnerTest.java | 82 +
.../dataflow/sdk/runners/TransformTreeTest.java | 194 ++
.../sdk/runners/dataflow/CustomSourcesTest.java | 273 ++
.../runners/dataflow/TestCountingSource.java | 235 ++
.../BoundedReadEvaluatorFactoryTest.java | 287 ++
.../ConsumerTrackingPipelineVisitorTest.java | 233 ++
.../inprocess/FlattenEvaluatorFactoryTest.java | 136 +
.../inprocess/ForwardingPTransformTest.java | 100 +
.../GroupByKeyEvaluatorFactoryTest.java | 178 ++
.../inprocess/InMemoryWatermarkManagerTest.java | 1111 +++++++
.../runners/inprocess/InProcessBundleTest.java | 143 +
.../runners/inprocess/InProcessCreateTest.java | 199 ++
.../InProcessEvaluationContextTest.java | 544 ++++
.../inprocess/InProcessPipelineRunnerTest.java | 77 +
.../InProcessSideInputContainerTest.java | 370 +++
.../inprocess/InProcessTimerInternalsTest.java | 131 +
.../KeyedPValueTrackingVisitorTest.java | 189 ++
.../sdk/runners/inprocess/MockClock.java | 60 +
.../ParDoMultiEvaluatorFactoryTest.java | 412 +++
.../ParDoSingleEvaluatorFactoryTest.java | 310 ++
.../TransformExecutorServicesTest.java | 134 +
.../inprocess/TransformExecutorTest.java | 312 ++
.../UnboundedReadEvaluatorFactoryTest.java | 327 ++
.../inprocess/ViewEvaluatorFactoryTest.java | 96 +
.../WatermarkCallbackExecutorTest.java | 126 +
.../sdk/testing/CoderPropertiesTest.java | 214 ++
.../sdk/testing/DataflowAssertTest.java | 326 ++
.../sdk/testing/DataflowJUnitTestRunner.java | 129 +
.../dataflow/sdk/testing/ExpectedLogs.java | 306 ++
.../dataflow/sdk/testing/ExpectedLogsTest.java | 153 +
.../sdk/testing/FastNanoClockAndSleeper.java | 47 +
.../testing/FastNanoClockAndSleeperTest.java | 47 +
.../sdk/testing/PCollectionViewTesting.java | 295 ++
.../sdk/testing/ResetDateTimeProvider.java | 41 +
.../sdk/testing/ResetDateTimeProviderTest.java | 55 +
.../sdk/testing/RestoreSystemProperties.java | 51 +
.../testing/RestoreSystemPropertiesTest.java | 50 +
.../sdk/testing/SerializableMatchersTest.java | 165 +
.../sdk/testing/SystemNanoTimeSleeper.java | 68 +
.../sdk/testing/SystemNanoTimeSleeperTest.java | 53 +
.../testing/TestDataflowPipelineRunnerTest.java | 317 ++
.../dataflow/sdk/testing/TestPipelineTest.java | 93 +
.../transforms/ApproximateQuantilesTest.java | 299 ++
.../sdk/transforms/ApproximateUniqueTest.java | 291 ++
.../dataflow/sdk/transforms/CombineFnsTest.java | 413 +++
.../dataflow/sdk/transforms/CombineTest.java | 1137 +++++++
.../dataflow/sdk/transforms/CountTest.java | 121 +
.../dataflow/sdk/transforms/CreateTest.java | 240 ++
.../sdk/transforms/DoFnContextTest.java | 68 +
.../DoFnDelegatingAggregatorTest.java | 143 +
.../sdk/transforms/DoFnReflectorTest.java | 493 +++
.../cloud/dataflow/sdk/transforms/DoFnTest.java | 206 ++
.../dataflow/sdk/transforms/DoFnTesterTest.java | 253 ++
.../sdk/transforms/DoFnWithContextTest.java | 225 ++
.../dataflow/sdk/transforms/FilterTest.java | 160 +
.../sdk/transforms/FlatMapElementsTest.java | 124 +
.../dataflow/sdk/transforms/FlattenTest.java | 369 +++
.../dataflow/sdk/transforms/GroupByKeyTest.java | 438 +++
.../IntraBundleParallelizationTest.java | 250 ++
.../cloud/dataflow/sdk/transforms/KeysTest.java | 83 +
.../dataflow/sdk/transforms/KvSwapTest.java | 91 +
.../sdk/transforms/MapElementsTest.java | 134 +
.../cloud/dataflow/sdk/transforms/MaxTest.java | 66 +
.../cloud/dataflow/sdk/transforms/MeanTest.java | 72 +
.../cloud/dataflow/sdk/transforms/MinTest.java | 66 +
.../cloud/dataflow/sdk/transforms/NoOpDoFn.java | 143 +
.../dataflow/sdk/transforms/PTransformTest.java | 41 +
.../dataflow/sdk/transforms/ParDoTest.java | 1541 +++++++++
.../dataflow/sdk/transforms/PartitionTest.java | 140 +
.../sdk/transforms/RemoveDuplicatesTest.java | 131 +
.../dataflow/sdk/transforms/SampleTest.java | 260 ++
.../sdk/transforms/SimpleStatsFnsTest.java | 129 +
.../cloud/dataflow/sdk/transforms/SumTest.java | 66 +
.../cloud/dataflow/sdk/transforms/TopTest.java | 259 ++
.../dataflow/sdk/transforms/ValuesTest.java | 93 +
.../cloud/dataflow/sdk/transforms/ViewTest.java | 1548 +++++++++
.../dataflow/sdk/transforms/WithKeysTest.java | 127 +
.../sdk/transforms/WithTimestampsTest.java | 210 ++
.../transforms/display/DisplayDataMatchers.java | 98 +
.../display/DisplayDataMatchersTest.java | 81 +
.../sdk/transforms/display/DisplayDataTest.java | 633 ++++
.../transforms/join/CoGbkResultCoderTest.java | 85 +
.../sdk/transforms/join/CoGbkResultTest.java | 124 +
.../sdk/transforms/join/CoGroupByKeyTest.java | 507 +++
.../sdk/transforms/join/UnionCoderTest.java | 48 +
.../sdk/transforms/windowing/AfterAllTest.java | 151 +
.../sdk/transforms/windowing/AfterEachTest.java | 122 +
.../transforms/windowing/AfterFirstTest.java | 175 +
.../sdk/transforms/windowing/AfterPaneTest.java | 126 +
.../windowing/AfterProcessingTimeTest.java | 157 +
.../AfterSynchronizedProcessingTimeTest.java | 121 +
.../windowing/AfterWatermarkTest.java | 338 ++
.../windowing/CalendarWindowsTest.java | 260 ++
.../windowing/DefaultTriggerTest.java | 176 +
.../transforms/windowing/FixedWindowsTest.java | 124 +
.../windowing/IntervalWindowTest.java | 94 +
.../windowing/OrFinallyTriggerTest.java | 209 ++
.../sdk/transforms/windowing/PaneInfoTest.java | 75 +
.../transforms/windowing/RepeatedlyTest.java | 128 +
.../sdk/transforms/windowing/SessionsTest.java | 156 +
.../windowing/SlidingWindowsTest.java | 193 ++
.../sdk/transforms/windowing/TriggerTest.java | 117 +
.../sdk/transforms/windowing/WindowTest.java | 226 ++
.../sdk/transforms/windowing/WindowingTest.java | 244 ++
.../cloud/dataflow/sdk/util/ApiSurfaceTest.java | 187 ++
...mptAndTimeBoundedExponentialBackOffTest.java | 212 ++
.../AttemptBoundedExponentialBackOffTest.java | 85 +
.../cloud/dataflow/sdk/util/AvroUtilsTest.java | 225 ++
.../sdk/util/BatchTimerInternalsTest.java | 116 +
.../sdk/util/BigQueryTableInserterTest.java | 239 ++
.../sdk/util/BigQueryTableRowIteratorTest.java | 255 ++
.../dataflow/sdk/util/BigQueryUtilTest.java | 479 +++
...BufferedElementCountingOutputStreamTest.java | 205 ++
.../cloud/dataflow/sdk/util/CoderUtilsTest.java | 229 ++
.../dataflow/sdk/util/CombineFnUtilTest.java | 62 +
.../sdk/util/CounterAggregatorTest.java | 253 ++
.../sdk/util/DataflowPathValidatorTest.java | 92 +
.../sdk/util/ExecutableTriggerTest.java | 130 +
.../util/ExposedByteArrayInputStreamTest.java | 78 +
.../util/ExposedByteArrayOutputStreamTest.java | 245 ++
.../sdk/util/FileIOChannelFactoryTest.java | 226 ++
.../sdk/util/FinishedTriggersBitSetTest.java | 54 +
.../sdk/util/FinishedTriggersProperties.java | 109 +
.../sdk/util/FinishedTriggersSetTest.java | 60 +
.../sdk/util/GcsIOChannelFactoryTest.java | 43 +
.../cloud/dataflow/sdk/util/GcsUtilTest.java | 490 +++
.../sdk/util/GroupAlsoByWindowsProperties.java | 718 +++++
...oupAlsoByWindowsViaOutputBufferDoFnTest.java | 111 +
.../dataflow/sdk/util/IOChannelUtilsTest.java | 94 +
.../dataflow/sdk/util/InstanceBuilderTest.java | 115 +
.../IntervalBoundedExponentialBackOffTest.java | 99 +
.../sdk/util/KeyedWorkItemCoderTest.java | 61 +
.../util/LateDataDroppingDoFnRunnerTest.java | 115 +
.../sdk/util/MergingActiveWindowSetTest.java | 175 +
.../dataflow/sdk/util/MonitoringUtilTest.java | 146 +
.../sdk/util/MutationDetectorsTest.java | 148 +
.../cloud/dataflow/sdk/util/PTupleTest.java | 40 +
.../dataflow/sdk/util/PackageUtilTest.java | 482 +++
.../dataflow/sdk/util/RandomAccessDataTest.java | 205 ++
.../dataflow/sdk/util/ReduceFnRunnerTest.java | 1049 ++++++
.../cloud/dataflow/sdk/util/ReduceFnTester.java | 776 +++++
.../cloud/dataflow/sdk/util/ReshuffleTest.java | 208 ++
.../dataflow/sdk/util/ReshuffleTriggerTest.java | 58 +
.../util/RetryHttpRequestInitializerTest.java | 296 ++
.../sdk/util/SerializableUtilsTest.java | 165 +
.../cloud/dataflow/sdk/util/SerializerTest.java | 162 +
.../dataflow/sdk/util/SimpleDoFnRunnerTest.java | 86 +
.../dataflow/sdk/util/StreamUtilsTest.java | 71 +
.../dataflow/sdk/util/StringUtilsTest.java | 145 +
.../cloud/dataflow/sdk/util/StructsTest.java | 206 ++
.../cloud/dataflow/sdk/util/TimeUtilTest.java | 73 +
.../dataflow/sdk/util/TimerInternalsTest.java | 52 +
.../cloud/dataflow/sdk/util/TriggerTester.java | 585 ++++
.../sdk/util/UnownedInputStreamTest.java | 76 +
.../sdk/util/UnownedOutputStreamTest.java | 57 +
.../util/UploadIdResponseInterceptorTest.java | 99 +
.../sdk/util/UserCodeExceptionTest.java | 176 +
.../cloud/dataflow/sdk/util/VarIntTest.java | 277 ++
.../dataflow/sdk/util/WindowedValueTest.java | 57 +
.../cloud/dataflow/sdk/util/ZipFilesTest.java | 311 ++
.../sdk/util/common/CounterSetTest.java | 225 ++
.../dataflow/sdk/util/common/CounterTest.java | 589 ++++
.../sdk/util/common/CounterTestUtils.java | 56 +
.../sdk/util/common/ReflectHelpersTest.java | 126 +
.../dataflow/sdk/util/gcsfs/GcsPathTest.java | 333 ++
.../CopyOnAccessInMemoryStateInternalsTest.java | 553 ++++
.../util/state/InMemoryStateInternalsTest.java | 348 ++
.../sdk/util/state/StateNamespacesTest.java | 129 +
.../dataflow/sdk/util/state/StateTagTest.java | 173 +
.../cloud/dataflow/sdk/values/KVTest.java | 112 +
.../sdk/values/PCollectionListTest.java | 47 +
.../sdk/values/PCollectionTupleTest.java | 93 +
.../cloud/dataflow/sdk/values/PDoneTest.java | 102 +
.../cloud/dataflow/sdk/values/TupleTagTest.java | 87 +
.../dataflow/sdk/values/TypeDescriptorTest.java | 193 ++
.../dataflow/sdk/values/TypedPValueTest.java | 164 +
.../PipelineOptionsFactoryJava8Test.java | 90 +
sdks/java/java8tests/pom.xml | 184 ++
.../sdk/transforms/CombineJava8Test.java | 133 +
.../sdk/transforms/FilterJava8Test.java | 118 +
.../transforms/FlatMapElementsJava8Test.java | 84 +
.../sdk/transforms/MapElementsJava8Test.java | 77 +
.../sdk/transforms/PartitionJava8Test.java | 74 +
.../transforms/RemoveDuplicatesJava8Test.java | 98 +
.../sdk/transforms/WithKeysJava8Test.java | 73 +
.../sdk/transforms/WithTimestampsJava8Test.java | 65 +
sdks/java/javadoc/README.md | 4 +
sdks/java/javadoc/apiclient-docs/package-list | 34 +
sdks/java/javadoc/avro-docs/package-list | 30 +
sdks/java/javadoc/bq-docs/package-list | 2 +
.../java/javadoc/dataflow-sdk-docs/package-list | 11 +
sdks/java/javadoc/datastore-docs/package-list | 2 +
sdks/java/javadoc/guava-docs/package-list | 15 +
sdks/java/javadoc/hamcrest-docs/package-list | 10 +
.../jackson-annotations-docs/package-list | 1 +
.../javadoc/jackson-databind-docs/package-list | 20 +
sdks/java/javadoc/joda-docs/package-list | 7 +
sdks/java/javadoc/junit-docs/package-list | 7 +
sdks/java/javadoc/oauth-docs/package-list | 11 +
sdks/java/javadoc/overview.html | 31 +
sdks/java/maven-archetypes/examples/pom.xml | 56 +
.../META-INF/maven/archetype-metadata.xml | 29 +
.../main/resources/archetype-resources/pom.xml | 204 ++
.../src/main/java/DebuggingWordCount.java | 182 ++
.../src/main/java/MinimalWordCount.java | 115 +
.../src/main/java/WindowedWordCount.java | 262 ++
.../src/main/java/WordCount.java | 204 ++
.../java/common/DataflowExampleOptions.java | 29 +
.../main/java/common/DataflowExampleUtils.java | 398 +++
.../common/ExampleBigQueryTableOptions.java | 53 +
.../java/common/ExamplePubsubTopicOptions.java | 49 +
.../main/java/common/PubsubFileInjector.java | 153 +
.../src/test/java/DebuggingWordCountTest.java | 44 +
.../src/test/java/WordCountTest.java | 85 +
.../projects/basic/archetype.properties | 5 +
.../src/test/resources/projects/basic/goal.txt | 1 +
sdks/java/maven-archetypes/pom.xml | 41 +
sdks/java/maven-archetypes/starter/pom.xml | 57 +
.../META-INF/maven/archetype-metadata.xml | 21 +
.../main/resources/archetype-resources/pom.xml | 43 +
.../src/main/java/StarterPipeline.java | 67 +
.../projects/basic/archetype.properties | 5 +
.../src/test/resources/projects/basic/goal.txt | 1 +
.../resources/projects/basic/reference/pom.xml | 43 +
.../src/main/java/it/pkg/StarterPipeline.java | 67 +
travis/test_wordcount.sh | 4 +-
1664 files changed, 169896 insertions(+), 169901 deletions(-)
----------------------------------------------------------------------
[3/7] incubator-beam git commit: Merge branch 'master' into
temp-option
Posted by ke...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c4515687/sdks/java/core/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java
----------------------------------------------------------------------
diff --cc sdks/java/core/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java
index 0000000,c5f2d3f..300d5d5
mode 000000,100644..100644
--- a/sdks/java/core/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java
+++ b/sdks/java/core/src/test/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunnerTest.java
@@@ -1,0 -1,1370 +1,1368 @@@
+ /*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+ package com.google.cloud.dataflow.sdk.runners;
+
+ import static com.google.cloud.dataflow.sdk.util.WindowedValue.valueInGlobalWindow;
+ import static org.hamcrest.Matchers.containsInAnyOrder;
+ import static org.hamcrest.Matchers.containsString;
+ import static org.hamcrest.Matchers.instanceOf;
+ import static org.hamcrest.Matchers.startsWith;
+ import static org.hamcrest.collection.IsIterableContainingInOrder.contains;
+ import static org.junit.Assert.assertEquals;
+ import static org.junit.Assert.assertNotNull;
+ import static org.junit.Assert.assertNull;
+ import static org.junit.Assert.assertThat;
+ import static org.junit.Assert.assertTrue;
+ import static org.junit.Assert.fail;
+ import static org.mockito.Matchers.any;
+ import static org.mockito.Matchers.anyString;
+ import static org.mockito.Matchers.eq;
+ import static org.mockito.Mockito.mock;
+ import static org.mockito.Mockito.when;
+
+ import com.google.api.services.dataflow.Dataflow;
+ import com.google.api.services.dataflow.model.DataflowPackage;
+ import com.google.api.services.dataflow.model.Job;
+ import com.google.api.services.dataflow.model.ListJobsResponse;
+ import com.google.cloud.dataflow.sdk.Pipeline;
+ import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
+ import com.google.cloud.dataflow.sdk.coders.BigEndianIntegerCoder;
+ import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder;
+ import com.google.cloud.dataflow.sdk.coders.Coder;
+ import com.google.cloud.dataflow.sdk.coders.VarLongCoder;
+ import com.google.cloud.dataflow.sdk.io.AvroIO;
+ import com.google.cloud.dataflow.sdk.io.AvroSource;
+ import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+ import com.google.cloud.dataflow.sdk.io.Read;
+ import com.google.cloud.dataflow.sdk.io.TextIO;
+ import com.google.cloud.dataflow.sdk.options.DataflowPipelineDebugOptions;
+ import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+ import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+ import com.google.cloud.dataflow.sdk.options.PipelineOptions.CheckEnabled;
+ import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
+ import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.BatchViewAsList;
+ import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.BatchViewAsMap;
+ import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.BatchViewAsMultimap;
+ import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.TransformedMap;
+ import com.google.cloud.dataflow.sdk.runners.dataflow.TestCountingSource;
+ import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat;
+ import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.IsmRecord;
+ import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.IsmRecordCoder;
+ import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.MetadataKeyCoder;
+ import com.google.cloud.dataflow.sdk.transforms.Create;
+ import com.google.cloud.dataflow.sdk.transforms.DoFnTester;
+ import com.google.cloud.dataflow.sdk.transforms.PTransform;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.IntervalWindow;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
+ import com.google.cloud.dataflow.sdk.util.CoderUtils;
+ import com.google.cloud.dataflow.sdk.util.DataflowReleaseInfo;
+ import com.google.cloud.dataflow.sdk.util.GcsUtil;
+ import com.google.cloud.dataflow.sdk.util.NoopPathValidator;
+ import com.google.cloud.dataflow.sdk.util.TestCredential;
+ import com.google.cloud.dataflow.sdk.util.UserCodeException;
+ import com.google.cloud.dataflow.sdk.util.WindowedValue;
+ import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder;
+ import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
+ import com.google.cloud.dataflow.sdk.util.gcsfs.GcsPath;
+ import com.google.cloud.dataflow.sdk.values.KV;
+ import com.google.cloud.dataflow.sdk.values.PCollection;
+ import com.google.cloud.dataflow.sdk.values.PDone;
+ import com.google.cloud.dataflow.sdk.values.PInput;
+ import com.google.cloud.dataflow.sdk.values.PValue;
+ import com.google.cloud.dataflow.sdk.values.TimestampedValue;
+ import com.google.cloud.dataflow.sdk.values.TupleTag;
+ import com.google.cloud.dataflow.sdk.values.TupleTagList;
+ import com.google.common.collect.ImmutableList;
+ import com.google.common.collect.ImmutableMap;
+
+ import org.hamcrest.Description;
+ import org.hamcrest.Matchers;
+ import org.hamcrest.TypeSafeMatcher;
+ import org.joda.time.Instant;
+ import org.junit.Rule;
+ import org.junit.Test;
+ import org.junit.internal.matchers.ThrowableMessageMatcher;
+ import org.junit.rules.ExpectedException;
+ import org.junit.rules.TemporaryFolder;
+ import org.junit.runner.RunWith;
+ import org.junit.runners.JUnit4;
+ import org.mockito.ArgumentCaptor;
+ import org.mockito.Mockito;
+ import org.mockito.invocation.InvocationOnMock;
+ import org.mockito.stubbing.Answer;
+
+ import java.io.File;
+ import java.io.IOException;
+ import java.net.URL;
+ import java.net.URLClassLoader;
+ import java.nio.channels.FileChannel;
+ import java.nio.channels.SeekableByteChannel;
+ import java.nio.file.Files;
+ import java.nio.file.StandardOpenOption;
+ import java.util.ArrayList;
+ import java.util.Arrays;
+ import java.util.Collections;
+ import java.util.LinkedList;
+ import java.util.List;
+ import java.util.Map;
+
+ /**
+ * Tests for DataflowPipelineRunner.
+ */
+ @RunWith(JUnit4.class)
+ public class DataflowPipelineRunnerTest {
+
+ private static final String PROJECT_ID = "some-project";
+
+ @Rule
+ public TemporaryFolder tmpFolder = new TemporaryFolder();
+ @Rule
+ public ExpectedException thrown = ExpectedException.none();
+
+ // Asserts that the given Job has all expected fields set.
+ private static void assertValidJob(Job job) {
+ assertNull(job.getId());
+ assertNull(job.getCurrentState());
+ }
+
+ private DataflowPipeline buildDataflowPipeline(DataflowPipelineOptions options) {
+ options.setStableUniqueNames(CheckEnabled.ERROR);
+ DataflowPipeline p = DataflowPipeline.create(options);
+
+ p.apply(TextIO.Read.named("ReadMyFile").from("gs://bucket/object"))
+ .apply(TextIO.Write.named("WriteMyFile").to("gs://bucket/object"));
+
+ return p;
+ }
+
+ private static Dataflow buildMockDataflow(
+ final ArgumentCaptor<Job> jobCaptor) throws IOException {
+ Dataflow mockDataflowClient = mock(Dataflow.class);
+ Dataflow.Projects mockProjects = mock(Dataflow.Projects.class);
+ Dataflow.Projects.Jobs mockJobs = mock(Dataflow.Projects.Jobs.class);
+ Dataflow.Projects.Jobs.Create mockRequest =
+ mock(Dataflow.Projects.Jobs.Create.class);
+ Dataflow.Projects.Jobs.List mockList = mock(Dataflow.Projects.Jobs.List.class);
+
+ when(mockDataflowClient.projects()).thenReturn(mockProjects);
+ when(mockProjects.jobs()).thenReturn(mockJobs);
+ when(mockJobs.create(eq(PROJECT_ID), jobCaptor.capture()))
+ .thenReturn(mockRequest);
+ when(mockJobs.list(eq(PROJECT_ID))).thenReturn(mockList);
+ when(mockList.setPageToken(anyString())).thenReturn(mockList);
+ when(mockList.execute())
+ .thenReturn(new ListJobsResponse().setJobs(
+ Arrays.asList(new Job()
+ .setName("oldJobName")
+ .setId("oldJobId")
+ .setCurrentState("JOB_STATE_RUNNING"))));
+
+ Job resultJob = new Job();
+ resultJob.setId("newid");
+ when(mockRequest.execute()).thenReturn(resultJob);
+ return mockDataflowClient;
+ }
+
+ private GcsUtil buildMockGcsUtil(boolean bucketExists) throws IOException {
+ GcsUtil mockGcsUtil = mock(GcsUtil.class);
+ when(mockGcsUtil.create(any(GcsPath.class), anyString()))
+ .then(new Answer<SeekableByteChannel>() {
+ @Override
+ public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable {
+ return FileChannel.open(
+ Files.createTempFile("channel-", ".tmp"),
+ StandardOpenOption.CREATE, StandardOpenOption.DELETE_ON_CLOSE);
+ }
+ });
+
+ when(mockGcsUtil.isGcsPatternSupported(anyString())).thenReturn(true);
+ when(mockGcsUtil.expand(any(GcsPath.class))).then(new Answer<List<GcsPath>>() {
+ @Override
+ public List<GcsPath> answer(InvocationOnMock invocation) throws Throwable {
+ return ImmutableList.of((GcsPath) invocation.getArguments()[0]);
+ }
+ });
+ when(mockGcsUtil.bucketExists(any(GcsPath.class))).thenReturn(bucketExists);
+ return mockGcsUtil;
+ }
+
+ private DataflowPipelineOptions buildPipelineOptions() throws IOException {
+ ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
+ return buildPipelineOptions(jobCaptor);
+ }
+
+ private DataflowPipelineOptions buildPipelineOptions(
+ ArgumentCaptor<Job> jobCaptor) throws IOException {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setProject(PROJECT_ID);
+ options.setTempLocation("gs://somebucket/some/path");
+ // Set FILES_PROPERTY to empty to prevent a default value calculated from classpath.
+ options.setFilesToStage(new LinkedList<String>());
+ options.setDataflowClient(buildMockDataflow(jobCaptor));
+ options.setGcsUtil(buildMockGcsUtil(true /* bucket exists */));
+ options.setGcpCredential(new TestCredential());
+ return options;
+ }
+
+ @Test
+ public void testRun() throws IOException {
+ ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
+
+ DataflowPipelineOptions options = buildPipelineOptions(jobCaptor);
+ DataflowPipeline p = buildDataflowPipeline(options);
+ DataflowPipelineJob job = p.run();
+ assertEquals("newid", job.getJobId());
+ assertValidJob(jobCaptor.getValue());
+ }
+
+ @Test
+ public void testRunReturnDifferentRequestId() throws IOException {
+ DataflowPipelineOptions options = buildPipelineOptions();
+ Dataflow mockDataflowClient = options.getDataflowClient();
+ Dataflow.Projects.Jobs.Create mockRequest = mock(Dataflow.Projects.Jobs.Create.class);
+ when(mockDataflowClient.projects().jobs().create(eq(PROJECT_ID), any(Job.class)))
+ .thenReturn(mockRequest);
+ Job resultJob = new Job();
+ resultJob.setId("newid");
+ // Return a different request id.
+ resultJob.setClientRequestId("different_request_id");
+ when(mockRequest.execute()).thenReturn(resultJob);
+
+ DataflowPipeline p = buildDataflowPipeline(options);
+ try {
+ p.run();
+ fail("Expected DataflowJobAlreadyExistsException");
+ } catch (DataflowJobAlreadyExistsException expected) {
+ assertThat(expected.getMessage(),
+ containsString("If you want to submit a second job, try again by setting a "
+ + "different name using --jobName."));
+ assertEquals(expected.getJob().getJobId(), resultJob.getId());
+ }
+ }
+
+ @Test
+ public void testUpdate() throws IOException {
+ ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
+
+ DataflowPipelineOptions options = buildPipelineOptions(jobCaptor);
+ options.setUpdate(true);
+ options.setJobName("oldJobName");
+ DataflowPipeline p = buildDataflowPipeline(options);
+ DataflowPipelineJob job = p.run();
+ assertEquals("newid", job.getJobId());
+ assertValidJob(jobCaptor.getValue());
+ }
+
+ @Test
+ public void testUpdateNonExistentPipeline() throws IOException {
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Could not find running job named badJobName");
+
+ DataflowPipelineOptions options = buildPipelineOptions();
+ options.setUpdate(true);
+ options.setJobName("badJobName");
+ DataflowPipeline p = buildDataflowPipeline(options);
+ p.run();
+ }
+
+ @Test
+ public void testUpdateAlreadyUpdatedPipeline() throws IOException {
+ DataflowPipelineOptions options = buildPipelineOptions();
+ options.setUpdate(true);
+ options.setJobName("oldJobName");
+ Dataflow mockDataflowClient = options.getDataflowClient();
+ Dataflow.Projects.Jobs.Create mockRequest = mock(Dataflow.Projects.Jobs.Create.class);
+ when(mockDataflowClient.projects().jobs().create(eq(PROJECT_ID), any(Job.class)))
+ .thenReturn(mockRequest);
+ final Job resultJob = new Job();
+ resultJob.setId("newid");
+ // Return a different request id.
+ resultJob.setClientRequestId("different_request_id");
+ when(mockRequest.execute()).thenReturn(resultJob);
+
+ DataflowPipeline p = buildDataflowPipeline(options);
+
+ thrown.expect(DataflowJobAlreadyUpdatedException.class);
+ thrown.expect(new TypeSafeMatcher<DataflowJobAlreadyUpdatedException>() {
+ @Override
+ public void describeTo(Description description) {
+ description.appendText("Expected job ID: " + resultJob.getId());
+ }
+
+ @Override
+ protected boolean matchesSafely(DataflowJobAlreadyUpdatedException item) {
+ return resultJob.getId().equals(item.getJob().getJobId());
+ }
+ });
+ thrown.expectMessage("The job named oldjobname with id: oldJobId has already been updated "
+ + "into job id: newid and cannot be updated again.");
+ p.run();
+ }
+
+ @Test
+ public void testRunWithFiles() throws IOException {
+ // Test that the function DataflowPipelineRunner.stageFiles works as
+ // expected.
+ GcsUtil mockGcsUtil = buildMockGcsUtil(true /* bucket exists */);
+ final String gcsStaging = "gs://somebucket/some/path";
+ final String gcsTemp = "gs://somebucket/some/temp/path";
+ final String cloudDataflowDataset = "somedataset";
+
+ // Create some temporary files.
+ File temp1 = File.createTempFile("DataflowPipelineRunnerTest", "txt");
+ temp1.deleteOnExit();
+ File temp2 = File.createTempFile("DataflowPipelineRunnerTest2", "txt");
+ temp2.deleteOnExit();
+
+ String overridePackageName = "alias.txt";
+
+ ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setFilesToStage(ImmutableList.of(
+ temp1.getAbsolutePath(),
+ overridePackageName + "=" + temp2.getAbsolutePath()));
+ options.setStagingLocation(gcsStaging);
+ options.setTempLocation(gcsTemp);
+ options.setTempDatasetId(cloudDataflowDataset);
+ options.setProject(PROJECT_ID);
+ options.setJobName("job");
+ options.setDataflowClient(buildMockDataflow(jobCaptor));
+ options.setGcsUtil(mockGcsUtil);
+ options.setGcpCredential(new TestCredential());
+
+ DataflowPipeline p = buildDataflowPipeline(options);
+
+ DataflowPipelineJob job = p.run();
+ assertEquals("newid", job.getJobId());
+
+ Job workflowJob = jobCaptor.getValue();
+ assertValidJob(workflowJob);
+
+ assertEquals(
+ 2,
+ workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().size());
+ DataflowPackage workflowPackage1 =
+ workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(0);
+ assertThat(workflowPackage1.getName(), startsWith(temp1.getName()));
+ DataflowPackage workflowPackage2 =
+ workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(1);
+ assertEquals(overridePackageName, workflowPackage2.getName());
+
+ assertEquals(
+ "storage.googleapis.com/somebucket/some/temp/path",
+ workflowJob.getEnvironment().getTempStoragePrefix());
+ assertEquals(
+ cloudDataflowDataset,
+ workflowJob.getEnvironment().getDataset());
+ assertEquals(
+ DataflowReleaseInfo.getReleaseInfo().getName(),
+ workflowJob.getEnvironment().getUserAgent().get("name"));
+ assertEquals(
+ DataflowReleaseInfo.getReleaseInfo().getVersion(),
+ workflowJob.getEnvironment().getUserAgent().get("version"));
+ }
+
+ @Test
+ public void runWithDefaultFilesToStage() throws Exception {
+ DataflowPipelineOptions options = buildPipelineOptions();
+ options.setFilesToStage(null);
+ DataflowPipelineRunner.fromOptions(options);
+ assertTrue(!options.getFilesToStage().isEmpty());
+ }
+
+ @Test
+ public void detectClassPathResourceWithFileResources() throws Exception {
+ File file = tmpFolder.newFile("file");
+ File file2 = tmpFolder.newFile("file2");
+ URLClassLoader classLoader = new URLClassLoader(new URL[]{
+ file.toURI().toURL(),
+ file2.toURI().toURL()
+ });
+
+ assertEquals(ImmutableList.of(file.getAbsolutePath(), file2.getAbsolutePath()),
+ DataflowPipelineRunner.detectClassPathResourcesToStage(classLoader));
+ }
+
+ @Test
+ public void detectClassPathResourcesWithUnsupportedClassLoader() {
+ ClassLoader mockClassLoader = Mockito.mock(ClassLoader.class);
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Unable to use ClassLoader to detect classpath elements.");
+
+ DataflowPipelineRunner.detectClassPathResourcesToStage(mockClassLoader);
+ }
+
+ @Test
+ public void detectClassPathResourceWithNonFileResources() throws Exception {
+ String url = "http://www.google.com/all-the-secrets.jar";
+ URLClassLoader classLoader = new URLClassLoader(new URL[]{
+ new URL(url)
+ });
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Unable to convert url (" + url + ") to file.");
+
+ DataflowPipelineRunner.detectClassPathResourcesToStage(classLoader);
+ }
+
+ @Test
+ public void testGcsStagingLocationInitialization() throws Exception {
+ // Test that the staging location is initialized correctly.
+ String gcsTemp = "gs://somebucket/some/temp/path";
+
+ // Set temp location (required), and check that staging location is set.
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setTempLocation(gcsTemp);
+ options.setProject(PROJECT_ID);
+ options.setGcpCredential(new TestCredential());
+ options.setGcsUtil(buildMockGcsUtil(true /* bucket exists */));
+
+ DataflowPipelineRunner.fromOptions(options);
+
+ assertNotNull(options.getStagingLocation());
+ }
+
+ @Test
+ public void testNonGcsFilePathInReadFailure() throws IOException {
+ ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
+
+ Pipeline p = buildDataflowPipeline(buildPipelineOptions(jobCaptor));
+ p.apply(TextIO.Read.named("ReadMyNonGcsFile").from(tmpFolder.newFile().getPath()));
+
+ thrown.expectCause(Matchers.allOf(
+ instanceOf(IllegalArgumentException.class),
+ ThrowableMessageMatcher.hasMessage(
+ containsString("expected a valid 'gs://' path but was given"))));
+ p.run();
+ assertValidJob(jobCaptor.getValue());
+ }
+
+ @Test
+ public void testNonGcsFilePathInWriteFailure() throws IOException {
+ Pipeline p = buildDataflowPipeline(buildPipelineOptions());
+ PCollection<String> pc = p.apply(TextIO.Read.named("ReadMyGcsFile").from("gs://bucket/object"));
+
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage(containsString("expected a valid 'gs://' path but was given"));
+ pc.apply(TextIO.Write.named("WriteMyNonGcsFile").to("/tmp/file"));
+ }
+
+ @Test
+ public void testMultiSlashGcsFileReadPath() throws IOException {
+ ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
+
+ Pipeline p = buildDataflowPipeline(buildPipelineOptions(jobCaptor));
+ p.apply(TextIO.Read.named("ReadInvalidGcsFile")
+ .from("gs://bucket/tmp//file"));
+
+ thrown.expectCause(Matchers.allOf(
+ instanceOf(IllegalArgumentException.class),
+ ThrowableMessageMatcher.hasMessage(containsString("consecutive slashes"))));
+ p.run();
+ assertValidJob(jobCaptor.getValue());
+ }
+
+ @Test
+ public void testMultiSlashGcsFileWritePath() throws IOException {
+ Pipeline p = buildDataflowPipeline(buildPipelineOptions());
+ PCollection<String> pc = p.apply(TextIO.Read.named("ReadMyGcsFile").from("gs://bucket/object"));
+
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("consecutive slashes");
+ pc.apply(TextIO.Write.named("WriteInvalidGcsFile").to("gs://bucket/tmp//file"));
+ }
+
+ @Test
+ public void testInvalidTempLocation() throws IOException {
+ ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
+
+ DataflowPipelineOptions options = buildPipelineOptions(jobCaptor);
+ options.setTempLocation("file://temp/location");
+
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage(containsString("expected a valid 'gs://' path but was given"));
+ DataflowPipelineRunner.fromOptions(options);
+ assertValidJob(jobCaptor.getValue());
+ }
+
+ @Test
+ public void testInvalidStagingLocation() throws IOException {
+ DataflowPipelineOptions options = buildPipelineOptions();
+ options.setStagingLocation("file://my/staging/location");
+ try {
+ DataflowPipelineRunner.fromOptions(options);
+ fail("fromOptions should have failed");
+ } catch (IllegalArgumentException e) {
+ assertThat(e.getMessage(), containsString("expected a valid 'gs://' path but was given"));
+ }
+ options.setStagingLocation("my/staging/location");
+ try {
+ DataflowPipelineRunner.fromOptions(options);
+ fail("fromOptions should have failed");
+ } catch (IllegalArgumentException e) {
+ assertThat(e.getMessage(), containsString("expected a valid 'gs://' path but was given"));
+ }
+ }
+
+ @Test
+ public void testNonExistentTempLocation() throws IOException {
+ ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
+
+ GcsUtil mockGcsUtil = buildMockGcsUtil(false /* bucket exists */);
+ DataflowPipelineOptions options = buildPipelineOptions(jobCaptor);
+ options.setGcsUtil(mockGcsUtil);
+ options.setTempLocation("gs://non-existent-bucket/location");
+
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage(containsString(
+ "Output path does not exist or is not writeable: gs://non-existent-bucket/location"));
+ DataflowPipelineRunner.fromOptions(options);
+ assertValidJob(jobCaptor.getValue());
+ }
+
+ @Test
+ public void testNonExistentStagingLocation() throws IOException {
+ ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
+
+ GcsUtil mockGcsUtil = buildMockGcsUtil(false /* bucket exists */);
+ DataflowPipelineOptions options = buildPipelineOptions(jobCaptor);
+ options.setGcsUtil(mockGcsUtil);
+ options.setStagingLocation("gs://non-existent-bucket/location");
+
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage(containsString(
+ "Output path does not exist or is not writeable: gs://non-existent-bucket/location"));
+ DataflowPipelineRunner.fromOptions(options);
+ assertValidJob(jobCaptor.getValue());
+ }
+
+ @Test
+ public void testNoProjectFails() {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+
+ options.setRunner(DataflowPipelineRunner.class);
+ // Explicitly set to null to prevent the default instance factory from reading credentials
+ // from a user's environment, causing this test to fail.
+ options.setProject(null);
+
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Project id");
+ thrown.expectMessage("when running a Dataflow in the cloud");
+
+ DataflowPipelineRunner.fromOptions(options);
+ }
+
+ @Test
+ public void testProjectId() throws IOException {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setRunner(DataflowPipelineRunner.class);
+ options.setProject("foo-12345");
+
+ options.setStagingLocation("gs://spam/ham/eggs");
+ options.setGcsUtil(buildMockGcsUtil(true /* bucket exists */));
+ options.setGcpCredential(new TestCredential());
+
+ DataflowPipelineRunner.fromOptions(options);
+ }
+
+ @Test
+ public void testProjectPrefix() throws IOException {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setRunner(DataflowPipelineRunner.class);
+ options.setProject("google.com:some-project-12345");
+
+ options.setStagingLocation("gs://spam/ham/eggs");
+ options.setGcsUtil(buildMockGcsUtil(true /* bucket exists */));
+ options.setGcpCredential(new TestCredential());
+
+ DataflowPipelineRunner.fromOptions(options);
+ }
+
+ @Test
+ public void testProjectNumber() throws IOException {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setRunner(DataflowPipelineRunner.class);
+ options.setProject("12345");
+
+ options.setStagingLocation("gs://spam/ham/eggs");
+ options.setGcsUtil(buildMockGcsUtil(true /* bucket exists */));
+
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Project ID");
+ thrown.expectMessage("project number");
+
+ DataflowPipelineRunner.fromOptions(options);
+ }
+
+ @Test
+ public void testProjectDescription() throws IOException {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setRunner(DataflowPipelineRunner.class);
+ options.setProject("some project");
+
+ options.setStagingLocation("gs://spam/ham/eggs");
+ options.setGcsUtil(buildMockGcsUtil(true /* bucket exists */));
+
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Project ID");
+ thrown.expectMessage("project description");
+
+ DataflowPipelineRunner.fromOptions(options);
+ }
+
+ @Test
+ public void testInvalidNumberOfWorkerHarnessThreads() throws IOException {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setRunner(DataflowPipelineRunner.class);
+ options.setProject("foo-12345");
+
+ options.setStagingLocation("gs://spam/ham/eggs");
+ options.setGcsUtil(buildMockGcsUtil(true /* bucket exists */));
+
+ options.as(DataflowPipelineDebugOptions.class).setNumberOfWorkerHarnessThreads(-1);
+
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Number of worker harness threads");
+ thrown.expectMessage("Please make sure the value is non-negative.");
+
+ DataflowPipelineRunner.fromOptions(options);
+ }
+
+ @Test
+ public void testNoStagingLocationAndNoTempLocationFails() {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setRunner(DataflowPipelineRunner.class);
+ options.setProject("foo-project");
+
+ thrown.expect(IllegalArgumentException.class);
- thrown.expectMessage("Missing required value for group");
- thrown.expectMessage(DataflowPipelineOptions.DATAFLOW_STORAGE_LOCATION);
- thrown.expectMessage("getStagingLocation");
- thrown.expectMessage("getTempLocation");
++ thrown.expectMessage(
++ "Missing required value: at least one of tempLocation or stagingLocation must be set.");
+
+ DataflowPipelineRunner.fromOptions(options);
+ }
+
+ @Test
+ public void testStagingLocationAndNoTempLocationSucceeds() throws Exception {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setRunner(DataflowPipelineRunner.class);
+ options.setGcpCredential(new TestCredential());
+ options.setProject("foo-project");
+ options.setStagingLocation("gs://spam/ham/eggs");
+ options.setGcsUtil(buildMockGcsUtil(true /* bucket exists */));
+
+ DataflowPipelineRunner.fromOptions(options);
+ }
+
+ @Test
+ public void testTempLocationAndNoStagingLocationSucceeds() throws Exception {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setRunner(DataflowPipelineRunner.class);
+ options.setGcpCredential(new TestCredential());
+ options.setProject("foo-project");
+ options.setTempLocation("gs://spam/ham/eggs");
+ options.setGcsUtil(buildMockGcsUtil(true /* bucket exists */));
+
+ DataflowPipelineRunner.fromOptions(options);
+ }
+
+ @Test
+ public void testInvalidJobName() throws IOException {
+ List<String> invalidNames = Arrays.asList(
+ "invalid_name",
+ "0invalid",
+ "invalid-");
+ List<String> expectedReason = Arrays.asList(
+ "JobName invalid",
+ "JobName invalid",
+ "JobName invalid");
+
+ for (int i = 0; i < invalidNames.size(); ++i) {
+ DataflowPipelineOptions options = buildPipelineOptions();
+ options.setJobName(invalidNames.get(i));
+
+ try {
+ DataflowPipelineRunner.fromOptions(options);
+ fail("Expected IllegalArgumentException for jobName "
+ + options.getJobName());
+ } catch (IllegalArgumentException e) {
+ assertThat(e.getMessage(),
+ containsString(expectedReason.get(i)));
+ }
+ }
+ }
+
+ @Test
+ public void testValidJobName() throws IOException {
+ List<String> names = Arrays.asList("ok", "Ok", "A-Ok", "ok-123",
+ "this-one-is-fairly-long-01234567890123456789");
+
+ for (String name : names) {
+ DataflowPipelineOptions options = buildPipelineOptions();
+ options.setJobName(name);
+
+ DataflowPipelineRunner runner = DataflowPipelineRunner
+ .fromOptions(options);
+ assertNotNull(runner);
+ }
+ }
+
+ /**
+ * A fake PTransform for testing.
+ */
+ public static class TestTransform
+ extends PTransform<PCollection<Integer>, PCollection<Integer>> {
+ public boolean translated = false;
+
+ @Override
+ public PCollection<Integer> apply(PCollection<Integer> input) {
+ return PCollection.<Integer>createPrimitiveOutputInternal(
+ input.getPipeline(),
+ WindowingStrategy.globalDefault(),
+ input.isBounded());
+ }
+
+ @Override
+ protected Coder<?> getDefaultOutputCoder(PCollection<Integer> input) {
+ return input.getCoder();
+ }
+ }
+
+ @Test
+ public void testTransformTranslatorMissing() throws IOException {
+ // Test that we throw if we don't provide a translation.
+ ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
+
+ DataflowPipelineOptions options = buildPipelineOptions(jobCaptor);
+ DataflowPipeline p = DataflowPipeline.create(options);
+
+ p.apply(Create.of(Arrays.asList(1, 2, 3)))
+ .apply(new TestTransform());
+
+ thrown.expect(IllegalStateException.class);
+ thrown.expectMessage(Matchers.containsString("no translator registered"));
+ DataflowPipelineTranslator.fromOptions(options)
+ .translate(p, p.getRunner(), Collections.<DataflowPackage>emptyList());
+ assertValidJob(jobCaptor.getValue());
+ }
+
+ @Test
+ public void testTransformTranslator() throws IOException {
+ // Test that we can provide a custom translation
+ DataflowPipelineOptions options = buildPipelineOptions();
+ DataflowPipeline p = DataflowPipeline.create(options);
+ TestTransform transform = new TestTransform();
+
+ p.apply(Create.of(Arrays.asList(1, 2, 3)).withCoder(BigEndianIntegerCoder.of()))
+ .apply(transform);
+
+ DataflowPipelineTranslator translator = DataflowPipelineRunner
+ .fromOptions(options).getTranslator();
+
+ DataflowPipelineTranslator.registerTransformTranslator(
+ TestTransform.class,
+ new DataflowPipelineTranslator.TransformTranslator<TestTransform>() {
+ @SuppressWarnings("unchecked")
+ @Override
+ public void translate(
+ TestTransform transform,
+ DataflowPipelineTranslator.TranslationContext context) {
+ transform.translated = true;
+
+ // Note: This is about the minimum needed to fake out a
+ // translation. This obviously isn't a real translation.
+ context.addStep(transform, "TestTranslate");
+ context.addOutput("output", context.getOutput(transform));
+ }
+ });
+
+ translator.translate(
+ p, p.getRunner(), Collections.<DataflowPackage>emptyList());
+ assertTrue(transform.translated);
+ }
+
+ /** Records all the composite transforms visited within the Pipeline. */
+ private static class CompositeTransformRecorder implements PipelineVisitor {
+ private List<PTransform<?, ?>> transforms = new ArrayList<>();
+
+ @Override
+ public void enterCompositeTransform(TransformTreeNode node) {
+ if (node.getTransform() != null) {
+ transforms.add(node.getTransform());
+ }
+ }
+
+ @Override
+ public void leaveCompositeTransform(TransformTreeNode node) {
+ }
+
+ @Override
+ public void visitTransform(TransformTreeNode node) {
+ }
+
+ @Override
+ public void visitValue(PValue value, TransformTreeNode producer) {
+ }
+
+ public List<PTransform<?, ?>> getCompositeTransforms() {
+ return transforms;
+ }
+ }
+
+ @Test
+ public void testApplyIsScopedToExactClass() throws IOException {
+ DataflowPipelineOptions options = buildPipelineOptions();
+ DataflowPipeline p = DataflowPipeline.create(options);
+
+ Create.TimestampedValues<String> transform =
+ Create.timestamped(Arrays.asList(TimestampedValue.of("TestString", Instant.now())));
+ p.apply(transform);
+
+ CompositeTransformRecorder recorder = new CompositeTransformRecorder();
+ p.traverseTopologically(recorder);
+
+ assertThat("Expected to have seen CreateTimestamped composite transform.",
+ recorder.getCompositeTransforms(),
+ Matchers.<PTransform<?, ?>>contains(transform));
+ }
+
+ @Test
+ public void testToString() {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setJobName("TestJobName");
+ options.setProject("test-project");
+ options.setTempLocation("gs://test/temp/location");
+ options.setGcpCredential(new TestCredential());
+ options.setPathValidatorClass(NoopPathValidator.class);
+ assertEquals("DataflowPipelineRunner#TestJobName",
+ DataflowPipelineRunner.fromOptions(options).toString());
+ }
+
+ private static PipelineOptions makeOptions(boolean streaming) {
+ DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
+ options.setRunner(DataflowPipelineRunner.class);
+ options.setStreaming(streaming);
+ options.setJobName("TestJobName");
+ options.setProject("test-project");
+ options.setTempLocation("gs://test/temp/location");
+ options.setGcpCredential(new TestCredential());
+ options.setPathValidatorClass(NoopPathValidator.class);
+ return options;
+ }
+
+ private void testUnsupportedSource(PTransform<PInput, ?> source, String name, boolean streaming)
+ throws Exception {
+ String mode = streaming ? "streaming" : "batch";
+ thrown.expect(UnsupportedOperationException.class);
+ thrown.expectMessage(
+ "The DataflowPipelineRunner in " + mode + " mode does not support " + name);
+
+ Pipeline p = Pipeline.create(makeOptions(streaming));
+ p.apply(source);
+ p.run();
+ }
+
+ @Test
+ public void testBoundedSourceUnsupportedInStreaming() throws Exception {
+ testUnsupportedSource(
+ AvroSource.readFromFileWithClass("foo", String.class), "Read.Bounded", true);
+ }
+
+ @Test
+ public void testBigQueryIOSourceUnsupportedInStreaming() throws Exception {
+ testUnsupportedSource(
+ BigQueryIO.Read.from("project:bar.baz").withoutValidation(), "BigQueryIO.Read", true);
+ }
+
+ @Test
+ public void testAvroIOSourceUnsupportedInStreaming() throws Exception {
+ testUnsupportedSource(
+ AvroIO.Read.from("foo"), "AvroIO.Read", true);
+ }
+
+ @Test
+ public void testTextIOSourceUnsupportedInStreaming() throws Exception {
+ testUnsupportedSource(TextIO.Read.from("foo"), "TextIO.Read", true);
+ }
+
+ @Test
+ public void testReadBoundedSourceUnsupportedInStreaming() throws Exception {
+ testUnsupportedSource(Read.from(AvroSource.from("/tmp/test")), "Read.Bounded", true);
+ }
+
+ @Test
+ public void testReadUnboundedUnsupportedInBatch() throws Exception {
+ testUnsupportedSource(Read.from(new TestCountingSource(1)), "Read.Unbounded", false);
+ }
+
+ private void testUnsupportedSink(
+ PTransform<PCollection<String>, PDone> sink, String name, boolean streaming)
+ throws Exception {
+ thrown.expect(UnsupportedOperationException.class);
+ thrown.expectMessage(
+ "The DataflowPipelineRunner in streaming mode does not support " + name);
+
+ Pipeline p = Pipeline.create(makeOptions(streaming));
+ p.apply(Create.of("foo")).apply(sink);
+ p.run();
+ }
+
+ @Test
+ public void testAvroIOSinkUnsupportedInStreaming() throws Exception {
+ testUnsupportedSink(AvroIO.Write.to("foo").withSchema(String.class), "AvroIO.Write", true);
+ }
+
+ @Test
+ public void testTextIOSinkUnsupportedInStreaming() throws Exception {
+ testUnsupportedSink(TextIO.Write.to("foo"), "TextIO.Write", true);
+ }
+
+ @Test
+ public void testBatchViewAsListToIsmRecordForGlobalWindow() throws Exception {
+ DoFnTester<String, IsmRecord<WindowedValue<String>>> doFnTester =
+ DoFnTester.of(new BatchViewAsList.ToIsmRecordForGlobalWindowDoFn<String>());
+
+ // The order of the output elements is important relative to processing order
+ assertThat(doFnTester.processBatch(ImmutableList.of("a", "b", "c")), contains(
+ IsmRecord.of(ImmutableList.of(GlobalWindow.INSTANCE, 0L), valueInGlobalWindow("a")),
+ IsmRecord.of(ImmutableList.of(GlobalWindow.INSTANCE, 1L), valueInGlobalWindow("b")),
+ IsmRecord.of(ImmutableList.of(GlobalWindow.INSTANCE, 2L), valueInGlobalWindow("c"))));
+ }
+
+ @Test
+ public void testBatchViewAsListToIsmRecordForNonGlobalWindow() throws Exception {
+ DoFnTester<KV<Integer, Iterable<KV<IntervalWindow, WindowedValue<Long>>>>,
+ IsmRecord<WindowedValue<Long>>> doFnTester =
+ DoFnTester.of(
+ new BatchViewAsList.ToIsmRecordForNonGlobalWindowDoFn<Long, IntervalWindow>(
+ IntervalWindow.getCoder()));
+
+ IntervalWindow windowA = new IntervalWindow(new Instant(0), new Instant(10));
+ IntervalWindow windowB = new IntervalWindow(new Instant(10), new Instant(20));
+ IntervalWindow windowC = new IntervalWindow(new Instant(20), new Instant(30));
+
+ Iterable<KV<Integer, Iterable<KV<IntervalWindow, WindowedValue<Long>>>>> inputElements =
+ ImmutableList.of(
+ KV.of(1, (Iterable<KV<IntervalWindow, WindowedValue<Long>>>) ImmutableList.of(
+ KV.of(
+ windowA, WindowedValue.of(110L, new Instant(1), windowA, PaneInfo.NO_FIRING)),
+ KV.of(
+ windowA, WindowedValue.of(111L, new Instant(3), windowA, PaneInfo.NO_FIRING)),
+ KV.of(
+ windowA, WindowedValue.of(112L, new Instant(4), windowA, PaneInfo.NO_FIRING)),
+ KV.of(
+ windowB, WindowedValue.of(120L, new Instant(12), windowB, PaneInfo.NO_FIRING)),
+ KV.of(
+ windowB, WindowedValue.of(121L, new Instant(14), windowB, PaneInfo.NO_FIRING))
+ )),
+ KV.of(2, (Iterable<KV<IntervalWindow, WindowedValue<Long>>>) ImmutableList.of(
+ KV.of(
+ windowC, WindowedValue.of(210L, new Instant(25), windowC, PaneInfo.NO_FIRING))
+ )));
+
+ // The order of the output elements is important relative to processing order
+ assertThat(doFnTester.processBatch(inputElements), contains(
+ IsmRecord.of(ImmutableList.of(windowA, 0L),
+ WindowedValue.of(110L, new Instant(1), windowA, PaneInfo.NO_FIRING)),
+ IsmRecord.of(ImmutableList.of(windowA, 1L),
+ WindowedValue.of(111L, new Instant(3), windowA, PaneInfo.NO_FIRING)),
+ IsmRecord.of(ImmutableList.of(windowA, 2L),
+ WindowedValue.of(112L, new Instant(4), windowA, PaneInfo.NO_FIRING)),
+ IsmRecord.of(ImmutableList.of(windowB, 0L),
+ WindowedValue.of(120L, new Instant(12), windowB, PaneInfo.NO_FIRING)),
+ IsmRecord.of(ImmutableList.of(windowB, 1L),
+ WindowedValue.of(121L, new Instant(14), windowB, PaneInfo.NO_FIRING)),
+ IsmRecord.of(ImmutableList.of(windowC, 0L),
+ WindowedValue.of(210L, new Instant(25), windowC, PaneInfo.NO_FIRING))));
+ }
+
+ @Test
+ public void testToIsmRecordForMapLikeDoFn() throws Exception {
+ TupleTag<KV<Integer, KV<IntervalWindow, Long>>> outputForSizeTag = new TupleTag<>();
+ TupleTag<KV<Integer, KV<IntervalWindow, Long>>> outputForEntrySetTag = new TupleTag<>();
+
+ Coder<Long> keyCoder = VarLongCoder.of();
+ Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
+
+ IsmRecordCoder<WindowedValue<Long>> ismCoder = IsmRecordCoder.of(
+ 1,
+ 2,
+ ImmutableList.<Coder<?>>of(
+ MetadataKeyCoder.of(keyCoder),
+ IntervalWindow.getCoder(),
+ BigEndianLongCoder.of()),
+ FullWindowedValueCoder.of(VarLongCoder.of(), windowCoder));
+
+ DoFnTester<KV<Integer, Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>>,
+ IsmRecord<WindowedValue<Long>>> doFnTester =
+ DoFnTester.of(new BatchViewAsMultimap.ToIsmRecordForMapLikeDoFn<Long, Long, IntervalWindow>(
+ outputForSizeTag,
+ outputForEntrySetTag,
+ windowCoder,
+ keyCoder,
+ ismCoder,
+ false /* unique keys */));
+ doFnTester.setSideOutputTags(TupleTagList.of(
+ ImmutableList.<TupleTag<?>>of(outputForSizeTag, outputForEntrySetTag)));
+
+ IntervalWindow windowA = new IntervalWindow(new Instant(0), new Instant(10));
+ IntervalWindow windowB = new IntervalWindow(new Instant(10), new Instant(20));
+ IntervalWindow windowC = new IntervalWindow(new Instant(20), new Instant(30));
+
+ Iterable<KV<Integer,
+ Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>>> inputElements =
+ ImmutableList.of(
+ KV.of(1, (Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>) ImmutableList.of(
+ KV.of(KV.of(1L, windowA),
+ WindowedValue.of(110L, new Instant(1), windowA, PaneInfo.NO_FIRING)),
+ // same window same key as to previous
+ KV.of(KV.of(1L, windowA),
+ WindowedValue.of(111L, new Instant(2), windowA, PaneInfo.NO_FIRING)),
+ // same window different key as to previous
+ KV.of(KV.of(2L, windowA),
+ WindowedValue.of(120L, new Instant(3), windowA, PaneInfo.NO_FIRING)),
+ // different window same key as to previous
+ KV.of(KV.of(2L, windowB),
+ WindowedValue.of(210L, new Instant(11), windowB, PaneInfo.NO_FIRING)),
+ // different window and different key as to previous
+ KV.of(KV.of(3L, windowB),
+ WindowedValue.of(220L, new Instant(12), windowB, PaneInfo.NO_FIRING)))),
+ KV.of(2, (Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>) ImmutableList.of(
+ // different shard
+ KV.of(KV.of(4L, windowC),
+ WindowedValue.of(330L, new Instant(21), windowC, PaneInfo.NO_FIRING)))));
+
+ // The order of the output elements is important relative to processing order
+ assertThat(doFnTester.processBatch(inputElements), contains(
+ IsmRecord.of(
+ ImmutableList.of(1L, windowA, 0L),
+ WindowedValue.of(110L, new Instant(1), windowA, PaneInfo.NO_FIRING)),
+ IsmRecord.of(
+ ImmutableList.of(1L, windowA, 1L),
+ WindowedValue.of(111L, new Instant(2), windowA, PaneInfo.NO_FIRING)),
+ IsmRecord.of(
+ ImmutableList.of(2L, windowA, 0L),
+ WindowedValue.of(120L, new Instant(3), windowA, PaneInfo.NO_FIRING)),
+ IsmRecord.of(
+ ImmutableList.of(2L, windowB, 0L),
+ WindowedValue.of(210L, new Instant(11), windowB, PaneInfo.NO_FIRING)),
+ IsmRecord.of(
+ ImmutableList.of(3L, windowB, 0L),
+ WindowedValue.of(220L, new Instant(12), windowB, PaneInfo.NO_FIRING)),
+ IsmRecord.of(
+ ImmutableList.of(4L, windowC, 0L),
+ WindowedValue.of(330L, new Instant(21), windowC, PaneInfo.NO_FIRING))));
+
+ // Verify the number of unique keys per window.
+ assertThat(doFnTester.takeSideOutputElements(outputForSizeTag), contains(
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowA)),
+ KV.of(windowA, 2L)),
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB)),
+ KV.of(windowB, 2L)),
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowC)),
+ KV.of(windowC, 1L))
+ ));
+
+ // Verify the output for the unique keys.
+ assertThat(doFnTester.takeSideOutputElements(outputForEntrySetTag), contains(
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowA)),
+ KV.of(windowA, 1L)),
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowA)),
+ KV.of(windowA, 2L)),
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB)),
+ KV.of(windowB, 2L)),
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB)),
+ KV.of(windowB, 3L)),
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowC)),
+ KV.of(windowC, 4L))
+ ));
+ }
+
+ @Test
+ public void testToIsmRecordForMapLikeDoFnWithoutUniqueKeysThrowsException() throws Exception {
+ TupleTag<KV<Integer, KV<IntervalWindow, Long>>> outputForSizeTag = new TupleTag<>();
+ TupleTag<KV<Integer, KV<IntervalWindow, Long>>> outputForEntrySetTag = new TupleTag<>();
+
+ Coder<Long> keyCoder = VarLongCoder.of();
+ Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
+
+ IsmRecordCoder<WindowedValue<Long>> ismCoder = IsmRecordCoder.of(
+ 1,
+ 2,
+ ImmutableList.<Coder<?>>of(
+ MetadataKeyCoder.of(keyCoder),
+ IntervalWindow.getCoder(),
+ BigEndianLongCoder.of()),
+ FullWindowedValueCoder.of(VarLongCoder.of(), windowCoder));
+
+ DoFnTester<KV<Integer, Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>>,
+ IsmRecord<WindowedValue<Long>>> doFnTester =
+ DoFnTester.of(new BatchViewAsMultimap.ToIsmRecordForMapLikeDoFn<Long, Long, IntervalWindow>(
+ outputForSizeTag,
+ outputForEntrySetTag,
+ windowCoder,
+ keyCoder,
+ ismCoder,
+ true /* unique keys */));
+ doFnTester.setSideOutputTags(TupleTagList.of(
+ ImmutableList.<TupleTag<?>>of(outputForSizeTag, outputForEntrySetTag)));
+
+ IntervalWindow windowA = new IntervalWindow(new Instant(0), new Instant(10));
+
+ Iterable<KV<Integer,
+ Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>>> inputElements =
+ ImmutableList.of(
+ KV.of(1, (Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>) ImmutableList.of(
+ KV.of(KV.of(1L, windowA),
+ WindowedValue.of(110L, new Instant(1), windowA, PaneInfo.NO_FIRING)),
+ // same window same key as to previous
+ KV.of(KV.of(1L, windowA),
+ WindowedValue.of(111L, new Instant(2), windowA, PaneInfo.NO_FIRING)))));
+
+ try {
+ doFnTester.processBatch(inputElements);
+ fail("Expected UserCodeException");
+ } catch (UserCodeException e) {
+ assertTrue(e.getCause() instanceof IllegalStateException);
+ IllegalStateException rootCause = (IllegalStateException) e.getCause();
+ assertThat(rootCause.getMessage(), containsString("Unique keys are expected but found key"));
+ }
+ }
+
+ @Test
+ public void testToIsmMetadataRecordForSizeDoFn() throws Exception {
+ TupleTag<KV<Integer, KV<IntervalWindow, Long>>> outputForSizeTag = new TupleTag<>();
+ TupleTag<KV<Integer, KV<IntervalWindow, Long>>> outputForEntrySetTag = new TupleTag<>();
+
+ Coder<Long> keyCoder = VarLongCoder.of();
+ Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
+
+ IsmRecordCoder<WindowedValue<Long>> ismCoder = IsmRecordCoder.of(
+ 1,
+ 2,
+ ImmutableList.<Coder<?>>of(
+ MetadataKeyCoder.of(keyCoder),
+ IntervalWindow.getCoder(),
+ BigEndianLongCoder.of()),
+ FullWindowedValueCoder.of(VarLongCoder.of(), windowCoder));
+
+ DoFnTester<KV<Integer, Iterable<KV<IntervalWindow, Long>>>,
+ IsmRecord<WindowedValue<Long>>> doFnTester = DoFnTester.of(
+ new BatchViewAsMultimap.ToIsmMetadataRecordForSizeDoFn<Long, Long, IntervalWindow>(
+ windowCoder));
+ doFnTester.setSideOutputTags(TupleTagList.of(
+ ImmutableList.<TupleTag<?>>of(outputForSizeTag, outputForEntrySetTag)));
+
+ IntervalWindow windowA = new IntervalWindow(new Instant(0), new Instant(10));
+ IntervalWindow windowB = new IntervalWindow(new Instant(10), new Instant(20));
+ IntervalWindow windowC = new IntervalWindow(new Instant(20), new Instant(30));
+
+ Iterable<KV<Integer, Iterable<KV<IntervalWindow, Long>>>> inputElements =
+ ImmutableList.of(
+ KV.of(1,
+ (Iterable<KV<IntervalWindow, Long>>) ImmutableList.of(
+ KV.of(windowA, 2L),
+ KV.of(windowA, 3L),
+ KV.of(windowB, 7L))),
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB)),
+ (Iterable<KV<IntervalWindow, Long>>) ImmutableList.of(
+ KV.of(windowC, 9L))));
+
+ // The order of the output elements is important relative to processing order
+ assertThat(doFnTester.processBatch(inputElements), contains(
+ IsmRecord.<WindowedValue<Long>>meta(
+ ImmutableList.of(IsmFormat.getMetadataKey(), windowA, 0L),
+ CoderUtils.encodeToByteArray(VarLongCoder.of(), 5L)),
+ IsmRecord.<WindowedValue<Long>>meta(
+ ImmutableList.of(IsmFormat.getMetadataKey(), windowB, 0L),
+ CoderUtils.encodeToByteArray(VarLongCoder.of(), 7L)),
+ IsmRecord.<WindowedValue<Long>>meta(
+ ImmutableList.of(IsmFormat.getMetadataKey(), windowC, 0L),
+ CoderUtils.encodeToByteArray(VarLongCoder.of(), 9L))
+ ));
+ }
+
+ @Test
+ public void testToIsmMetadataRecordForKeyDoFn() throws Exception {
+ TupleTag<KV<Integer, KV<IntervalWindow, Long>>> outputForSizeTag = new TupleTag<>();
+ TupleTag<KV<Integer, KV<IntervalWindow, Long>>> outputForEntrySetTag = new TupleTag<>();
+
+ Coder<Long> keyCoder = VarLongCoder.of();
+ Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
+
+ IsmRecordCoder<WindowedValue<Long>> ismCoder = IsmRecordCoder.of(
+ 1,
+ 2,
+ ImmutableList.<Coder<?>>of(
+ MetadataKeyCoder.of(keyCoder),
+ IntervalWindow.getCoder(),
+ BigEndianLongCoder.of()),
+ FullWindowedValueCoder.of(VarLongCoder.of(), windowCoder));
+
+ DoFnTester<KV<Integer, Iterable<KV<IntervalWindow, Long>>>,
+ IsmRecord<WindowedValue<Long>>> doFnTester = DoFnTester.of(
+ new BatchViewAsMultimap.ToIsmMetadataRecordForKeyDoFn<Long, Long, IntervalWindow>(
+ keyCoder, windowCoder));
+ doFnTester.setSideOutputTags(TupleTagList.of(
+ ImmutableList.<TupleTag<?>>of(outputForSizeTag, outputForEntrySetTag)));
+
+ IntervalWindow windowA = new IntervalWindow(new Instant(0), new Instant(10));
+ IntervalWindow windowB = new IntervalWindow(new Instant(10), new Instant(20));
+ IntervalWindow windowC = new IntervalWindow(new Instant(20), new Instant(30));
+
+ Iterable<KV<Integer, Iterable<KV<IntervalWindow, Long>>>> inputElements =
+ ImmutableList.of(
+ KV.of(1,
+ (Iterable<KV<IntervalWindow, Long>>) ImmutableList.of(
+ KV.of(windowA, 2L),
+ // same window as previous
+ KV.of(windowA, 3L),
+ // different window as previous
+ KV.of(windowB, 3L))),
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB)),
+ (Iterable<KV<IntervalWindow, Long>>) ImmutableList.of(
+ KV.of(windowC, 3L))));
+
+ // The order of the output elements is important relative to processing order
+ assertThat(doFnTester.processBatch(inputElements), contains(
+ IsmRecord.<WindowedValue<Long>>meta(
+ ImmutableList.of(IsmFormat.getMetadataKey(), windowA, 1L),
+ CoderUtils.encodeToByteArray(VarLongCoder.of(), 2L)),
+ IsmRecord.<WindowedValue<Long>>meta(
+ ImmutableList.of(IsmFormat.getMetadataKey(), windowA, 2L),
+ CoderUtils.encodeToByteArray(VarLongCoder.of(), 3L)),
+ IsmRecord.<WindowedValue<Long>>meta(
+ ImmutableList.of(IsmFormat.getMetadataKey(), windowB, 1L),
+ CoderUtils.encodeToByteArray(VarLongCoder.of(), 3L)),
+ IsmRecord.<WindowedValue<Long>>meta(
+ ImmutableList.of(IsmFormat.getMetadataKey(), windowC, 1L),
+ CoderUtils.encodeToByteArray(VarLongCoder.of(), 3L))
+ ));
+ }
+
+ @Test
+ public void testToMapDoFn() throws Exception {
+ Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
+
+ DoFnTester<KV<Integer, Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>>>,
+ IsmRecord<WindowedValue<TransformedMap<Long,
+ WindowedValue<Long>,
+ Long>>>> doFnTester =
+ DoFnTester.of(new BatchViewAsMap.ToMapDoFn<Long, Long, IntervalWindow>(windowCoder));
+
+
+ IntervalWindow windowA = new IntervalWindow(new Instant(0), new Instant(10));
+ IntervalWindow windowB = new IntervalWindow(new Instant(10), new Instant(20));
+ IntervalWindow windowC = new IntervalWindow(new Instant(20), new Instant(30));
+
+ Iterable<KV<Integer,
+ Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>>>> inputElements =
+ ImmutableList.of(
+ KV.of(1,
+ (Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>>) ImmutableList.of(
+ KV.of(windowA, WindowedValue.of(
+ KV.of(1L, 11L), new Instant(3), windowA, PaneInfo.NO_FIRING)),
+ KV.of(windowA, WindowedValue.of(
+ KV.of(2L, 21L), new Instant(7), windowA, PaneInfo.NO_FIRING)),
+ KV.of(windowB, WindowedValue.of(
+ KV.of(2L, 21L), new Instant(13), windowB, PaneInfo.NO_FIRING)),
+ KV.of(windowB, WindowedValue.of(
+ KV.of(3L, 31L), new Instant(15), windowB, PaneInfo.NO_FIRING)))),
+ KV.of(2,
+ (Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>>) ImmutableList.of(
+ KV.of(windowC, WindowedValue.of(
+ KV.of(4L, 41L), new Instant(25), windowC, PaneInfo.NO_FIRING)))));
+
+ // The order of the output elements is important relative to processing order
+ List<IsmRecord<WindowedValue<TransformedMap<Long,
+ WindowedValue<Long>,
+ Long>>>> output =
+ doFnTester.processBatch(inputElements);
+ assertEquals(3, output.size());
+ Map<Long, Long> outputMap;
+
+ outputMap = output.get(0).getValue().getValue();
+ assertEquals(2, outputMap.size());
+ assertEquals(ImmutableMap.of(1L, 11L, 2L, 21L), outputMap);
+
+ outputMap = output.get(1).getValue().getValue();
+ assertEquals(2, outputMap.size());
+ assertEquals(ImmutableMap.of(2L, 21L, 3L, 31L), outputMap);
+
+ outputMap = output.get(2).getValue().getValue();
+ assertEquals(1, outputMap.size());
+ assertEquals(ImmutableMap.of(4L, 41L), outputMap);
+ }
+
+ @Test
+ public void testToMultimapDoFn() throws Exception {
+ Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
+
+ DoFnTester<KV<Integer, Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>>>,
+ IsmRecord<WindowedValue<TransformedMap<Long,
+ Iterable<WindowedValue<Long>>,
+ Iterable<Long>>>>> doFnTester =
+ DoFnTester.of(
+ new BatchViewAsMultimap.ToMultimapDoFn<Long, Long, IntervalWindow>(windowCoder));
+
+
+ IntervalWindow windowA = new IntervalWindow(new Instant(0), new Instant(10));
+ IntervalWindow windowB = new IntervalWindow(new Instant(10), new Instant(20));
+ IntervalWindow windowC = new IntervalWindow(new Instant(20), new Instant(30));
+
+ Iterable<KV<Integer,
+ Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>>>> inputElements =
+ ImmutableList.of(
+ KV.of(1,
+ (Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>>) ImmutableList.of(
+ KV.of(windowA, WindowedValue.of(
+ KV.of(1L, 11L), new Instant(3), windowA, PaneInfo.NO_FIRING)),
+ KV.of(windowA, WindowedValue.of(
+ KV.of(1L, 12L), new Instant(5), windowA, PaneInfo.NO_FIRING)),
+ KV.of(windowA, WindowedValue.of(
+ KV.of(2L, 21L), new Instant(7), windowA, PaneInfo.NO_FIRING)),
+ KV.of(windowB, WindowedValue.of(
+ KV.of(2L, 21L), new Instant(13), windowB, PaneInfo.NO_FIRING)),
+ KV.of(windowB, WindowedValue.of(
+ KV.of(3L, 31L), new Instant(15), windowB, PaneInfo.NO_FIRING)))),
+ KV.of(2,
+ (Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>>) ImmutableList.of(
+ KV.of(windowC, WindowedValue.of(
+ KV.of(4L, 41L), new Instant(25), windowC, PaneInfo.NO_FIRING)))));
+
+ // The order of the output elements is important relative to processing order
+ List<IsmRecord<WindowedValue<TransformedMap<Long,
+ Iterable<WindowedValue<Long>>,
+ Iterable<Long>>>>> output =
+ doFnTester.processBatch(inputElements);
+ assertEquals(3, output.size());
+ Map<Long, Iterable<Long>> outputMap;
+
+ outputMap = output.get(0).getValue().getValue();
+ assertEquals(2, outputMap.size());
+ assertThat(outputMap.get(1L), containsInAnyOrder(11L, 12L));
+ assertThat(outputMap.get(2L), containsInAnyOrder(21L));
+
+ outputMap = output.get(1).getValue().getValue();
+ assertEquals(2, outputMap.size());
+ assertThat(outputMap.get(2L), containsInAnyOrder(21L));
+ assertThat(outputMap.get(3L), containsInAnyOrder(31L));
+
+ outputMap = output.get(2).getValue().getValue();
+ assertEquals(1, outputMap.size());
+ assertThat(outputMap.get(4L), containsInAnyOrder(41L));
+ }
+ }
[2/7] incubator-beam git commit: Merge branch 'master' into temp-option
Posted by ke...@apache.org.
Merge branch 'master' into temp-option
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/911d2953
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/911d2953
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/911d2953
Branch: refs/heads/master
Commit: 911d29539b8e586bd1452a6ec751155981b0f8f7
Parents: 8bc0659 9247ad7
Author: Pei He <pe...@gmail.com>
Authored: Wed Mar 23 19:56:01 2016 -0700
Committer: Pei He <pe...@gmail.com>
Committed: Wed Mar 23 19:56:01 2016 -0700
----------------------------------------------------------------------
DISCLAIMER | 10 +
NOTICE | 12 +
README.md | 9 +-
examples/pom.xml | 129 +-
.../examples/complete/AutoComplete.java | 10 +-
.../examples/MinimalWordCountJava8.java | 68 --
.../examples/complete/game/GameStats.java | 347 ------
.../examples/complete/game/HourlyTeamScore.java | 193 ---
.../examples/complete/game/LeaderBoard.java | 237 ----
.../dataflow/examples/complete/game/README.md | 119 --
.../examples/complete/game/UserScore.java | 239 ----
.../complete/game/injector/Injector.java | 417 -------
.../complete/game/injector/InjectorUtils.java | 101 --
.../injector/RetryHttpInitializerWrapper.java | 127 --
.../complete/game/utils/WriteToBigQuery.java | 134 ---
.../game/utils/WriteWindowedToBigQuery.java | 76 --
.../examples/MinimalWordCountJava8Test.java | 103 --
.../examples/complete/game/GameStatsTest.java | 99 --
.../complete/game/HourlyTeamScoreTest.java | 121 --
.../examples/complete/game/UserScoreTest.java | 156 ---
java8examples/pom.xml | 278 +++++
.../examples/MinimalWordCountJava8.java | 68 ++
.../examples/complete/game/GameStats.java | 339 ++++++
.../examples/complete/game/HourlyTeamScore.java | 193 +++
.../examples/complete/game/LeaderBoard.java | 237 ++++
.../dataflow/examples/complete/game/README.md | 113 ++
.../examples/complete/game/UserScore.java | 239 ++++
.../complete/game/injector/Injector.java | 415 +++++++
.../complete/game/injector/InjectorUtils.java | 101 ++
.../injector/RetryHttpInitializerWrapper.java | 126 ++
.../complete/game/utils/WriteToBigQuery.java | 134 +++
.../game/utils/WriteWindowedToBigQuery.java | 76 ++
.../examples/MinimalWordCountJava8Test.java | 103 ++
.../examples/complete/game/GameStatsTest.java | 76 ++
.../complete/game/HourlyTeamScoreTest.java | 111 ++
.../examples/complete/game/UserScoreTest.java | 154 +++
java8tests/pom.xml | 183 +++
.../sdk/transforms/CombineJava8Test.java | 133 +++
.../sdk/transforms/FilterJava8Test.java | 118 ++
.../transforms/FlatMapElementsJava8Test.java | 84 ++
.../sdk/transforms/MapElementsJava8Test.java | 77 ++
.../sdk/transforms/PartitionJava8Test.java | 74 ++
.../transforms/RemoveDuplicatesJava8Test.java | 98 ++
.../sdk/transforms/WithKeysJava8Test.java | 73 ++
.../sdk/transforms/WithTimestampsJava8Test.java | 65 ++
maven-archetypes/examples/pom.xml | 2 +-
maven-archetypes/starter/pom.xml | 2 +-
pom.xml | 24 +-
runners/flink/README.md | 101 +-
runners/flink/examples/pom.xml | 89 ++
.../beam/runners/flink/examples/TFIDF.java | 452 +++++++
.../beam/runners/flink/examples/WordCount.java | 113 ++
.../flink/examples/streaming/AutoComplete.java | 387 ++++++
.../flink/examples/streaming/JoinExamples.java | 158 +++
.../KafkaWindowedWordCountExample.java | 142 +++
.../examples/streaming/WindowedWordCount.java | 130 +++
runners/flink/pom.xml | 416 +++----
runners/flink/runner/pom.xml | 147 +++
.../FlinkPipelineExecutionEnvironment.java | 269 +++++
.../runners/flink/FlinkPipelineOptions.java | 93 ++
.../beam/runners/flink/FlinkPipelineRunner.java | 198 ++++
.../runners/flink/FlinkRunnerRegistrar.java | 56 +
.../beam/runners/flink/FlinkRunnerResult.java | 68 ++
.../apache/beam/runners/flink/io/ConsoleIO.java | 82 ++
.../FlinkBatchPipelineTranslator.java | 153 +++
.../FlinkBatchTransformTranslators.java | 594 ++++++++++
.../FlinkBatchTranslationContext.java | 129 ++
.../translation/FlinkPipelineTranslator.java | 36 +
.../FlinkStreamingPipelineTranslator.java | 150 +++
.../FlinkStreamingTransformTranslators.java | 407 +++++++
.../FlinkStreamingTranslationContext.java | 89 ++
.../FlinkCoGroupKeyedListAggregator.java | 60 +
.../functions/FlinkCreateFunction.java | 62 +
.../functions/FlinkDoFnFunction.java | 204 ++++
.../FlinkKeyedListAggregationFunction.java | 77 ++
.../functions/FlinkMultiOutputDoFnFunction.java | 177 +++
.../FlinkMultiOutputPruningFunction.java | 43 +
.../functions/FlinkPartialReduceFunction.java | 60 +
.../functions/FlinkReduceFunction.java | 57 +
.../flink/translation/functions/UnionCoder.java | 150 +++
.../translation/types/CoderComparator.java | 216 ++++
.../translation/types/CoderTypeInformation.java | 116 ++
.../translation/types/CoderTypeSerializer.java | 152 +++
.../types/InspectableByteArrayOutputStream.java | 34 +
.../translation/types/KvCoderComperator.java | 264 +++++
.../types/KvCoderTypeInformation.java | 186 +++
.../types/VoidCoderTypeSerializer.java | 112 ++
.../wrappers/CombineFnAggregatorWrapper.java | 92 ++
.../wrappers/DataInputViewWrapper.java | 59 +
.../wrappers/DataOutputViewWrapper.java | 52 +
.../SerializableFnAggregatorWrapper.java | 91 ++
.../translation/wrappers/SinkOutputFormat.java | 121 ++
.../translation/wrappers/SourceInputFormat.java | 164 +++
.../translation/wrappers/SourceInputSplit.java | 52 +
.../streaming/FlinkAbstractParDoWrapper.java | 266 +++++
.../FlinkGroupAlsoByWindowWrapper.java | 640 ++++++++++
.../streaming/FlinkGroupByKeyWrapper.java | 66 ++
.../streaming/FlinkParDoBoundMultiWrapper.java | 77 ++
.../streaming/FlinkParDoBoundWrapper.java | 100 ++
.../io/FlinkStreamingCreateFunction.java | 65 ++
.../streaming/io/UnboundedFlinkSource.java | 82 ++
.../streaming/io/UnboundedSocketSource.java | 233 ++++
.../streaming/io/UnboundedSourceWrapper.java | 171 +++
.../state/AbstractFlinkTimerInternals.java | 128 ++
.../streaming/state/FlinkStateInternals.java | 715 ++++++++++++
.../streaming/state/StateCheckpointReader.java | 91 ++
.../streaming/state/StateCheckpointUtils.java | 155 +++
.../streaming/state/StateCheckpointWriter.java | 129 ++
.../wrappers/streaming/state/StateType.java | 73 ++
.../runner/src/main/resources/log4j.properties | 23 +
.../apache/beam/runners/flink/AvroITCase.java | 127 ++
.../beam/runners/flink/FlattenizeITCase.java | 74 ++
.../runners/flink/FlinkRunnerRegistrarTest.java | 48 +
.../beam/runners/flink/FlinkTestPipeline.java | 72 ++
.../beam/runners/flink/JoinExamplesITCase.java | 101 ++
.../runners/flink/MaybeEmptyTestITCase.java | 65 ++
.../runners/flink/ParDoMultiOutputITCase.java | 100 ++
.../beam/runners/flink/ReadSourceITCase.java | 165 +++
.../flink/RemoveDuplicatesEmptyITCase.java | 70 ++
.../runners/flink/RemoveDuplicatesITCase.java | 71 ++
.../beam/runners/flink/SideInputITCase.java | 69 ++
.../apache/beam/runners/flink/TfIdfITCase.java | 78 ++
.../beam/runners/flink/WordCountITCase.java | 75 ++
.../runners/flink/WordCountJoin2ITCase.java | 138 +++
.../runners/flink/WordCountJoin3ITCase.java | 156 +++
.../beam/runners/flink/WriteSinkITCase.java | 158 +++
.../flink/streaming/GroupAlsoByWindowTest.java | 508 ++++++++
.../flink/streaming/GroupByNullKeyTest.java | 123 ++
.../flink/streaming/StateSerializationTest.java | 305 +++++
.../streaming/TopWikipediaSessionsITCase.java | 134 +++
.../flink/streaming/UnboundedSourceITCase.java | 210 ++++
.../beam/runners/flink/util/JoinExamples.java | 160 +++
.../src/test/resources/log4j-test.properties | 27 +
.../FlinkPipelineExecutionEnvironment.java | 269 -----
.../runners/flink/FlinkPipelineOptions.java | 93 --
.../beam/runners/flink/FlinkPipelineRunner.java | 206 ----
.../beam/runners/flink/FlinkRunnerResult.java | 68 --
.../beam/runners/flink/examples/TFIDF.java | 452 -------
.../beam/runners/flink/examples/WordCount.java | 113 --
.../flink/examples/streaming/AutoComplete.java | 387 ------
.../flink/examples/streaming/JoinExamples.java | 158 ---
.../KafkaWindowedWordCountExample.java | 143 ---
.../examples/streaming/WindowedWordCount.java | 130 ---
.../apache/beam/runners/flink/io/ConsoleIO.java | 82 --
.../FlinkBatchPipelineTranslator.java | 153 ---
.../FlinkBatchTransformTranslators.java | 594 ----------
.../FlinkBatchTranslationContext.java | 129 --
.../translation/FlinkPipelineTranslator.java | 36 -
.../FlinkStreamingPipelineTranslator.java | 150 ---
.../FlinkStreamingTransformTranslators.java | 406 -------
.../FlinkStreamingTranslationContext.java | 89 --
.../FlinkCoGroupKeyedListAggregator.java | 60 -
.../functions/FlinkCreateFunction.java | 62 -
.../functions/FlinkDoFnFunction.java | 204 ----
.../FlinkKeyedListAggregationFunction.java | 77 --
.../functions/FlinkMultiOutputDoFnFunction.java | 177 ---
.../FlinkMultiOutputPruningFunction.java | 43 -
.../functions/FlinkPartialReduceFunction.java | 60 -
.../functions/FlinkReduceFunction.java | 57 -
.../flink/translation/functions/UnionCoder.java | 150 ---
.../translation/types/CoderComparator.java | 216 ----
.../translation/types/CoderTypeInformation.java | 116 --
.../translation/types/CoderTypeSerializer.java | 152 ---
.../types/InspectableByteArrayOutputStream.java | 34 -
.../translation/types/KvCoderComperator.java | 264 -----
.../types/KvCoderTypeInformation.java | 186 ---
.../types/VoidCoderTypeSerializer.java | 112 --
.../wrappers/CombineFnAggregatorWrapper.java | 92 --
.../wrappers/DataInputViewWrapper.java | 59 -
.../wrappers/DataOutputViewWrapper.java | 52 -
.../SerializableFnAggregatorWrapper.java | 91 --
.../translation/wrappers/SinkOutputFormat.java | 121 --
.../translation/wrappers/SourceInputFormat.java | 164 ---
.../translation/wrappers/SourceInputSplit.java | 52 -
.../streaming/FlinkAbstractParDoWrapper.java | 266 -----
.../FlinkGroupAlsoByWindowWrapper.java | 640 ----------
.../streaming/FlinkGroupByKeyWrapper.java | 66 --
.../streaming/FlinkParDoBoundMultiWrapper.java | 77 --
.../streaming/FlinkParDoBoundWrapper.java | 100 --
.../io/FlinkStreamingCreateFunction.java | 65 --
.../streaming/io/UnboundedFlinkSource.java | 82 --
.../streaming/io/UnboundedSocketSource.java | 233 ----
.../streaming/io/UnboundedSourceWrapper.java | 134 ---
.../state/AbstractFlinkTimerInternals.java | 128 --
.../streaming/state/FlinkStateInternals.java | 715 ------------
.../streaming/state/StateCheckpointReader.java | 91 --
.../streaming/state/StateCheckpointUtils.java | 155 ---
.../streaming/state/StateCheckpointWriter.java | 129 --
.../wrappers/streaming/state/StateType.java | 73 --
.../flink/src/main/resources/log4j.properties | 23 -
.../apache/beam/runners/flink/AvroITCase.java | 127 --
.../beam/runners/flink/FlattenizeITCase.java | 74 --
.../beam/runners/flink/FlinkTestPipeline.java | 72 --
.../beam/runners/flink/JoinExamplesITCase.java | 101 --
.../runners/flink/MaybeEmptyTestITCase.java | 65 --
.../runners/flink/ParDoMultiOutputITCase.java | 100 --
.../beam/runners/flink/ReadSourceITCase.java | 165 ---
.../flink/RemoveDuplicatesEmptyITCase.java | 70 --
.../runners/flink/RemoveDuplicatesITCase.java | 71 --
.../beam/runners/flink/SideInputITCase.java | 69 --
.../apache/beam/runners/flink/TfIdfITCase.java | 78 --
.../beam/runners/flink/WordCountITCase.java | 76 --
.../runners/flink/WordCountJoin2ITCase.java | 138 ---
.../runners/flink/WordCountJoin3ITCase.java | 156 ---
.../beam/runners/flink/WriteSinkITCase.java | 158 ---
.../flink/streaming/GroupAlsoByWindowTest.java | 508 --------
.../flink/streaming/GroupByNullKeyTest.java | 123 --
.../flink/streaming/StateSerializationTest.java | 305 -----
.../streaming/TopWikipediaSessionsITCase.java | 134 ---
.../beam/runners/flink/util/JoinExamples.java | 160 ---
.../src/test/resources/log4j-test.properties | 27 -
runners/pom.xml | 63 +-
runners/spark/.gitignore | 10 -
runners/spark/.travis.yml | 22 -
runners/spark/README.md | 112 +-
runners/spark/build-resources/checkstyle.xml | 27 +-
runners/spark/build-resources/header-file.txt | 23 +-
runners/spark/pom.xml | 246 ++--
.../com/cloudera/dataflow/hadoop/HadoopIO.java | 202 ----
.../dataflow/hadoop/NullWritableCoder.java | 71 --
.../cloudera/dataflow/hadoop/WritableCoder.java | 120 --
.../com/cloudera/dataflow/io/ConsoleIO.java | 60 -
.../com/cloudera/dataflow/io/CreateStream.java | 66 --
.../java/com/cloudera/dataflow/io/KafkaIO.java | 128 --
.../dataflow/spark/BroadcastHelper.java | 121 --
.../com/cloudera/dataflow/spark/ByteArray.java | 52 -
.../cloudera/dataflow/spark/CoderHelpers.java | 185 ---
.../cloudera/dataflow/spark/DoFnFunction.java | 93 --
.../dataflow/spark/EvaluationContext.java | 283 -----
.../dataflow/spark/EvaluationResult.java | 62 -
.../dataflow/spark/MultiDoFnFunction.java | 115 --
.../dataflow/spark/ShardNameBuilder.java | 106 --
.../dataflow/spark/ShardNameTemplateAware.java | 28 -
.../dataflow/spark/ShardNameTemplateHelper.java | 58 -
.../dataflow/spark/SparkContextFactory.java | 66 --
.../dataflow/spark/SparkPipelineEvaluator.java | 52 -
.../dataflow/spark/SparkPipelineOptions.java | 39 -
.../spark/SparkPipelineOptionsFactory.java | 27 -
.../spark/SparkPipelineOptionsRegistrar.java | 27 -
.../dataflow/spark/SparkPipelineRunner.java | 252 ----
.../spark/SparkPipelineRunnerRegistrar.java | 27 -
.../dataflow/spark/SparkPipelineTranslator.java | 27 -
.../dataflow/spark/SparkProcessContext.java | 250 ----
.../dataflow/spark/SparkRuntimeContext.java | 212 ----
.../spark/TemplatedAvroKeyOutputFormat.java | 40 -
.../TemplatedSequenceFileOutputFormat.java | 40 -
.../spark/TemplatedTextOutputFormat.java | 40 -
.../dataflow/spark/TransformEvaluator.java | 24 -
.../dataflow/spark/TransformTranslator.java | 800 -------------
.../dataflow/spark/WindowingHelpers.java | 59 -
.../spark/aggregators/AggAccumParam.java | 35 -
.../spark/aggregators/NamedAggregators.java | 202 ----
.../SparkStreamingPipelineOptions.java | 40 -
.../SparkStreamingPipelineOptionsFactory.java | 27 -
.../SparkStreamingPipelineOptionsRegistrar.java | 28 -
.../streaming/StreamingEvaluationContext.java | 226 ----
.../streaming/StreamingTransformTranslator.java | 414 -------
.../StreamingWindowPipelineDetector.java | 100 --
.../beam/runners/spark/EvaluationResult.java | 65 ++
.../runners/spark/SparkPipelineOptions.java | 42 +
.../beam/runners/spark/SparkPipelineRunner.java | 255 ++++
.../spark/SparkStreamingPipelineOptions.java | 41 +
.../spark/aggregators/AggAccumParam.java | 38 +
.../spark/aggregators/NamedAggregators.java | 205 ++++
.../beam/runners/spark/coders/CoderHelpers.java | 189 +++
.../runners/spark/coders/NullWritableCoder.java | 74 ++
.../runners/spark/coders/WritableCoder.java | 123 ++
.../apache/beam/runners/spark/io/ConsoleIO.java | 63 +
.../beam/runners/spark/io/CreateStream.java | 69 ++
.../apache/beam/runners/spark/io/KafkaIO.java | 131 +++
.../beam/runners/spark/io/hadoop/HadoopIO.java | 203 ++++
.../spark/io/hadoop/ShardNameBuilder.java | 109 ++
.../spark/io/hadoop/ShardNameTemplateAware.java | 31 +
.../io/hadoop/ShardNameTemplateHelper.java | 61 +
.../io/hadoop/TemplatedAvroKeyOutputFormat.java | 43 +
.../TemplatedSequenceFileOutputFormat.java | 43 +
.../io/hadoop/TemplatedTextOutputFormat.java | 43 +
.../runners/spark/translation/DoFnFunction.java | 97 ++
.../spark/translation/EvaluationContext.java | 288 +++++
.../spark/translation/MultiDoFnFunction.java | 119 ++
.../spark/translation/SparkContextFactory.java | 69 ++
.../translation/SparkPipelineEvaluator.java | 56 +
.../SparkPipelineOptionsFactory.java | 31 +
.../SparkPipelineOptionsRegistrar.java | 31 +
.../SparkPipelineRunnerRegistrar.java | 31 +
.../translation/SparkPipelineTranslator.java | 30 +
.../spark/translation/SparkProcessContext.java | 262 +++++
.../spark/translation/SparkRuntimeContext.java | 217 ++++
.../spark/translation/TransformEvaluator.java | 27 +
.../spark/translation/TransformTranslator.java | 808 +++++++++++++
.../spark/translation/WindowingHelpers.java | 62 +
.../SparkStreamingPipelineOptionsFactory.java | 31 +
.../SparkStreamingPipelineOptionsRegistrar.java | 32 +
.../streaming/StreamingEvaluationContext.java | 229 ++++
.../streaming/StreamingTransformTranslator.java | 418 +++++++
.../StreamingWindowPipelineDetector.java | 104 ++
.../runners/spark/util/BroadcastHelper.java | 125 ++
.../beam/runners/spark/util/ByteArray.java | 55 +
...ataflow.sdk.options.PipelineOptionsRegistrar | 4 +-
...dataflow.sdk.runners.PipelineRunnerRegistrar | 2 +-
.../dataflow/hadoop/WritableCoderTest.java | 42 -
.../dataflow/spark/AvroPipelineTest.java | 103 --
.../dataflow/spark/CombineGloballyTest.java | 87 --
.../dataflow/spark/CombinePerKeyTest.java | 69 --
.../com/cloudera/dataflow/spark/DeDupTest.java | 55 -
.../cloudera/dataflow/spark/DoFnOutputTest.java | 57 -
.../cloudera/dataflow/spark/EmptyInputTest.java | 64 -
.../spark/HadoopFileFormatPipelineTest.java | 105 --
.../spark/MultiOutputWordCountTest.java | 148 ---
.../cloudera/dataflow/spark/NumShardsTest.java | 89 --
.../dataflow/spark/SerializationTest.java | 183 ---
.../dataflow/spark/ShardNameBuilderTest.java | 82 --
.../dataflow/spark/SideEffectsTest.java | 77 --
.../dataflow/spark/SimpleWordCountTest.java | 117 --
.../spark/TestSparkPipelineOptionsFactory.java | 34 -
.../com/cloudera/dataflow/spark/TfIdfTest.java | 60 -
.../dataflow/spark/TransformTranslatorTest.java | 95 --
.../dataflow/spark/WindowedWordCountTest.java | 63 -
.../spark/streaming/FlattenStreamingTest.java | 84 --
.../spark/streaming/KafkaStreamingTest.java | 133 ---
.../streaming/SimpleStreamingWordCountTest.java | 73 --
.../utils/DataflowAssertStreaming.java | 39 -
.../streaming/utils/EmbeddedKafkaCluster.java | 314 -----
.../apache/beam/runners/spark/DeDupTest.java | 60 +
.../beam/runners/spark/EmptyInputTest.java | 69 ++
.../beam/runners/spark/SimpleWordCountTest.java | 115 ++
.../apache/beam/runners/spark/TfIdfTest.java | 64 +
.../runners/spark/coders/WritableCoderTest.java | 45 +
.../beam/runners/spark/io/AvroPipelineTest.java | 108 ++
.../beam/runners/spark/io/NumShardsTest.java | 96 ++
.../io/hadoop/HadoopFileFormatPipelineTest.java | 113 ++
.../spark/io/hadoop/ShardNameBuilderTest.java | 85 ++
.../spark/translation/CombineGloballyTest.java | 94 ++
.../spark/translation/CombinePerKeyTest.java | 70 ++
.../spark/translation/DoFnOutputTest.java | 64 +
.../translation/MultiOutputWordCountTest.java | 137 +++
.../spark/translation/SerializationTest.java | 183 +++
.../spark/translation/SideEffectsTest.java | 81 ++
.../TestSparkPipelineOptionsFactory.java | 38 +
.../translation/TransformTranslatorTest.java | 99 ++
.../translation/WindowedWordCountTest.java | 71 ++
.../streaming/FlattenStreamingTest.java | 88 ++
.../streaming/KafkaStreamingTest.java | 140 +++
.../streaming/SimpleStreamingWordCountTest.java | 77 ++
.../utils/DataflowAssertStreaming.java | 42 +
.../streaming/utils/EmbeddedKafkaCluster.java | 317 +++++
sdk/pom.xml | 82 +-
.../sdk/coders/protobuf/package-info.java | 23 +
.../dataflow/sdk/io/bigtable/BigtableIO.java | 4 +-
.../dataflow/sdk/io/bigtable/package-info.java | 22 +
.../dataflow/sdk/options/PipelineOptions.java | 3 +-
.../sdk/options/PipelineOptionsFactory.java | 72 +-
.../sdk/runners/DataflowPipelineRunner.java | 25 +-
.../sdk/runners/DataflowPipelineTranslator.java | 15 +
.../inprocess/BoundedReadEvaluatorFactory.java | 50 +-
.../CachedThreadPoolExecutorServiceFactory.java | 42 +
.../runners/inprocess/CompletionCallback.java | 33 +
.../ConsumerTrackingPipelineVisitor.java | 173 +++
.../sdk/runners/inprocess/EvaluatorKey.java | 1 -
.../inprocess/ExecutorServiceFactory.java | 32 +
.../ExecutorServiceParallelExecutor.java | 432 +++++++
.../inprocess/FlattenEvaluatorFactory.java | 7 +-
.../inprocess/GroupByKeyEvaluatorFactory.java | 10 +-
.../inprocess/InMemoryWatermarkManager.java | 30 +-
.../sdk/runners/inprocess/InProcessBundle.java | 20 +-
.../inprocess/InProcessEvaluationContext.java | 405 +++++++
.../runners/inprocess/InProcessExecutor.java | 46 +
.../inprocess/InProcessPipelineOptions.java | 68 +-
.../inprocess/InProcessPipelineRunner.java | 319 +++--
.../inprocess/InProcessSideInputContainer.java | 71 +-
.../inprocess/KeyedPValueTrackingVisitor.java | 95 ++
.../inprocess/ParDoMultiEvaluatorFactory.java | 6 +-
.../inprocess/ParDoSingleEvaluatorFactory.java | 6 +-
.../sdk/runners/inprocess/StepAndKey.java | 68 ++
.../inprocess/TransformEvaluatorFactory.java | 1 -
.../inprocess/TransformEvaluatorRegistry.java | 72 ++
.../runners/inprocess/TransformExecutor.java | 114 ++
.../inprocess/TransformExecutorService.java | 34 +
.../inprocess/TransformExecutorServices.java | 153 +++
.../UnboundedReadEvaluatorFactory.java | 54 +-
.../runners/inprocess/ViewEvaluatorFactory.java | 8 +-
.../inprocess/WatermarkCallbackExecutor.java | 143 +++
.../cloud/dataflow/sdk/transforms/Combine.java | 18 +-
.../dataflow/sdk/transforms/CombineFns.java | 1100 ++++++++++++++++++
.../cloud/dataflow/sdk/transforms/DoFn.java | 13 +-
.../dataflow/sdk/transforms/DoFnReflector.java | 7 +-
.../dataflow/sdk/transforms/PTransform.java | 14 +-
.../cloud/dataflow/sdk/transforms/ParDo.java | 13 +
.../sdk/transforms/display/DisplayData.java | 530 +++++++++
.../sdk/transforms/display/HasDisplayData.java | 53 +
.../transforms/windowing/AfterWatermark.java | 4 +-
.../cloud/dataflow/sdk/util/DoFnRunners.java | 4 +-
.../cloud/dataflow/sdk/util/PropertyNames.java | 2 +
.../sdk/options/PipelineOptionsFactoryTest.java | 75 +-
.../runners/DataflowPipelineTranslatorTest.java | 98 +-
.../BoundedReadEvaluatorFactoryTest.java | 138 ++-
.../ConsumerTrackingPipelineVisitorTest.java | 233 ++++
.../inprocess/FlattenEvaluatorFactoryTest.java | 1 -
.../GroupByKeyEvaluatorFactoryTest.java | 1 -
.../inprocess/InMemoryWatermarkManagerTest.java | 12 +
.../InProcessEvaluationContextTest.java | 544 +++++++++
.../inprocess/InProcessPipelineRunnerTest.java | 77 ++
.../InProcessSideInputContainerTest.java | 92 +-
.../KeyedPValueTrackingVisitorTest.java | 189 +++
.../ParDoMultiEvaluatorFactoryTest.java | 1 -
.../ParDoSingleEvaluatorFactoryTest.java | 1 -
.../TransformExecutorServicesTest.java | 134 +++
.../inprocess/TransformExecutorTest.java | 312 +++++
.../UnboundedReadEvaluatorFactoryTest.java | 169 ++-
.../inprocess/ViewEvaluatorFactoryTest.java | 1 -
.../WatermarkCallbackExecutorTest.java | 126 ++
.../dataflow/sdk/transforms/CombineFnsTest.java | 413 +++++++
.../cloud/dataflow/sdk/transforms/DoFnTest.java | 15 +
.../dataflow/sdk/transforms/PTransformTest.java | 41 +
.../dataflow/sdk/transforms/ParDoTest.java | 23 +
.../transforms/display/DisplayDataMatchers.java | 98 ++
.../display/DisplayDataMatchersTest.java | 81 ++
.../sdk/transforms/display/DisplayDataTest.java | 633 ++++++++++
.../cloud/dataflow/sdk/util/ApiSurfaceTest.java | 3 +-
.../PipelineOptionsFactoryJava8Test.java | 90 ++
.../sdk/transforms/CombineJava8Test.java | 133 ---
.../sdk/transforms/FilterJava8Test.java | 118 --
.../transforms/FlatMapElementsJava8Test.java | 84 --
.../sdk/transforms/MapElementsJava8Test.java | 77 --
.../sdk/transforms/PartitionJava8Test.java | 74 --
.../transforms/RemoveDuplicatesJava8Test.java | 99 --
.../sdk/transforms/WithKeysJava8Test.java | 74 --
.../sdk/transforms/WithTimestampsJava8Test.java | 66 --
428 files changed, 31664 insertions(+), 23375 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/911d2953/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupAlsoByWindowWrapper.java
----------------------------------------------------------------------
diff --cc runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupAlsoByWindowWrapper.java
index 0000000,e115a15..b413d7a
mode 000000,100644..100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupAlsoByWindowWrapper.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupAlsoByWindowWrapper.java
@@@ -1,0 -1,631 +1,640 @@@
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ package org.apache.beam.runners.flink.translation.wrappers.streaming;
+
+ import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
+ import org.apache.beam.runners.flink.translation.wrappers.SerializableFnAggregatorWrapper;
+ import org.apache.beam.runners.flink.translation.wrappers.streaming.state.*;
+ import com.google.cloud.dataflow.sdk.coders.*;
+ import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+ import com.google.cloud.dataflow.sdk.runners.PipelineRunner;
+ import com.google.cloud.dataflow.sdk.transforms.Aggregator;
+ import com.google.cloud.dataflow.sdk.transforms.Combine;
+ import com.google.cloud.dataflow.sdk.transforms.DoFn;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.OutputTimeFn;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
+ import com.google.cloud.dataflow.sdk.util.*;
+ import com.google.cloud.dataflow.sdk.values.*;
+ import com.google.common.base.Preconditions;
+ import com.google.common.collect.HashMultimap;
+ import com.google.common.collect.Multimap;
+ import org.apache.flink.api.common.accumulators.Accumulator;
+ import org.apache.flink.api.common.accumulators.AccumulatorHelper;
+ import org.apache.flink.core.memory.DataInputView;
+ import org.apache.flink.runtime.state.AbstractStateBackend;
+ import org.apache.flink.runtime.state.StateHandle;
+ import org.apache.flink.streaming.api.datastream.DataStream;
+ import org.apache.flink.streaming.api.datastream.KeyedStream;
+ import org.apache.flink.streaming.api.operators.*;
+ import org.apache.flink.streaming.api.watermark.Watermark;
+ import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
+ import org.apache.flink.streaming.runtime.tasks.StreamTaskState;
+ import org.joda.time.Instant;
+
+ import java.io.IOException;
+ import java.util.*;
+
+ /**
+ * This class is the key class implementing all the windowing/triggering logic of Apache Beam.
+ * To provide full compatibility and support for all the windowing/triggering combinations offered by
+ * Beam, we opted for a strategy that uses the SDK's code for doing these operations. See the code in
+ * ({@link com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsDoFn}.
+ * <p/>
+ * In a nutshell, when the execution arrives to this operator, we expect to have a stream <b>already
+ * grouped by key</b>. Each of the elements that enter here, registers a timer
+ * (see {@link TimerInternals#setTimer(TimerInternals.TimerData)} in the
+ * {@link FlinkGroupAlsoByWindowWrapper#activeTimers}.
+ * This is essentially a timestamp indicating when to trigger the computation over the window this
+ * element belongs to.
+ * <p/>
+ * When a watermark arrives, all the registered timers are checked to see which ones are ready to
+ * fire (see {@link FlinkGroupAlsoByWindowWrapper#processWatermark(Watermark)}). These are deregistered from
+ * the {@link FlinkGroupAlsoByWindowWrapper#activeTimers}
+ * list, and are fed into the {@link com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsDoFn}
+ * for furhter processing.
+ */
+ public class FlinkGroupAlsoByWindowWrapper<K, VIN, VACC, VOUT>
+ extends AbstractStreamOperator<WindowedValue<KV<K, VOUT>>>
+ implements OneInputStreamOperator<WindowedValue<KV<K, VIN>>, WindowedValue<KV<K, VOUT>>> {
+
+ private static final long serialVersionUID = 1L;
+
+ private transient PipelineOptions options;
+
+ private transient CoderRegistry coderRegistry;
+
+ private DoFn<KeyedWorkItem<K, VIN>, KV<K, VOUT>> operator;
+
+ private ProcessContext context;
+
+ private final WindowingStrategy<KV<K, VIN>, BoundedWindow> windowingStrategy;
+
+ private final Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combineFn;
+
+ private final KvCoder<K, VIN> inputKvCoder;
+
+ /**
+ * State is kept <b>per-key</b>. This data structure keeps this mapping between an active key, i.e. a
+ * key whose elements are currently waiting to be processed, and its associated state.
+ */
+ private Map<K, FlinkStateInternals<K>> perKeyStateInternals = new HashMap<>();
+
+ /**
+ * Timers waiting to be processed.
+ */
+ private Map<K, Set<TimerInternals.TimerData>> activeTimers = new HashMap<>();
+
+ private FlinkTimerInternals timerInternals = new FlinkTimerInternals();
+
+ /**
+ * Creates an DataStream where elements are grouped in windows based on the specified windowing strategy.
+ * This method assumes that <b>elements are already grouped by key</b>.
+ * <p/>
+ * The difference with {@link #createForIterable(PipelineOptions, PCollection, KeyedStream)}
+ * is that this method assumes that a combiner function is provided
+ * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
+ * A combiner helps at increasing the speed and, in most of the cases, reduce the per-window state.
+ *
+ * @param options the general job configuration options.
+ * @param input the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
+ * @param groupedStreamByKey the input stream, it is assumed to already be grouped by key.
+ * @param combiner the combiner to be used.
+ * @param outputKvCoder the type of the output values.
+ */
+ public static <K, VIN, VACC, VOUT> DataStream<WindowedValue<KV<K, VOUT>>> create(
+ PipelineOptions options,
+ PCollection input,
+ KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey,
+ Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner,
+ KvCoder<K, VOUT> outputKvCoder) {
+ Preconditions.checkNotNull(options);
+
+ KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();
+ FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper<>(options,
+ input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder, combiner);
+
+ Coder<WindowedValue<KV<K, VOUT>>> windowedOutputElemCoder = WindowedValue.FullWindowedValueCoder.of(
+ outputKvCoder,
+ input.getWindowingStrategy().getWindowFn().windowCoder());
+
+ CoderTypeInformation<WindowedValue<KV<K, VOUT>>> outputTypeInfo =
+ new CoderTypeInformation<>(windowedOutputElemCoder);
+
+ DataStream<WindowedValue<KV<K, VOUT>>> groupedByKeyAndWindow = groupedStreamByKey
+ .transform("GroupByWindowWithCombiner",
+ new CoderTypeInformation<>(outputKvCoder),
+ windower)
+ .returns(outputTypeInfo);
+
+ return groupedByKeyAndWindow;
+ }
+
+ /**
+ * Creates an DataStream where elements are grouped in windows based on the specified windowing strategy.
+ * This method assumes that <b>elements are already grouped by key</b>.
+ * <p/>
+ * The difference with {@link #create(PipelineOptions, PCollection, KeyedStream, Combine.KeyedCombineFn, KvCoder)}
+ * is that this method assumes no combiner function
+ * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
+ *
+ * @param options the general job configuration options.
+ * @param input the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection}.
+ * @param groupedStreamByKey the input stream, it is assumed to already be grouped by key.
+ */
+ public static <K, VIN> DataStream<WindowedValue<KV<K, Iterable<VIN>>>> createForIterable(
+ PipelineOptions options,
+ PCollection input,
+ KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey) {
+ Preconditions.checkNotNull(options);
+
+ KvCoder<K, VIN> inputKvCoder = (KvCoder<K, VIN>) input.getCoder();
+ Coder<K> keyCoder = inputKvCoder.getKeyCoder();
+ Coder<VIN> inputValueCoder = inputKvCoder.getValueCoder();
+
+ FlinkGroupAlsoByWindowWrapper windower = new FlinkGroupAlsoByWindowWrapper(options,
+ input.getPipeline().getCoderRegistry(), input.getWindowingStrategy(), inputKvCoder, null);
+
+ Coder<Iterable<VIN>> valueIterCoder = IterableCoder.of(inputValueCoder);
+ KvCoder<K, Iterable<VIN>> outputElemCoder = KvCoder.of(keyCoder, valueIterCoder);
+
+ Coder<WindowedValue<KV<K, Iterable<VIN>>>> windowedOutputElemCoder = WindowedValue.FullWindowedValueCoder.of(
+ outputElemCoder,
+ input.getWindowingStrategy().getWindowFn().windowCoder());
+
+ CoderTypeInformation<WindowedValue<KV<K, Iterable<VIN>>>> outputTypeInfo =
+ new CoderTypeInformation<>(windowedOutputElemCoder);
+
+ DataStream<WindowedValue<KV<K, Iterable<VIN>>>> groupedByKeyAndWindow = groupedStreamByKey
+ .transform("GroupByWindow",
+ new CoderTypeInformation<>(windowedOutputElemCoder),
+ windower)
+ .returns(outputTypeInfo);
+
+ return groupedByKeyAndWindow;
+ }
+
+ public static <K, VIN, VACC, VOUT> FlinkGroupAlsoByWindowWrapper
+ createForTesting(PipelineOptions options,
+ CoderRegistry registry,
+ WindowingStrategy<KV<K, VIN>, BoundedWindow> windowingStrategy,
+ KvCoder<K, VIN> inputCoder,
+ Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner) {
+ Preconditions.checkNotNull(options);
+
+ return new FlinkGroupAlsoByWindowWrapper(options, registry, windowingStrategy, inputCoder, combiner);
+ }
+
+ private FlinkGroupAlsoByWindowWrapper(PipelineOptions options,
+ CoderRegistry registry,
+ WindowingStrategy<KV<K, VIN>, BoundedWindow> windowingStrategy,
+ KvCoder<K, VIN> inputCoder,
+ Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner) {
+ Preconditions.checkNotNull(options);
+
+ this.options = Preconditions.checkNotNull(options);
+ this.coderRegistry = Preconditions.checkNotNull(registry);
+ this.inputKvCoder = Preconditions.checkNotNull(inputCoder);//(KvCoder<K, VIN>) input.getCoder();
+ this.windowingStrategy = Preconditions.checkNotNull(windowingStrategy);//input.getWindowingStrategy();
+ this.combineFn = combiner;
+ this.operator = createGroupAlsoByWindowOperator();
+ this.chainingStrategy = ChainingStrategy.ALWAYS;
+ }
+
+ @Override
+ public void open() throws Exception {
+ super.open();
+ this.context = new ProcessContext(operator, new TimestampedCollector<>(output), this.timerInternals);
+ }
+
+ /**
+ * Create the adequate {@link com.google.cloud.dataflow.sdk.util.GroupAlsoByWindowsDoFn},
+ * <b> if not already created</b>.
+ * If a {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn} was provided, then
+ * a function with that combiner is created, so that elements are combined as they arrive. This is
+ * done for speed and (in most of the cases) for reduction of the per-window state.
+ */
+ private <W extends BoundedWindow> DoFn<KeyedWorkItem<K, VIN>, KV<K, VOUT>> createGroupAlsoByWindowOperator() {
+ if (this.operator == null) {
+ if (this.combineFn == null) {
+ // Thus VOUT == Iterable<VIN>
+ Coder<VIN> inputValueCoder = inputKvCoder.getValueCoder();
+
+ this.operator = (DoFn) GroupAlsoByWindowViaWindowSetDoFn.create(
+ (WindowingStrategy<?, W>) this.windowingStrategy, SystemReduceFn.<K, VIN, W>buffering(inputValueCoder));
+ } else {
+ Coder<K> inputKeyCoder = inputKvCoder.getKeyCoder();
+
+ AppliedCombineFn<K, VIN, VACC, VOUT> appliedCombineFn = AppliedCombineFn
+ .withInputCoder(combineFn, coderRegistry, inputKvCoder);
+
+ this.operator = GroupAlsoByWindowViaWindowSetDoFn.create(
+ (WindowingStrategy<?, W>) this.windowingStrategy, SystemReduceFn.<K, VIN, VACC, VOUT, W>combining(inputKeyCoder, appliedCombineFn));
+ }
+ }
+ return this.operator;
+ }
+
+ private void processKeyedWorkItem(KeyedWorkItem<K, VIN> workItem) throws Exception {
+ context.setElement(workItem, getStateInternalsForKey(workItem.key()));
+
+ // TODO: Ideally startBundle/finishBundle would be called when the operator is first used / about to be discarded.
+ operator.startBundle(context);
+ operator.processElement(context);
+ operator.finishBundle(context);
+ }
+
+ @Override
+ public void processElement(StreamRecord<WindowedValue<KV<K, VIN>>> element) throws Exception {
+ ArrayList<WindowedValue<VIN>> elements = new ArrayList<>();
+ elements.add(WindowedValue.of(element.getValue().getValue().getValue(), element.getValue().getTimestamp(),
+ element.getValue().getWindows(), element.getValue().getPane()));
+ processKeyedWorkItem(KeyedWorkItems.elementsWorkItem(element.getValue().getValue().getKey(), elements));
+ }
+
+ @Override
+ public void processWatermark(Watermark mark) throws Exception {
+ context.setCurrentInputWatermark(new Instant(mark.getTimestamp()));
+
+ Multimap<K, TimerInternals.TimerData> timers = getTimersReadyToProcess(mark.getTimestamp());
+ if (!timers.isEmpty()) {
+ for (K key : timers.keySet()) {
+ processKeyedWorkItem(KeyedWorkItems.<K, VIN>timersWorkItem(key, timers.get(key)));
+ }
+ }
+
+ /**
+ * This is to take into account the different semantics of the Watermark in Flink and
+ * in Dataflow. To understand the reasoning behind the Dataflow semantics and its
+ * watermark holding logic, see the documentation of
+ * {@link WatermarkHold#addHold(ReduceFn.ProcessValueContext, boolean)}
+ * */
+ long millis = Long.MAX_VALUE;
+ for (FlinkStateInternals state : perKeyStateInternals.values()) {
+ Instant watermarkHold = state.getWatermarkHold();
+ if (watermarkHold != null && watermarkHold.getMillis() < millis) {
+ millis = watermarkHold.getMillis();
+ }
+ }
+
+ if (mark.getTimestamp() < millis) {
+ millis = mark.getTimestamp();
+ }
+
+ context.setCurrentOutputWatermark(new Instant(millis));
+
+ // Don't forget to re-emit the watermark for further operators down the line.
+ // This is critical for jobs with multiple aggregation steps.
+ // Imagine a job with a groupByKey() on key K1, followed by a map() that changes
+ // the key K1 to K2, and another groupByKey() on K2. In this case, if the watermark
+ // is not re-emitted, the second aggregation would never be triggered, and no result
+ // will be produced.
+ output.emitWatermark(new Watermark(millis));
+ }
+
+ @Override
+ public void close() throws Exception {
+ super.close();
+ }
+
+ private void registerActiveTimer(K key, TimerInternals.TimerData timer) {
+ Set<TimerInternals.TimerData> timersForKey = activeTimers.get(key);
+ if (timersForKey == null) {
+ timersForKey = new HashSet<>();
+ }
+ timersForKey.add(timer);
+ activeTimers.put(key, timersForKey);
+ }
+
+ private void unregisterActiveTimer(K key, TimerInternals.TimerData timer) {
+ Set<TimerInternals.TimerData> timersForKey = activeTimers.get(key);
+ if (timersForKey != null) {
+ timersForKey.remove(timer);
+ if (timersForKey.isEmpty()) {
+ activeTimers.remove(key);
+ } else {
+ activeTimers.put(key, timersForKey);
+ }
+ }
+ }
+
+ /**
+ * Returns the list of timers that are ready to fire. These are the timers
+ * that are registered to be triggered at a time before the current watermark.
+ * We keep these timers in a Set, so that they are deduplicated, as the same
+ * timer can be registered multiple times.
+ */
+ private Multimap<K, TimerInternals.TimerData> getTimersReadyToProcess(long currentWatermark) {
+
+ // we keep the timers to return in a different list and launch them later
+ // because we cannot prevent a trigger from registering another trigger,
+ // which would lead to concurrent modification exception.
+ Multimap<K, TimerInternals.TimerData> toFire = HashMultimap.create();
+
+ Iterator<Map.Entry<K, Set<TimerInternals.TimerData>>> it = activeTimers.entrySet().iterator();
+ while (it.hasNext()) {
+ Map.Entry<K, Set<TimerInternals.TimerData>> keyWithTimers = it.next();
+
+ Iterator<TimerInternals.TimerData> timerIt = keyWithTimers.getValue().iterator();
+ while (timerIt.hasNext()) {
+ TimerInternals.TimerData timerData = timerIt.next();
+ if (timerData.getTimestamp().isBefore(currentWatermark)) {
+ toFire.put(keyWithTimers.getKey(), timerData);
+ timerIt.remove();
+ }
+ }
+
+ if (keyWithTimers.getValue().isEmpty()) {
+ it.remove();
+ }
+ }
+ return toFire;
+ }
+
+ /**
+ * Gets the state associated with the specified key.
+ *
+ * @param key the key whose state we want.
+ * @return The {@link FlinkStateInternals}
+ * associated with that key.
+ */
+ private FlinkStateInternals<K> getStateInternalsForKey(K key) {
+ FlinkStateInternals<K> stateInternals = perKeyStateInternals.get(key);
+ if (stateInternals == null) {
+ Coder<? extends BoundedWindow> windowCoder = this.windowingStrategy.getWindowFn().windowCoder();
+ OutputTimeFn<? super BoundedWindow> outputTimeFn = this.windowingStrategy.getWindowFn().getOutputTimeFn();
+ stateInternals = new FlinkStateInternals<>(key, inputKvCoder.getKeyCoder(), windowCoder, outputTimeFn);
+ perKeyStateInternals.put(key, stateInternals);
+ }
+ return stateInternals;
+ }
+
+ private class FlinkTimerInternals extends AbstractFlinkTimerInternals<K, VIN> {
+ @Override
+ public void setTimer(TimerData timerKey) {
+ registerActiveTimer(context.element().key(), timerKey);
+ }
+
+ @Override
+ public void deleteTimer(TimerData timerKey) {
+ unregisterActiveTimer(context.element().key(), timerKey);
+ }
+ }
+
+ private class ProcessContext extends GroupAlsoByWindowViaWindowSetDoFn<K, VIN, VOUT, ?, KeyedWorkItem<K, VIN>>.ProcessContext {
+
+ private final FlinkTimerInternals timerInternals;
+
+ private final TimestampedCollector<WindowedValue<KV<K, VOUT>>> collector;
+
+ private FlinkStateInternals<K> stateInternals;
+
+ private KeyedWorkItem<K, VIN> element;
+
+ public ProcessContext(DoFn<KeyedWorkItem<K, VIN>, KV<K, VOUT>> function,
+ TimestampedCollector<WindowedValue<KV<K, VOUT>>> outCollector,
+ FlinkTimerInternals timerInternals) {
+ function.super();
+ super.setupDelegateAggregators();
+
+ this.collector = Preconditions.checkNotNull(outCollector);
+ this.timerInternals = Preconditions.checkNotNull(timerInternals);
+ }
+
+ public void setElement(KeyedWorkItem<K, VIN> element,
+ FlinkStateInternals<K> stateForKey) {
+ this.element = element;
+ this.stateInternals = stateForKey;
+ }
+
+ public void setCurrentInputWatermark(Instant watermark) {
+ this.timerInternals.setCurrentInputWatermark(watermark);
+ }
+
+ public void setCurrentOutputWatermark(Instant watermark) {
+ this.timerInternals.setCurrentOutputWatermark(watermark);
+ }
+
+ @Override
+ public KeyedWorkItem<K, VIN> element() {
+ return this.element;
+ }
+
+ @Override
+ public Instant timestamp() {
+ throw new UnsupportedOperationException("timestamp() is not available when processing KeyedWorkItems.");
+ }
+
+ @Override
+ public PipelineOptions getPipelineOptions() {
+ // TODO: PipelineOptions need to be available on the workers.
+ // Ideally they are captured as part of the pipeline.
+ // For now, construct empty options so that StateContexts.createFromComponents
+ // will yield a valid StateContext, which is needed to support the StateContext.window().
+ if (options == null) {
+ options = new PipelineOptions() {
+ @Override
+ public <T extends PipelineOptions> T as(Class<T> kls) {
+ return null;
+ }
+
+ @Override
+ public <T extends PipelineOptions> T cloneAs(Class<T> kls) {
+ return null;
+ }
+
+ @Override
+ public Class<? extends PipelineRunner<?>> getRunner() {
+ return null;
+ }
+
+ @Override
+ public void setRunner(Class<? extends PipelineRunner<?>> kls) {
+
+ }
+
+ @Override
+ public CheckEnabled getStableUniqueNames() {
+ return null;
+ }
+
+ @Override
+ public void setStableUniqueNames(CheckEnabled enabled) {
+ }
++
++ @Override
++ public String getTempLocation() {
++ return null;
++ }
++
++ @Override
++ public void setTempLocation(String tempLocation) {
++ }
+ };
+ }
+ return options;
+ }
+
+ @Override
+ public void output(KV<K, VOUT> output) {
+ throw new UnsupportedOperationException(
+ "output() is not available when processing KeyedWorkItems.");
+ }
+
+ @Override
+ public void outputWithTimestamp(KV<K, VOUT> output, Instant timestamp) {
+ throw new UnsupportedOperationException(
+ "outputWithTimestamp() is not available when processing KeyedWorkItems.");
+ }
+
+ @Override
+ public PaneInfo pane() {
+ throw new UnsupportedOperationException("pane() is not available when processing KeyedWorkItems.");
+ }
+
+ @Override
+ public BoundedWindow window() {
+ throw new UnsupportedOperationException(
+ "window() is not available when processing KeyedWorkItems.");
+ }
+
+ @Override
+ public WindowingInternals<KeyedWorkItem<K, VIN>, KV<K, VOUT>> windowingInternals() {
+ return new WindowingInternals<KeyedWorkItem<K, VIN>, KV<K, VOUT>>() {
+
+ @Override
+ public com.google.cloud.dataflow.sdk.util.state.StateInternals stateInternals() {
+ return stateInternals;
+ }
+
+ @Override
+ public void outputWindowedValue(KV<K, VOUT> output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
+ // TODO: No need to represent timestamp twice.
+ collector.setAbsoluteTimestamp(timestamp.getMillis());
+ collector.collect(WindowedValue.of(output, timestamp, windows, pane));
+
+ }
+
+ @Override
+ public TimerInternals timerInternals() {
+ return timerInternals;
+ }
+
+ @Override
+ public Collection<? extends BoundedWindow> windows() {
+ throw new UnsupportedOperationException("windows() is not available in Streaming mode.");
+ }
+
+ @Override
+ public PaneInfo pane() {
+ throw new UnsupportedOperationException("pane() is not available in Streaming mode.");
+ }
+
+ @Override
+ public <T> void writePCollectionViewData(TupleTag<?> tag, Iterable<WindowedValue<T>> data, Coder<T> elemCoder) throws IOException {
+ throw new RuntimeException("writePCollectionViewData() not available in Streaming mode.");
+ }
+
+ @Override
+ public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
+ throw new RuntimeException("sideInput() is not available in Streaming mode.");
+ }
+ };
+ }
+
+ @Override
+ public <T> T sideInput(PCollectionView<T> view) {
+ throw new RuntimeException("sideInput() is not supported in Streaming mode.");
+ }
+
+ @Override
+ public <T> void sideOutput(TupleTag<T> tag, T output) {
+ // ignore the side output, this can happen when a user does not register
+ // side outputs but then outputs using a freshly created TupleTag.
+ throw new RuntimeException("sideOutput() is not available when grouping by window.");
+ }
+
+ @Override
+ public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
+ sideOutput(tag, output);
+ }
+
+ @Override
+ protected <AggInputT, AggOutputT> Aggregator<AggInputT, AggOutputT> createAggregatorInternal(String name, Combine.CombineFn<AggInputT, ?, AggOutputT> combiner) {
+ Accumulator acc = getRuntimeContext().getAccumulator(name);
+ if (acc != null) {
+ AccumulatorHelper.compareAccumulatorTypes(name,
+ SerializableFnAggregatorWrapper.class, acc.getClass());
+ return (Aggregator<AggInputT, AggOutputT>) acc;
+ }
+
+ SerializableFnAggregatorWrapper<AggInputT, AggOutputT> accumulator =
+ new SerializableFnAggregatorWrapper<>(combiner);
+ getRuntimeContext().addAccumulator(name, accumulator);
+ return accumulator;
+ }
+ }
+
+ ////////////// Checkpointing implementation ////////////////
+
+ @Override
+ public StreamTaskState snapshotOperatorState(long checkpointId, long timestamp) throws Exception {
+ StreamTaskState taskState = super.snapshotOperatorState(checkpointId, timestamp);
+ AbstractStateBackend.CheckpointStateOutputView out = getStateBackend().createCheckpointStateOutputView(checkpointId, timestamp);
+ StateCheckpointWriter writer = StateCheckpointWriter.create(out);
+ Coder<K> keyCoder = inputKvCoder.getKeyCoder();
+
+ // checkpoint the timers
+ StateCheckpointUtils.encodeTimers(activeTimers, writer, keyCoder);
+
+ // checkpoint the state
+ StateCheckpointUtils.encodeState(perKeyStateInternals, writer, keyCoder);
+
+ // checkpoint the timerInternals
+ context.timerInternals.encodeTimerInternals(context, writer,
+ inputKvCoder, windowingStrategy.getWindowFn().windowCoder());
+
+ taskState.setOperatorState(out.closeAndGetHandle());
+ return taskState;
+ }
+
+ @Override
+ public void restoreState(StreamTaskState taskState, long recoveryTimestamp) throws Exception {
+ super.restoreState(taskState, recoveryTimestamp);
+
+ final ClassLoader userClassloader = getUserCodeClassloader();
+
+ Coder<? extends BoundedWindow> windowCoder = this.windowingStrategy.getWindowFn().windowCoder();
+ Coder<K> keyCoder = inputKvCoder.getKeyCoder();
+
+ @SuppressWarnings("unchecked")
+ StateHandle<DataInputView> inputState = (StateHandle<DataInputView>) taskState.getOperatorState();
+ DataInputView in = inputState.getState(userClassloader);
+ StateCheckpointReader reader = new StateCheckpointReader(in);
+
+ // restore the timers
+ this.activeTimers = StateCheckpointUtils.decodeTimers(reader, windowCoder, keyCoder);
+
+ // restore the state
+ this.perKeyStateInternals = StateCheckpointUtils.decodeState(
+ reader, windowingStrategy.getOutputTimeFn(), keyCoder, windowCoder, userClassloader);
+
+ // restore the timerInternals.
+ this.timerInternals.restoreTimerInternals(reader, inputKvCoder, windowCoder);
+ }
-}
++}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/911d2953/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/PipelineOptions.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/911d2953/sdk/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
----------------------------------------------------------------------
[4/7] incubator-beam git commit: Merge branch 'master' into
temp-option
Posted by ke...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c4515687/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
----------------------------------------------------------------------
diff --cc sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
index 0000000,cd0ebc6..032e93d
mode 000000,100644..100644
--- a/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
+++ b/sdks/java/core/src/main/java/com/google/cloud/dataflow/sdk/runners/DataflowPipelineRunner.java
@@@ -1,0 -1,3003 +1,3007 @@@
+ /*
+ * Copyright (C) 2015 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+ package com.google.cloud.dataflow.sdk.runners;
+
+ import static com.google.cloud.dataflow.sdk.util.StringUtils.approximatePTransformName;
+ import static com.google.cloud.dataflow.sdk.util.StringUtils.approximateSimpleName;
+ import static com.google.cloud.dataflow.sdk.util.WindowedValue.valueInEmptyWindows;
+ import static com.google.common.base.Preconditions.checkArgument;
+ import static com.google.common.base.Preconditions.checkState;
+
+ import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+ import com.google.api.services.clouddebugger.v2.Clouddebugger;
+ import com.google.api.services.clouddebugger.v2.model.Debuggee;
+ import com.google.api.services.clouddebugger.v2.model.RegisterDebuggeeRequest;
+ import com.google.api.services.clouddebugger.v2.model.RegisterDebuggeeResponse;
+ import com.google.api.services.dataflow.Dataflow;
+ import com.google.api.services.dataflow.model.DataflowPackage;
+ import com.google.api.services.dataflow.model.Job;
+ import com.google.api.services.dataflow.model.ListJobsResponse;
+ import com.google.api.services.dataflow.model.WorkerPool;
+ import com.google.cloud.dataflow.sdk.Pipeline;
+ import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
+ import com.google.cloud.dataflow.sdk.PipelineResult.State;
+ import com.google.cloud.dataflow.sdk.annotations.Experimental;
+ import com.google.cloud.dataflow.sdk.coders.AvroCoder;
+ import com.google.cloud.dataflow.sdk.coders.BigEndianLongCoder;
+ import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
+ import com.google.cloud.dataflow.sdk.coders.Coder;
+ import com.google.cloud.dataflow.sdk.coders.Coder.NonDeterministicException;
+ import com.google.cloud.dataflow.sdk.coders.CoderException;
+ import com.google.cloud.dataflow.sdk.coders.CoderRegistry;
+ import com.google.cloud.dataflow.sdk.coders.IterableCoder;
+ import com.google.cloud.dataflow.sdk.coders.KvCoder;
+ import com.google.cloud.dataflow.sdk.coders.ListCoder;
+ import com.google.cloud.dataflow.sdk.coders.MapCoder;
+ import com.google.cloud.dataflow.sdk.coders.SerializableCoder;
+ import com.google.cloud.dataflow.sdk.coders.StandardCoder;
+ import com.google.cloud.dataflow.sdk.coders.VarIntCoder;
+ import com.google.cloud.dataflow.sdk.coders.VarLongCoder;
+ import com.google.cloud.dataflow.sdk.io.AvroIO;
+ import com.google.cloud.dataflow.sdk.io.BigQueryIO;
+ import com.google.cloud.dataflow.sdk.io.FileBasedSink;
+ import com.google.cloud.dataflow.sdk.io.PubsubIO;
+ import com.google.cloud.dataflow.sdk.io.Read;
+ import com.google.cloud.dataflow.sdk.io.ShardNameTemplate;
+ import com.google.cloud.dataflow.sdk.io.TextIO;
+ import com.google.cloud.dataflow.sdk.io.UnboundedSource;
+ import com.google.cloud.dataflow.sdk.io.Write;
+ import com.google.cloud.dataflow.sdk.options.DataflowPipelineDebugOptions;
+ import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
+ import com.google.cloud.dataflow.sdk.options.DataflowPipelineWorkerPoolOptions;
+ import com.google.cloud.dataflow.sdk.options.PipelineOptions;
+ import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator;
+ import com.google.cloud.dataflow.sdk.options.StreamingOptions;
+ import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.JobSpecification;
+ import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TransformTranslator;
+ import com.google.cloud.dataflow.sdk.runners.DataflowPipelineTranslator.TranslationContext;
+ import com.google.cloud.dataflow.sdk.runners.dataflow.AssignWindows;
+ import com.google.cloud.dataflow.sdk.runners.dataflow.DataflowAggregatorTransforms;
+ import com.google.cloud.dataflow.sdk.runners.dataflow.PubsubIOTranslator;
+ import com.google.cloud.dataflow.sdk.runners.dataflow.ReadTranslator;
+ import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat;
+ import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.IsmRecord;
+ import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.IsmRecordCoder;
+ import com.google.cloud.dataflow.sdk.runners.worker.IsmFormat.MetadataKeyCoder;
+ import com.google.cloud.dataflow.sdk.transforms.Aggregator;
+ import com.google.cloud.dataflow.sdk.transforms.Combine;
+ import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
+ import com.google.cloud.dataflow.sdk.transforms.Create;
+ import com.google.cloud.dataflow.sdk.transforms.DoFn;
+ import com.google.cloud.dataflow.sdk.transforms.Flatten;
+ import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
+ import com.google.cloud.dataflow.sdk.transforms.PTransform;
+ import com.google.cloud.dataflow.sdk.transforms.ParDo;
+ import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
+ import com.google.cloud.dataflow.sdk.transforms.View;
+ import com.google.cloud.dataflow.sdk.transforms.View.CreatePCollectionView;
+ import com.google.cloud.dataflow.sdk.transforms.WithKeys;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.AfterPane;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.DefaultTrigger;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
+ import com.google.cloud.dataflow.sdk.transforms.windowing.Window;
+ import com.google.cloud.dataflow.sdk.util.CoderUtils;
+ import com.google.cloud.dataflow.sdk.util.DataflowReleaseInfo;
+ import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
+ import com.google.cloud.dataflow.sdk.util.InstanceBuilder;
+ import com.google.cloud.dataflow.sdk.util.MonitoringUtil;
+ import com.google.cloud.dataflow.sdk.util.PCollectionViews;
+ import com.google.cloud.dataflow.sdk.util.PathValidator;
+ import com.google.cloud.dataflow.sdk.util.PropertyNames;
+ import com.google.cloud.dataflow.sdk.util.Reshuffle;
+ import com.google.cloud.dataflow.sdk.util.SystemDoFnInternal;
+ import com.google.cloud.dataflow.sdk.util.Transport;
+ import com.google.cloud.dataflow.sdk.util.ValueWithRecordId;
+ import com.google.cloud.dataflow.sdk.util.WindowedValue;
+ import com.google.cloud.dataflow.sdk.util.WindowedValue.FullWindowedValueCoder;
+ import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
+ import com.google.cloud.dataflow.sdk.values.KV;
+ import com.google.cloud.dataflow.sdk.values.PCollection;
+ import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
+ import com.google.cloud.dataflow.sdk.values.PCollectionList;
+ import com.google.cloud.dataflow.sdk.values.PCollectionTuple;
+ import com.google.cloud.dataflow.sdk.values.PCollectionView;
+ import com.google.cloud.dataflow.sdk.values.PDone;
+ import com.google.cloud.dataflow.sdk.values.PInput;
+ import com.google.cloud.dataflow.sdk.values.POutput;
+ import com.google.cloud.dataflow.sdk.values.PValue;
+ import com.google.cloud.dataflow.sdk.values.TupleTag;
+ import com.google.cloud.dataflow.sdk.values.TupleTagList;
+ import com.google.common.annotations.VisibleForTesting;
+ import com.google.common.base.Function;
+ import com.google.common.base.Joiner;
+ import com.google.common.base.Optional;
+ import com.google.common.base.Preconditions;
+ import com.google.common.base.Strings;
+ import com.google.common.base.Utf8;
+ import com.google.common.collect.ForwardingMap;
+ import com.google.common.collect.HashMultimap;
+ import com.google.common.collect.ImmutableList;
+ import com.google.common.collect.ImmutableMap;
+ import com.google.common.collect.Iterables;
+ import com.google.common.collect.Maps;
+ import com.google.common.collect.Multimap;
+
+ import com.fasterxml.jackson.annotation.JsonCreator;
+ import com.fasterxml.jackson.annotation.JsonProperty;
+
+ import org.joda.time.DateTimeUtils;
+ import org.joda.time.DateTimeZone;
+ import org.joda.time.Duration;
+ import org.joda.time.format.DateTimeFormat;
+ import org.slf4j.Logger;
+ import org.slf4j.LoggerFactory;
+
+ import java.io.File;
+ import java.io.FileNotFoundException;
+ import java.io.IOException;
+ import java.io.InputStream;
+ import java.io.OutputStream;
+ import java.io.PrintWriter;
+ import java.io.Serializable;
+ import java.net.URISyntaxException;
+ import java.net.URL;
+ import java.net.URLClassLoader;
+ import java.util.ArrayList;
+ import java.util.Arrays;
+ import java.util.Collection;
+ import java.util.Collections;
+ import java.util.HashMap;
+ import java.util.HashSet;
+ import java.util.Iterator;
+ import java.util.List;
+ import java.util.Map;
+ import java.util.Random;
+ import java.util.Set;
+ import java.util.SortedSet;
+ import java.util.TreeSet;
+
+ /**
+ * A {@link PipelineRunner} that executes the operations in the
+ * pipeline by first translating them to the Dataflow representation
+ * using the {@link DataflowPipelineTranslator} and then submitting
+ * them to a Dataflow service for execution.
+ *
+ * <p><h3>Permissions</h3>
+ * When reading from a Dataflow source or writing to a Dataflow sink using
+ * {@code DataflowPipelineRunner}, the Google cloudservices account and the Google compute engine
+ * service account of the GCP project running the Dataflow Job will need access to the corresponding
+ * source/sink.
+ *
+ * <p>Please see <a href="https://cloud.google.com/dataflow/security-and-permissions">Google Cloud
+ * Dataflow Security and Permissions</a> for more details.
+ */
+ public class DataflowPipelineRunner extends PipelineRunner<DataflowPipelineJob> {
+ private static final Logger LOG = LoggerFactory.getLogger(DataflowPipelineRunner.class);
+
+ /** Provided configuration options. */
+ private final DataflowPipelineOptions options;
+
+ /** Client for the Dataflow service. This is used to actually submit jobs. */
+ private final Dataflow dataflowClient;
+
+ /** Translator for this DataflowPipelineRunner, based on options. */
+ private final DataflowPipelineTranslator translator;
+
+ /** Custom transforms implementations. */
+ private final Map<Class<?>, Class<?>> overrides;
+
+ /** A set of user defined functions to invoke at different points in execution. */
+ private DataflowPipelineRunnerHooks hooks;
+
+ // Environment version information.
+ private static final String ENVIRONMENT_MAJOR_VERSION = "4";
+
+ // Default Docker container images that execute Dataflow worker harness, residing in Google
+ // Container Registry, separately for Batch and Streaming.
+ public static final String BATCH_WORKER_HARNESS_CONTAINER_IMAGE
+ = "dataflow.gcr.io/v1beta3/java-batch:1.5.0";
+ public static final String STREAMING_WORKER_HARNESS_CONTAINER_IMAGE
+ = "dataflow.gcr.io/v1beta3/java-streaming:1.5.0";
+
+ // The limit of CreateJob request size.
+ private static final int CREATE_JOB_REQUEST_LIMIT_BYTES = 10 * 1024 * 1024;
+
+ private final Set<PCollection<?>> pcollectionsRequiringIndexedFormat;
+
+ /**
+ * Project IDs must contain lowercase letters, digits, or dashes.
+ * IDs must start with a letter and may not end with a dash.
+ * This regex isn't exact - this allows for patterns that would be rejected by
+ * the service, but this is sufficient for basic validation of project IDs.
+ */
+ public static final String PROJECT_ID_REGEXP = "[a-z][-a-z0-9:.]+[a-z0-9]";
+
+ /**
+ * Construct a runner from the provided options.
+ *
+ * <p>Validates and normalizes the options before constructing the runner: requires an
+ * appName; requires at least one of tempLocation/stagingLocation (defaulting whichever is
+ * unset from the other); defaults filesToStage from the classpath; and verifies the
+ * jobName, project ID, and worker-harness thread count against service requirements.
+ *
+ * @param options Properties that configure the runner.
+ * @return The newly created runner.
+ * @throws IllegalArgumentException if any required option is missing or invalid.
+ */
+ public static DataflowPipelineRunner fromOptions(PipelineOptions options) {
+ // (Re-)register standard IO factories. Clobbers any prior credentials.
+ IOChannelUtils.registerStandardIOFactories(options);
+
+ DataflowPipelineOptions dataflowOptions =
+ PipelineOptionsValidator.validate(DataflowPipelineOptions.class, options);
+ ArrayList<String> missing = new ArrayList<>();
+
+ if (dataflowOptions.getAppName() == null) {
+ missing.add("appName");
+ }
+ if (missing.size() > 0) {
+ throw new IllegalArgumentException(
+ "Missing required values: " + Joiner.on(',').join(missing));
+ }
+
+ PathValidator validator = dataflowOptions.getPathValidator();
++ Preconditions.checkArgument(!(Strings.isNullOrEmpty(dataflowOptions.getTempLocation())
++ && Strings.isNullOrEmpty(dataflowOptions.getStagingLocation())),
++ "Missing required value: at least one of tempLocation or stagingLocation must be set.");
++
+ if (dataflowOptions.getStagingLocation() != null) {
+ validator.validateOutputFilePrefixSupported(dataflowOptions.getStagingLocation());
+ }
+ if (dataflowOptions.getTempLocation() != null) {
+ validator.validateOutputFilePrefixSupported(dataflowOptions.getTempLocation());
+ }
+ // Whichever of tempLocation/stagingLocation is unset is derived from the other; the
+ // staging default is a "staging" subdirectory of the temp location.
+ if (Strings.isNullOrEmpty(dataflowOptions.getTempLocation())) {
+ dataflowOptions.setTempLocation(dataflowOptions.getStagingLocation());
+ } else if (Strings.isNullOrEmpty(dataflowOptions.getStagingLocation())) {
+ try {
+ dataflowOptions.setStagingLocation(
+ IOChannelUtils.resolve(dataflowOptions.getTempLocation(), "staging"));
+ } catch (IOException e) {
+ throw new IllegalArgumentException("Unable to resolve PipelineOptions.stagingLocation "
+ + "from PipelineOptions.tempLocation. Please set the staging location explicitly.", e);
+ }
+ }
+
+ if (dataflowOptions.getFilesToStage() == null) {
+ dataflowOptions.setFilesToStage(detectClassPathResourcesToStage(
+ DataflowPipelineRunner.class.getClassLoader()));
+ LOG.info("PipelineOptions.filesToStage was not specified. "
+ + "Defaulting to files from the classpath: will stage {} files. "
+ + "Enable logging at DEBUG level to see which files will be staged.",
+ dataflowOptions.getFilesToStage().size());
+ LOG.debug("Classpath elements: {}", dataflowOptions.getFilesToStage());
+ }
+
+ // Verify jobName according to service requirements.
+ String jobName = dataflowOptions.getJobName().toLowerCase();
+ Preconditions.checkArgument(
+ jobName.matches("[a-z]([-a-z0-9]*[a-z0-9])?"),
+ "JobName invalid; the name must consist of only the characters "
+ + "[-a-z0-9], starting with a letter and ending with a letter "
+ + "or number");
+
+ // Verify project
+ // An all-digit value is a project number, not the required project ID.
+ String project = dataflowOptions.getProject();
+ if (project.matches("[0-9]*")) {
+ throw new IllegalArgumentException("Project ID '" + project
+ + "' invalid. Please make sure you specified the Project ID, not project number.");
+ } else if (!project.matches(PROJECT_ID_REGEXP)) {
+ throw new IllegalArgumentException("Project ID '" + project
+ + "' invalid. Please make sure you specified the Project ID, not project description.");
+ }
+
+ DataflowPipelineDebugOptions debugOptions =
+ dataflowOptions.as(DataflowPipelineDebugOptions.class);
+ // Verify the number of worker threads is a valid value
+ if (debugOptions.getNumberOfWorkerHarnessThreads() < 0) {
+ throw new IllegalArgumentException("Number of worker harness threads '"
+ + debugOptions.getNumberOfWorkerHarnessThreads()
+ + "' invalid. Please make sure the value is non-negative.");
+ }
+
+ return new DataflowPipelineRunner(dataflowOptions);
+ }
+
+ /**
+ * Constructs a runner from already-validated options.
+ *
+ * <p>Installs the runner's transform-override table: streaming pipelines replace
+ * Create/View/Write/Pubsub/Read transforms with streaming-safe variants (or mark the
+ * unsupported IOs), while batch pipelines install batch Write/AvroIO/TextIO overrides
+ * and, unless the "disable_ism_side_input" experiment is set, Ism-based View overrides.
+ */
+ @VisibleForTesting protected DataflowPipelineRunner(DataflowPipelineOptions options) {
+ this.options = options;
+ this.dataflowClient = options.getDataflowClient();
+ this.translator = DataflowPipelineTranslator.fromOptions(options);
+ this.pcollectionsRequiringIndexedFormat = new HashSet<>();
+ this.ptransformViewsWithNonDeterministicKeyCoders = new HashSet<>();
+
+ if (options.isStreaming()) {
+ overrides = ImmutableMap.<Class<?>, Class<?>>builder()
+ .put(Combine.GloballyAsSingletonView.class, StreamingCombineGloballyAsSingletonView.class)
+ .put(Create.Values.class, StreamingCreate.class)
+ .put(View.AsMap.class, StreamingViewAsMap.class)
+ .put(View.AsMultimap.class, StreamingViewAsMultimap.class)
+ .put(View.AsSingleton.class, StreamingViewAsSingleton.class)
+ .put(View.AsList.class, StreamingViewAsList.class)
+ .put(View.AsIterable.class, StreamingViewAsIterable.class)
+ .put(Write.Bound.class, StreamingWrite.class)
+ .put(PubsubIO.Write.Bound.class, StreamingPubsubIOWrite.class)
+ .put(Read.Unbounded.class, StreamingUnboundedRead.class)
+ .put(Read.Bounded.class, UnsupportedIO.class)
+ .put(AvroIO.Read.Bound.class, UnsupportedIO.class)
+ .put(AvroIO.Write.Bound.class, UnsupportedIO.class)
+ .put(BigQueryIO.Read.Bound.class, UnsupportedIO.class)
+ .put(TextIO.Read.Bound.class, UnsupportedIO.class)
+ .put(TextIO.Write.Bound.class, UnsupportedIO.class)
+ .put(Window.Bound.class, AssignWindows.class)
+ .build();
+ } else {
+ ImmutableMap.Builder<Class<?>, Class<?>> builder = ImmutableMap.<Class<?>, Class<?>>builder();
+ builder.put(Read.Unbounded.class, UnsupportedIO.class);
+ builder.put(Window.Bound.class, AssignWindows.class);
+ builder.put(Write.Bound.class, BatchWrite.class);
+ builder.put(AvroIO.Write.Bound.class, BatchAvroIOWrite.class);
+ builder.put(TextIO.Write.Bound.class, BatchTextIOWrite.class);
+ if (options.getExperiments() == null
+ || !options.getExperiments().contains("disable_ism_side_input")) {
+ builder.put(View.AsMap.class, BatchViewAsMap.class);
+ builder.put(View.AsMultimap.class, BatchViewAsMultimap.class);
+ builder.put(View.AsSingleton.class, BatchViewAsSingleton.class);
+ builder.put(View.AsList.class, BatchViewAsList.class);
+ builder.put(View.AsIterable.class, BatchViewAsIterable.class);
+ }
+ overrides = builder.build();
+ }
+ }
+
+ /**
+ * Applies the given transform to the input. For transforms with customized definitions
+ * for the Dataflow pipeline runner, the application is intercepted and modified here.
+ *
+ * @param transform the transform being applied
+ * @param input the input to apply the transform to
+ * @return the output of the (possibly substituted) transform
+ */
+ @Override
+ public <OutputT extends POutput, InputT extends PInput> OutputT apply(
+ PTransform<InputT, OutputT> transform, InputT input) {
+
+ if (Combine.GroupedValues.class.equals(transform.getClass())
+ || GroupByKey.class.equals(transform.getClass())) {
+
+ // For both Dataflow runners (streaming and batch), GroupByKey and GroupedValues are
+ // primitives. Returning a primitive output instead of the expanded definition
+ // signals to the translator that translation is necessary.
+ @SuppressWarnings("unchecked")
+ PCollection<?> pc = (PCollection<?>) input;
+ @SuppressWarnings("unchecked")
+ OutputT outputT = (OutputT) PCollection.createPrimitiveOutputInternal(
+ pc.getPipeline(),
+ transform instanceof GroupByKey
+ ? ((GroupByKey<?, ?>) transform).updateWindowingStrategy(pc.getWindowingStrategy())
+ : pc.getWindowingStrategy(),
+ pc.isBounded());
+ return outputT;
+ } else if (Window.Bound.class.equals(transform.getClass())) {
+ /*
+ * TODO: make this the generic way overrides are applied (using super.apply() rather than
+ * Pipeline.applyTransform(); this allows the apply method to be replaced without inserting
+ * additional nodes into the graph.
+ */
+ // casting to wildcard
+ @SuppressWarnings("unchecked")
+ OutputT windowed = (OutputT) applyWindow((Window.Bound<?>) transform, (PCollection<?>) input);
+ return windowed;
+ } else if (Flatten.FlattenPCollectionList.class.equals(transform.getClass())
+ && ((PCollectionList<?>) input).size() == 0) {
+ // Flattening an empty PCollectionList is replaced by an empty Create. The cast is
+ // suppressed on a local, consistent with the other branches of this method.
+ @SuppressWarnings("unchecked")
+ OutputT emptyOutput = (OutputT) Pipeline.applyTransform(input, Create.of());
+ return emptyOutput;
+ } else if (overrides.containsKey(transform.getClass())) {
+ // It is the responsibility of whoever constructs overrides to ensure this is type safe.
+ @SuppressWarnings("unchecked")
+ Class<PTransform<InputT, OutputT>> transformClass =
+ (Class<PTransform<InputT, OutputT>>) transform.getClass();
+
+ @SuppressWarnings("unchecked")
+ Class<PTransform<InputT, OutputT>> customTransformClass =
+ (Class<PTransform<InputT, OutputT>>) overrides.get(transform.getClass());
+
+ // The override class is instantiated reflectively with (runner, originalTransform).
+ PTransform<InputT, OutputT> customTransform =
+ InstanceBuilder.ofType(customTransformClass)
+ .withArg(DataflowPipelineRunner.class, this)
+ .withArg(transformClass, transform)
+ .build();
+
+ return Pipeline.applyTransform(input, customTransform);
+ } else {
+ return super.apply(transform, input);
+ }
+ }
+
+ /** Re-applies a {@link Window.Bound} transform via {@link AssignWindows}, restoring generics. */
+ private <T> PCollection<T> applyWindow(
+ Window.Bound<?> untypedTransform, PCollection<?> untypedInput) {
+ // The element types are known to match at compile time; recover them here.
+ @SuppressWarnings("unchecked")
+ Window.Bound<T> typedTransform = (Window.Bound<T>) untypedTransform;
+ @SuppressWarnings("unchecked")
+ PCollection<T> typedInput = (PCollection<T>) untypedInput;
+ return super.apply(new AssignWindows<>(typedTransform), typedInput);
+ }
+
+ /** Builds the console message pointing the user at Cloud Debugger for this job. */
+ private String debuggerMessage(String projectId, String uniquifier) {
+ String debugUrl = String.format(
+ "https://console.developers.google.com/debug?project=%s&dbgee=%s", projectId, uniquifier);
+ return "To debug your job, visit Google Cloud Debugger at: " + debugUrl;
+ }
+
+ /**
+ * If the Cloud Debugger is enabled in {@code options}, registers a debuggee for this job,
+ * stores it back into the options, and prints the debugger console URL to stdout.
+ *
+ * @throws RuntimeException if a debuggee was already specified in the options.
+ */
+ private void maybeRegisterDebuggee(DataflowPipelineOptions options, String uniquifier) {
+ if (!options.getEnableCloudDebugger()) {
+ return;
+ }
+
+ if (options.getDebuggee() != null) {
+ throw new RuntimeException("Should not specify the debuggee");
+ }
+
+ Clouddebugger client = Transport.newClouddebuggerClient(options).build();
+ Debuggee registered = registerDebuggee(client, uniquifier);
+ options.setDebuggee(registered);
+
+ System.out.println(debuggerMessage(options.getProject(), registered.getUniquifier()));
+ }
+
+ /**
+ * Registers this job as a debuggee with the Cloud Debugger controller service.
+ *
+ * @param debuggerClient client for the Cloud Debugger API
+ * @param uniquifier token identifying this job's debuggee; also used as the description
+ * @return the registered {@link Debuggee}
+ * @throws RuntimeException if the registration call fails or the returned debuggee
+ * carries an error status.
+ */
+ private Debuggee registerDebuggee(Clouddebugger debuggerClient, String uniquifier) {
+ RegisterDebuggeeRequest registerReq = new RegisterDebuggeeRequest();
+ registerReq.setDebuggee(new Debuggee()
+ .setProject(options.getProject())
+ .setUniquifier(uniquifier)
+ .setDescription(uniquifier)
+ .setAgentVersion("google.com/cloud-dataflow-java/v1"));
+
+ try {
+ RegisterDebuggeeResponse registerResponse =
+ debuggerClient.controller().debuggees().register(registerReq).execute();
+ Debuggee debuggee = registerResponse.getDebuggee();
+ // The service may return a debuggee whose status flags an error; surface it.
+ if (debuggee.getStatus() != null && debuggee.getStatus().getIsError()) {
+ throw new RuntimeException("Unable to register with the debugger: " +
+ debuggee.getStatus().getDescription().getFormat());
+ }
+
+ return debuggee;
+ } catch (IOException e) {
+ throw new RuntimeException("Unable to register with the debugger: ", e);
+ }
+ }
+
+ /**
+ * Translates the pipeline to a Dataflow Job, submits it to the Dataflow service, and
+ * returns a {@link DataflowPipelineJob} handle for monitoring.
+ *
+ * @throws RuntimeException if job creation fails or is rejected by the service.
+ */
+ @Override
+ public DataflowPipelineJob run(Pipeline pipeline) {
+ logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline);
+
+ LOG.info("Executing pipeline on the Dataflow Service, which will have billing implications "
+ + "related to Google Compute Engine usage and other Google Cloud Services.");
+
+ List<DataflowPackage> packages = options.getStager().stageFiles();
+
+
+ // Set a unique client_request_id in the CreateJob request.
+ // This is used to ensure idempotence of job creation across retried
+ // attempts to create a job. Specifically, if the service returns a job with
+ // a different client_request_id, it means the returned one is a different
+ // job previously created with the same job name, and that the job creation
+ // has been effectively rejected. The SDK should return
+ // Error::Already_Exists to user in that case.
+ int randomNum = new Random().nextInt(9000) + 1000;
+ String requestId = DateTimeFormat.forPattern("YYYYMMddHHmmssmmm").withZone(DateTimeZone.UTC)
+ .print(DateTimeUtils.currentTimeMillis()) + "_" + randomNum;
+
+ // Try to create a debuggee ID. This must happen before the job is translated since it may
+ // update the options.
+ DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
+ maybeRegisterDebuggee(dataflowOptions, requestId);
+
+ JobSpecification jobSpecification =
+ translator.translate(pipeline, this, packages);
+ Job newJob = jobSpecification.getJob();
+ newJob.setClientRequestId(requestId);
+
+ String version = DataflowReleaseInfo.getReleaseInfo().getVersion();
+ System.out.println("Dataflow SDK version: " + version);
+
+ newJob.getEnvironment().setUserAgent(DataflowReleaseInfo.getReleaseInfo());
+ // The Dataflow Service may write to the temporary directory directly, so
+ // it must be verified.
+ if (!Strings.isNullOrEmpty(options.getTempLocation())) {
+ newJob.getEnvironment().setTempStoragePrefix(
+ dataflowOptions.getPathValidator().verifyPath(options.getTempLocation()));
+ }
+ newJob.getEnvironment().setDataset(options.getTempDatasetId());
+ newJob.getEnvironment().setExperiments(options.getExperiments());
+
+ // Set the Docker container image that executes Dataflow worker harness, residing in Google
+ // Container Registry. Translator is guaranteed to create a worker pool prior to this point.
+ String workerHarnessContainerImage =
+ options.as(DataflowPipelineWorkerPoolOptions.class)
+ .getWorkerHarnessContainerImage();
+ for (WorkerPool workerPool : newJob.getEnvironment().getWorkerPools()) {
+ workerPool.setWorkerHarnessContainerImage(workerHarnessContainerImage);
+ }
+
+ // Requirements about the service.
+ Map<String, Object> environmentVersion = new HashMap<>();
+ environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_MAJOR_KEY, ENVIRONMENT_MAJOR_VERSION);
+ newJob.getEnvironment().setVersion(environmentVersion);
+ // Default jobType is JAVA_BATCH_AUTOSCALING: A Java job with workers that the job can
+ // autoscale if specified.
+ String jobType = "JAVA_BATCH_AUTOSCALING";
+
+ if (options.isStreaming()) {
+ jobType = "STREAMING";
+ }
+ environmentVersion.put(PropertyNames.ENVIRONMENT_VERSION_JOB_TYPE_KEY, jobType);
+
+ // Allow user hooks to adjust the environment before submission.
+ if (hooks != null) {
+ hooks.modifyEnvironmentBeforeSubmission(newJob.getEnvironment());
+ }
+
+ // Optionally dump the job specification to a file for debugging; failures here are
+ // logged but do not abort submission.
+ if (!Strings.isNullOrEmpty(options.getDataflowJobFile())) {
+ try (PrintWriter printWriter = new PrintWriter(
+ new File(options.getDataflowJobFile()))) {
+ String workSpecJson = DataflowPipelineTranslator.jobToString(newJob);
+ printWriter.print(workSpecJson);
+ LOG.info("Printed workflow specification to {}", options.getDataflowJobFile());
+ } catch (IllegalStateException ex) {
+ LOG.warn("Cannot translate workflow spec to json for debug.");
+ } catch (FileNotFoundException ex) {
+ LOG.warn("Cannot create workflow spec output file.");
+ }
+ }
+
+ String jobIdToUpdate = null;
+ if (options.getUpdate()) {
+ jobIdToUpdate = getJobIdFromName(options.getJobName());
+ newJob.setTransformNameMapping(options.getTransformNameMapping());
+ newJob.setReplaceJobId(jobIdToUpdate);
+ }
+ Job jobResult;
+ try {
+ jobResult = dataflowClient
+ .projects()
+ .jobs()
+ .create(options.getProject(), newJob)
+ .execute();
+ } catch (GoogleJsonResponseException e) {
+ String errorMessages = "Unexpected errors";
+ if (e.getDetails() != null) {
+ if (Utf8.encodedLength(newJob.toString()) >= CREATE_JOB_REQUEST_LIMIT_BYTES) {
+ errorMessages = "The size of the serialized JSON representation of the pipeline "
+ + "exceeds the allowable limit. "
+ + "For more information, please check the FAQ link below:\n"
+ + "https://cloud.google.com/dataflow/faq";
+ } else {
+ errorMessages = e.getDetails().getMessage();
+ }
+ }
+ throw new RuntimeException("Failed to create a workflow job: " + errorMessages, e);
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to create a workflow job", e);
+ }
+
+ // Obtain all of the extractors from the PTransforms used in the pipeline so the
+ // DataflowPipelineJob has access to them.
+ AggregatorPipelineExtractor aggregatorExtractor = new AggregatorPipelineExtractor(pipeline);
+ Map<Aggregator<?, ?>, Collection<PTransform<?, ?>>> aggregatorSteps =
+ aggregatorExtractor.getAggregatorSteps();
+
+ DataflowAggregatorTransforms aggregatorTransforms =
+ new DataflowAggregatorTransforms(aggregatorSteps, jobSpecification.getStepNames());
+
+ // Use a raw client for post-launch monitoring, as status calls may fail
+ // regularly and need not be retried automatically.
+ DataflowPipelineJob dataflowPipelineJob =
+ new DataflowPipelineJob(options.getProject(), jobResult.getId(),
+ Transport.newRawDataflowClient(options).build(), aggregatorTransforms);
+
+ // If the service returned client request id, the SDK needs to compare it
+ // with the original id generated in the request, if they are not the same
+ // (i.e., the returned job is not created by this request), throw
+ // DataflowJobAlreadyExistsException or DataflowJobAlreadyUpdatedException
+ // depending on whether this is a reload or not.
+ if (jobResult.getClientRequestId() != null && !jobResult.getClientRequestId().isEmpty()
+ && !jobResult.getClientRequestId().equals(requestId)) {
+ // If updating a job.
+ if (options.getUpdate()) {
+ throw new DataflowJobAlreadyUpdatedException(dataflowPipelineJob,
+ String.format("The job named %s with id: %s has already been updated into job id: %s "
+ + "and cannot be updated again.",
+ newJob.getName(), jobIdToUpdate, jobResult.getId()));
+ } else {
+ throw new DataflowJobAlreadyExistsException(dataflowPipelineJob,
+ String.format("There is already an active job named %s with id: %s. If you want "
+ + "to submit a second job, try again by setting a different name using --jobName.",
+ newJob.getName(), jobResult.getId()));
+ }
+ }
+
+ LOG.info("To access the Dataflow monitoring console, please navigate to {}",
+ MonitoringUtil.getJobMonitoringPageURL(options.getProject(), jobResult.getId()));
+ System.out.println("Submitted job: " + jobResult.getId());
+
+ LOG.info("To cancel the job using the 'gcloud' tool, run:\n> {}",
+ MonitoringUtil.getGcloudCancelCommand(options, jobResult.getId()));
+
+ return dataflowPipelineJob;
+ }
+
+ /** @return the {@link DataflowPipelineTranslator} associated with this runner. */
+ public DataflowPipelineTranslator getTranslator() {
+ return this.translator;
+ }
+
+ /**
+ * Installs the callbacks to invoke during execution; see
+ * {@code DataflowPipelineRunnerHooks} for the available hook points.
+ */
+ @Experimental
+ public void setHooks(DataflowPipelineRunnerHooks hooks) {
+ this.hooks = hooks;
+ }
+
+ /////////////////////////////////////////////////////////////////////////////
+
+ /** Outputs a warning about PCollection views without deterministic key coders. */
+ private void logWarningIfPCollectionViewHasNonDeterministicKeyCoder(Pipeline pipeline) {
+ // We need to wait till this point to determine the names of the transforms since only
+ // at this time do we know the hierarchy of the transforms otherwise we could
+ // have just recorded the full names during apply time.
+ if (!ptransformViewsWithNonDeterministicKeyCoders.isEmpty()) {
+ final SortedSet<String> ptransformViewNamesWithNonDeterministicKeyCoders = new TreeSet<>();
+ // Walk the pipeline to map the recorded transforms to their full (hierarchical) names.
+ pipeline.traverseTopologically(new PipelineVisitor() {
+ @Override
+ public void visitValue(PValue value, TransformTreeNode producer) {
+ }
+
+ @Override
+ public void visitTransform(TransformTreeNode node) {
+ if (ptransformViewsWithNonDeterministicKeyCoders.contains(node.getTransform())) {
+ ptransformViewNamesWithNonDeterministicKeyCoders.add(node.getFullName());
+ }
+ }
+
+ @Override
+ public void enterCompositeTransform(TransformTreeNode node) {
+ if (ptransformViewsWithNonDeterministicKeyCoders.contains(node.getTransform())) {
+ ptransformViewNamesWithNonDeterministicKeyCoders.add(node.getFullName());
+ }
+ }
+
+ @Override
+ public void leaveCompositeTransform(TransformTreeNode node) {
+ }
+ });
+
+ LOG.warn("Unable to use indexed implementation for View.AsMap and View.AsMultimap for {} "
+ + "because the key coder is not deterministic. Falling back to singleton implementation "
+ + "which may cause memory and/or performance problems. Future major versions of "
+ + "Dataflow will require deterministic key coders.",
+ ptransformViewNamesWithNonDeterministicKeyCoders);
+ }
+ }
+
+ /**
+ * Returns whether the given {@link PCollection} must be materialized using an
+ * indexed format.
+ */
+ boolean doesPCollectionRequireIndexedFormat(PCollection<?> pcol) {
+ return this.pcollectionsRequiringIndexedFormat.contains(pcol);
+ }
+
+ /**
+ * Records that the given {@link PCollection} must be materialized using an
+ * indexed format.
+ */
+ private void addPCollectionRequiringIndexedFormat(PCollection<?> pcol) {
+ this.pcollectionsRequiringIndexedFormat.add(pcol);
+ }
+
+ /** A set of {@link View}s with non-deterministic key coders. */
+ // NOTE(review): not initialized in this snippet; presumably assigned during runner
+ // construction -- confirm it is non-null before the first add() call.
+ Set<PTransform<?, ?>> ptransformViewsWithNonDeterministicKeyCoders;
+
+ /**
+ * Records that the {@link PTransform} requires a deterministic key coder.
+ *
+ * <p>The recorded transforms are reported in a single warning by
+ * {@code logWarningIfPCollectionViewHasNonDeterministicKeyCoder}.
+ */
+ private void recordViewUsesNonDeterministicKeyCoder(PTransform<?, ?> ptransform) {
+ ptransformViewsWithNonDeterministicKeyCoders.add(ptransform);
+ }
+
+ /**
+ * A {@link GroupByKey} transform for the {@link DataflowPipelineRunner} which sorts
+ * values using the secondary key {@code K2}.
+ *
+ * <p>The {@link PCollection} created by this {@link PTransform} will have values in
+ * the empty window. Care must be taken *afterwards* to either re-window
+ * (using {@link Window#into}) or only use {@link PTransform}s that do not depend on the
+ * values being within a window.
+ */
+ static class GroupByKeyAndSortValuesOnly<K1, K2, V>
+ extends PTransform<PCollection<KV<K1, KV<K2, V>>>, PCollection<KV<K1, Iterable<KV<K2, V>>>>> {
+ // Only instantiated by the batch view implementations in this file.
+ private GroupByKeyAndSortValuesOnly() {
+ }
+
+ @Override
+ public PCollection<KV<K1, Iterable<KV<K2, V>>>> apply(PCollection<KV<K1, KV<K2, V>>> input) {
+ // Primitive output: windowing is reset to the global default and the result is bounded.
+ PCollection<KV<K1, Iterable<KV<K2, V>>>> rval =
+ PCollection.<KV<K1, Iterable<KV<K2, V>>>>createPrimitiveOutputInternal(
+ input.getPipeline(),
+ WindowingStrategy.globalDefault(),
+ IsBounded.BOUNDED);
+
+ // Derive the output coder from the input KvCoder: same key coder, values wrapped
+ // in an iterable of the input's value coder.
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ KvCoder<K1, KV<K2, V>> inputCoder = (KvCoder) input.getCoder();
+ rval.setCoder(
+ KvCoder.of(inputCoder.getKeyCoder(),
+ IterableCoder.of(inputCoder.getValueCoder())));
+ return rval;
+ }
+ }
+
+ /**
+ * A {@link PTransform} that groups the values by a hash of the window's byte representation
+ * and sorts the values using the windows byte representation.
+ */
+ private static class GroupByWindowHashAsKeyAndWindowAsSortKey<T, W extends BoundedWindow> extends
+ PTransform<PCollection<T>, PCollection<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>>> {
+
+ /**
+ * A {@link DoFn} that for each element outputs a {@code KV} structure suitable for
+ * grouping by the hash of the window's byte representation and sorting the grouped values
+ * using the window's byte representation.
+ */
+ @SystemDoFnInternal
+ private static class UseWindowHashAsKeyAndWindowAsSortKeyDoFn<T, W extends BoundedWindow>
+ extends DoFn<T, KV<Integer, KV<W, WindowedValue<T>>>> implements DoFn.RequiresWindowAccess {
+
+ // Used solely to compute the shard hash of the window's encoded bytes.
+ private final IsmRecordCoder<?> ismCoderForHash;
+ private UseWindowHashAsKeyAndWindowAsSortKeyDoFn(IsmRecordCoder<?> ismCoderForHash) {
+ this.ismCoderForHash = ismCoderForHash;
+ }
+
+ @Override
+ public void processElement(ProcessContext c) throws Exception {
+ // RequiresWindowAccess guarantees a single window per invocation; the cast narrows
+ // it to the concrete window type W.
+ @SuppressWarnings("unchecked")
+ W window = (W) c.window();
+ // Key: hash of the window's bytes. Value: the window paired with the element
+ // reified as a WindowedValue (preserving timestamp and pane).
+ c.output(
+ KV.of(ismCoderForHash.hash(ImmutableList.of(window)),
+ KV.of(window,
+ WindowedValue.of(
+ c.element(),
+ c.timestamp(),
+ c.window(),
+ c.pane()))));
+ }
+ }
+
+ private final IsmRecordCoder<?> ismCoderForHash;
+ private GroupByWindowHashAsKeyAndWindowAsSortKey(IsmRecordCoder<?> ismCoderForHash) {
+ this.ismCoderForHash = ismCoderForHash;
+ }
+
+ @Override
+ public PCollection<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>> apply(PCollection<T> input) {
+ @SuppressWarnings("unchecked")
+ Coder<W> windowCoder = (Coder<W>)
+ input.getWindowingStrategy().getWindowFn().windowCoder();
+ PCollection<KV<Integer, KV<W, WindowedValue<T>>>> rval =
+ input.apply(ParDo.of(
+ new UseWindowHashAsKeyAndWindowAsSortKeyDoFn<T, W>(ismCoderForHash)));
+ // Explicit coder: the KV structure mixes the window coder with a fully windowed
+ // value coder and cannot be inferred.
+ rval.setCoder(
+ KvCoder.of(
+ VarIntCoder.of(),
+ KvCoder.of(windowCoder,
+ FullWindowedValueCoder.of(input.getCoder(), windowCoder))));
+ return rval.apply(new GroupByKeyAndSortValuesOnly<Integer, W, WindowedValue<T>>());
+ }
+ }
+
+ /**
+ * Specialized implementation for
+ * {@link com.google.cloud.dataflow.sdk.transforms.View.AsSingleton View.AsSingleton} for the
+ * Dataflow runner in batch mode.
+ *
+ * <p>Creates a set of files in the {@link IsmFormat} sharded by the hash of the windows
+ * byte representation and with records having:
+ * <ul>
+ * <li>Key 1: Window</li>
+ * <li>Value: Windowed value</li>
+ * </ul>
+ */
+ static class BatchViewAsSingleton<T>
+ extends PTransform<PCollection<T>, PCollectionView<T>> {
+
+ /**
+ * A {@link DoFn} that outputs {@link IsmRecord}s. These records are structured as follows:
+ * <ul>
+ * <li>Key 1: Window
+ * <li>Value: Windowed value
+ * </ul>
+ */
+ static class IsmRecordForSingularValuePerWindowDoFn<T, W extends BoundedWindow>
+ extends DoFn<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>,
+ IsmRecord<WindowedValue<T>>> {
+
+ @Override
+ public void processElement(ProcessContext c) throws Exception {
+ // Each grouped (window, windowed value) pair becomes one IsmRecord keyed by window.
+ Iterator<KV<W, WindowedValue<T>>> iterator = c.element().getValue().iterator();
+ while (iterator.hasNext()) {
+ KV<W, WindowedValue<T>> next = iterator.next();
+ c.output(
+ IsmRecord.of(
+ ImmutableList.of(next.getKey()), next.getValue()));
+ }
+ }
+ }
+
+ private final DataflowPipelineRunner runner;
+ private final View.AsSingleton<T> transform;
+ /**
+ * Builds an instance of this class from the overridden transform.
+ */
+ @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply()
+ public BatchViewAsSingleton(DataflowPipelineRunner runner, View.AsSingleton<T> transform) {
+ this.runner = runner;
+ this.transform = transform;
+ }
+
+ @Override
+ public PCollectionView<T> apply(PCollection<T> input) {
+ // For plain singletons the element, materialized, and view types all coincide (T, T, T).
+ return BatchViewAsSingleton.<T, T, T, BoundedWindow>applyForSingleton(
+ runner,
+ input,
+ new IsmRecordForSingularValuePerWindowDoFn<T, BoundedWindow>(),
+ transform.hasDefaultValue(),
+ transform.defaultValue(),
+ input.getCoder());
+ }
+
+ /**
+ * Shared implementation; also used by {@code BatchViewAsMap}'s singleton fallback where
+ * {@code FinalT}/{@code ViewT} differ from the element type {@code T}.
+ */
+ static <T, FinalT, ViewT, W extends BoundedWindow> PCollectionView<ViewT>
+ applyForSingleton(
+ DataflowPipelineRunner runner,
+ PCollection<T> input,
+ DoFn<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>,
+ IsmRecord<WindowedValue<FinalT>>> doFn,
+ boolean hasDefault,
+ FinalT defaultValue,
+ Coder<FinalT> defaultValueCoder) {
+
+ @SuppressWarnings("unchecked")
+ Coder<W> windowCoder = (Coder<W>)
+ input.getWindowingStrategy().getWindowFn().windowCoder();
+
+ // Raw cast bridges FinalT's singleton view to the caller-requested ViewT.
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ PCollectionView<ViewT> view =
+ (PCollectionView<ViewT>) PCollectionViews.<FinalT, W>singletonView(
+ input.getPipeline(),
+ (WindowingStrategy) input.getWindowingStrategy(),
+ hasDefault,
+ defaultValue,
+ defaultValueCoder);
+
+ IsmRecordCoder<WindowedValue<FinalT>> ismCoder =
+ coderForSingleton(windowCoder, defaultValueCoder);
+
+ // Group by window hash, sort by window, then convert each group to IsmRecords.
+ PCollection<IsmRecord<WindowedValue<FinalT>>> reifiedPerWindowAndSorted = input
+ .apply(new GroupByWindowHashAsKeyAndWindowAsSortKey<T, W>(ismCoder))
+ .apply(ParDo.of(doFn));
+ reifiedPerWindowAndSorted.setCoder(ismCoder);
+
+ // Mark the output so the runner materializes it in the indexed (Ism) format.
+ runner.addPCollectionRequiringIndexedFormat(reifiedPerWindowAndSorted);
+ return reifiedPerWindowAndSorted.apply(
+ CreatePCollectionView.<IsmRecord<WindowedValue<FinalT>>, ViewT>of(view));
+ }
+
+ @Override
+ protected String getKindString() {
+ return "BatchViewAsSingleton";
+ }
+
+ static <T> IsmRecordCoder<WindowedValue<T>> coderForSingleton(
+ Coder<? extends BoundedWindow> windowCoder, Coder<T> valueCoder) {
+ return IsmRecordCoder.of(
+ 1, // We hash using only the window
+ 0, // There are no metadata records
+ ImmutableList.<Coder<?>>of(windowCoder),
+ FullWindowedValueCoder.of(valueCoder, windowCoder));
+ }
+ }
+
+ /**
+ * Dataflow batch-mode override of
+ * {@link com.google.cloud.dataflow.sdk.transforms.View.AsIterable View.AsIterable}.
+ *
+ * <p>Materializes the input as a set of {@code Ism} files sharded by the hash of the window's
+ * byte representation, each record keyed by (window, index offset within window) and holding
+ * the windowed value. The actual materialization is shared with {@link BatchViewAsList}.
+ */
+ static class BatchViewAsIterable<T>
+ extends PTransform<PCollection<T>, PCollectionView<Iterable<T>>> {
+
+ private final DataflowPipelineRunner runner;
+
+ /**
+ * Builds an instance of this class from the overridden transform.
+ */
+ @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply()
+ public BatchViewAsIterable(DataflowPipelineRunner runner, View.AsIterable<T> transform) {
+ this.runner = runner;
+ }
+
+ @Override
+ public PCollectionView<Iterable<T>> apply(PCollection<T> input) {
+ // Build the iterable view token, then delegate the Ism-file materialization to the
+ // common iterable-like implementation in BatchViewAsList.
+ PCollectionView<Iterable<T>> iterableView =
+ PCollectionViews.iterableView(
+ input.getPipeline(), input.getWindowingStrategy(), input.getCoder());
+ return BatchViewAsList.applyForIterableLike(runner, input, iterableView);
+ }
+ }
+
+ /**
+ * Specialized implementation for
+ * {@link com.google.cloud.dataflow.sdk.transforms.View.AsList View.AsList} for the
+ * Dataflow runner in batch mode.
+ *
+ * <p>Creates a set of {@code Ism} files sharded by the hash of the window's byte representation
+ * and with records having:
+ * <ul>
+ * <li>Key 1: Window</li>
+ * <li>Key 2: Index offset within window</li>
+ * <li>Value: Windowed value</li>
+ * </ul>
+ */
+ static class BatchViewAsList<T>
+ extends PTransform<PCollection<T>, PCollectionView<List<T>>> {
+ /**
+ * A {@link DoFn} which creates {@link IsmRecord}s assuming that each element is within the
+ * global window. Each {@link IsmRecord} has
+ * <ul>
+ * <li>Key 1: Global window</li>
+ * <li>Key 2: Index offset within window</li>
+ * <li>Value: Windowed value</li>
+ * </ul>
+ */
+ @SystemDoFnInternal
+ static class ToIsmRecordForGlobalWindowDoFn<T>
+ extends DoFn<T, IsmRecord<WindowedValue<T>>> {
+
+ // Position of the next element within the current bundle; reset per bundle since
+ // each bundle maps to exactly one file (see applyForIterableLike).
+ long indexInBundle;
+ @Override
+ public void startBundle(Context c) throws Exception {
+ indexInBundle = 0;
+ }
+
+ @Override
+ public void processElement(ProcessContext c) throws Exception {
+ c.output(IsmRecord.of(
+ ImmutableList.of(GlobalWindow.INSTANCE, indexInBundle),
+ WindowedValue.of(
+ c.element(),
+ c.timestamp(),
+ GlobalWindow.INSTANCE,
+ c.pane())));
+ indexInBundle += 1;
+ }
+ }
+
+ /**
+ * A {@link DoFn} which creates {@link IsmRecord}s comparing successive elements windows
+ * to locate the window boundaries. The {@link IsmRecord} has:
+ * <ul>
+ * <li>Key 1: Window</li>
+ * <li>Key 2: Index offset within window</li>
+ * <li>Value: Windowed value</li>
+ * </ul>
+ */
+ @SystemDoFnInternal
+ static class ToIsmRecordForNonGlobalWindowDoFn<T, W extends BoundedWindow>
+ extends DoFn<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>,
+ IsmRecord<WindowedValue<T>>> {
+
+ private final Coder<W> windowCoder;
+ ToIsmRecordForNonGlobalWindowDoFn(Coder<W> windowCoder) {
+ this.windowCoder = windowCoder;
+ }
+
+ @Override
+ public void processElement(ProcessContext c) throws Exception {
+ long elementsInWindow = 0;
+ // Structural values are used so window equality follows the coder, not object identity.
+ Optional<Object> previousWindowStructuralValue = Optional.absent();
+ for (KV<W, WindowedValue<T>> value : c.element().getValue()) {
+ Object currentWindowStructuralValue = windowCoder.structuralValue(value.getKey());
+ // Compare to see if this is a new window so we can reset elementsInWindow.
+ if (previousWindowStructuralValue.isPresent()
+ && !previousWindowStructuralValue.get().equals(currentWindowStructuralValue)) {
+ // Reset the index offset since we have a new window.
+ elementsInWindow = 0;
+ }
+ c.output(IsmRecord.of(
+ ImmutableList.of(value.getKey(), elementsInWindow),
+ value.getValue()));
+ previousWindowStructuralValue = Optional.of(currentWindowStructuralValue);
+ elementsInWindow += 1;
+ }
+ }
+ }
+
+ private final DataflowPipelineRunner runner;
+ /**
+ * Builds an instance of this class from the overridden transform.
+ */
+ @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply()
+ public BatchViewAsList(DataflowPipelineRunner runner, View.AsList<T> transform) {
+ this.runner = runner;
+ }
+
+ @Override
+ public PCollectionView<List<T>> apply(PCollection<T> input) {
+ PCollectionView<List<T>> view = PCollectionViews.listView(
+ input.getPipeline(), input.getWindowingStrategy(), input.getCoder());
+ return applyForIterableLike(runner, input, view);
+ }
+
+ /**
+ * Shared materialization for list-like views; also used by {@code BatchViewAsIterable}.
+ */
+ static <T, W extends BoundedWindow, ViewT> PCollectionView<ViewT> applyForIterableLike(
+ DataflowPipelineRunner runner,
+ PCollection<T> input,
+ PCollectionView<ViewT> view) {
+
+ @SuppressWarnings("unchecked")
+ Coder<W> windowCoder = (Coder<W>)
+ input.getWindowingStrategy().getWindowFn().windowCoder();
+
+ IsmRecordCoder<WindowedValue<T>> ismCoder = coderForListLike(windowCoder, input.getCoder());
+
+ // If we are working in the global window, we do not need to do a GBK using the window
+ // as the key since all the elements of the input PCollection are already such.
+ // We just reify the windowed value while converting them to IsmRecords and generating
+ // an index based upon where we are within the bundle. Each bundle
+ // maps to one file exactly.
+ if (input.getWindowingStrategy().getWindowFn() instanceof GlobalWindows) {
+ PCollection<IsmRecord<WindowedValue<T>>> reifiedPerWindowAndSorted =
+ input.apply(ParDo.of(new ToIsmRecordForGlobalWindowDoFn<T>()));
+ reifiedPerWindowAndSorted.setCoder(ismCoder);
+
+ runner.addPCollectionRequiringIndexedFormat(reifiedPerWindowAndSorted);
+ return reifiedPerWindowAndSorted.apply(
+ CreatePCollectionView.<IsmRecord<WindowedValue<T>>, ViewT>of(view));
+ }
+
+ // Non-global windows: group by window hash, sort by window, then index per window.
+ PCollection<IsmRecord<WindowedValue<T>>> reifiedPerWindowAndSorted = input
+ .apply(new GroupByWindowHashAsKeyAndWindowAsSortKey<T, W>(ismCoder))
+ .apply(ParDo.of(new ToIsmRecordForNonGlobalWindowDoFn<T, W>(windowCoder)));
+ reifiedPerWindowAndSorted.setCoder(ismCoder);
+
+ runner.addPCollectionRequiringIndexedFormat(reifiedPerWindowAndSorted);
+ return reifiedPerWindowAndSorted.apply(
+ CreatePCollectionView.<IsmRecord<WindowedValue<T>>, ViewT>of(view));
+ }
+
+ @Override
+ protected String getKindString() {
+ return "BatchViewAsList";
+ }
+
+ static <T> IsmRecordCoder<WindowedValue<T>> coderForListLike(
+ Coder<? extends BoundedWindow> windowCoder, Coder<T> valueCoder) {
+ // TODO: swap to use a variable length long coder which has values which compare
+ // the same as their byte representation compare lexicographically within the key coder
+ return IsmRecordCoder.of(
+ 1, // We hash using only the window
+ 0, // There are no metadata records
+ ImmutableList.of(windowCoder, BigEndianLongCoder.of()),
+ FullWindowedValueCoder.of(valueCoder, windowCoder));
+ }
+ }
+
+ /**
+ * Specialized implementation for
+ * {@link com.google.cloud.dataflow.sdk.transforms.View.AsMap View.AsMap} for the
+ * Dataflow runner in batch mode.
+ *
+ * <p>Creates a set of {@code Ism} files sharded by the hash of the key's byte
+ * representation. Each record is structured as follows:
+ * <ul>
+ * <li>Key 1: User key K</li>
+ * <li>Key 2: Window</li>
+ * <li>Key 3: 0L (constant)</li>
+ * <li>Value: Windowed value</li>
+ * </ul>
+ *
+ * <p>Alongside the data records, there are the following metadata records:
+ * <ul>
+ * <li>Key 1: Metadata Key</li>
+ * <li>Key 2: Window</li>
+ * <li>Key 3: Index [0, size of map]</li>
+ * <li>Value: variable length long byte representation of size of map if index is 0,
+ * otherwise the byte representation of a key</li>
+ * </ul>
+ * The {@code [META, Window, 0]} record stores the number of unique keys per window, while
+ * {@code [META, Window, i]} for {@code i} in {@code [1, size of map]} stores the user's key.
+ * This allows one to access the size of the map by looking at {@code [META, Window, 0]}
+ * and iterate over all the keys by accessing {@code [META, Window, i]} for {@code i} in
+ * {@code [1, size of map]}.
+ *
+ * <p>Note that in the case of a non-deterministic key coder, we fall back to using
+ * {@link com.google.cloud.dataflow.sdk.transforms.View.AsSingleton View.AsSingleton} printing
+ * a warning to users to specify a deterministic key coder.
+ */
+ static class BatchViewAsMap<K, V>
+ extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, V>>> {
+
+ /**
+ * A {@link DoFn} which groups elements by window boundaries. For each group,
+ * the group of elements is transformed into a {@link TransformedMap}.
+ * The transformed {@code Map<K, V>} is backed by a {@code Map<K, WindowedValue<V>>}
+ * and contains a function {@code WindowedValue<V> -> V}.
+ *
+ * <p>Outputs {@link IsmRecord}s having:
+ * <ul>
+ * <li>Key 1: Window</li>
+ * <li>Value: Transformed map containing a transform that removes the encapsulation
+ * of the window around each value,
+ * {@code Map<K, WindowedValue<V>> -> Map<K, V>}.</li>
+ * </ul>
+ */
+ static class ToMapDoFn<K, V, W extends BoundedWindow>
+ extends DoFn<KV<Integer, Iterable<KV<W, WindowedValue<KV<K, V>>>>>,
+ IsmRecord<WindowedValue<TransformedMap<K,
+ WindowedValue<V>,
+ V>>>> {
+
+ private final Coder<W> windowCoder;
+ ToMapDoFn(Coder<W> windowCoder) {
+ this.windowCoder = windowCoder;
+ }
+
+ @Override
+ public void processElement(ProcessContext c)
+ throws Exception {
+ // Structural values let window equality follow the coder, not object identity.
+ Optional<Object> previousWindowStructuralValue = Optional.absent();
+ Optional<W> previousWindow = Optional.absent();
+ Map<K, WindowedValue<V>> map = new HashMap<>();
+ for (KV<W, WindowedValue<KV<K, V>>> kv : c.element().getValue()) {
+ Object currentWindowStructuralValue = windowCoder.structuralValue(kv.getKey());
+ if (previousWindowStructuralValue.isPresent()
+ && !previousWindowStructuralValue.get().equals(currentWindowStructuralValue)) {
+ // Construct the transformed map containing all the elements since we
+ // are at a window boundary.
+ c.output(IsmRecord.of(
+ ImmutableList.of(previousWindow.get()),
+ valueInEmptyWindows(new TransformedMap<>(WindowedValueToValue.<V>of(), map))));
+ map = new HashMap<>();
+ }
+
+ // Verify that the user isn't trying to insert the same key multiple times.
+ // Supply all four message arguments: previous value, new value, key, window.
+ // (Previously the key argument was missing, so the window was printed in the
+ // key slot and the window slot was left unfilled.)
+ checkState(!map.containsKey(kv.getValue().getValue().getKey()),
+ "Multiple values [%s, %s] found for single key [%s] within window [%s].",
+ map.get(kv.getValue().getValue().getKey()),
+ kv.getValue().getValue().getValue(),
+ kv.getValue().getValue().getKey(),
+ kv.getKey());
+ map.put(kv.getValue().getValue().getKey(),
+ kv.getValue().withValue(kv.getValue().getValue().getValue()));
+ previousWindowStructuralValue = Optional.of(currentWindowStructuralValue);
+ previousWindow = Optional.of(kv.getKey());
+ }
+
+ // The last value for this hash is guaranteed to be at a window boundary
+ // so we output a transformed map containing all the elements since the last
+ // window boundary.
+ c.output(IsmRecord.of(
+ ImmutableList.of(previousWindow.get()),
+ valueInEmptyWindows(new TransformedMap<>(WindowedValueToValue.<V>of(), map))));
+ }
+ }
+
+ private final DataflowPipelineRunner runner;
+ /**
+ * Builds an instance of this class from the overridden transform.
+ */
+ @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply()
+ public BatchViewAsMap(DataflowPipelineRunner runner, View.AsMap<K, V> transform) {
+ this.runner = runner;
+ }
+
+ @Override
+ public PCollectionView<Map<K, V>> apply(PCollection<KV<K, V>> input) {
+ return this.<BoundedWindow>applyInternal(input);
+ }
+
+ private <W extends BoundedWindow> PCollectionView<Map<K, V>>
+ applyInternal(PCollection<KV<K, V>> input) {
+
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
+ try {
+ // Attempt the indexed (map-like) implementation; this requires a deterministic
+ // key coder.
+ PCollectionView<Map<K, V>> view = PCollectionViews.mapView(
+ input.getPipeline(), input.getWindowingStrategy(), inputCoder);
+ return BatchViewAsMultimap.applyForMapLike(runner, input, view, true /* unique keys */);
+ } catch (NonDeterministicException e) {
+ // Record the fallback so a warning can be logged with the transform's full name.
+ runner.recordViewUsesNonDeterministicKeyCoder(this);
+
+ // Since the key coder is not deterministic, we convert the map into a singleton
+ // and return a singleton view equivalent.
+ return applyForSingletonFallback(input);
+ }
+ }
+
+ @Override
+ protected String getKindString() {
+ return "BatchViewAsMap";
+ }
+
+ /** Transforms the input {@link PCollection} into a singleton {@link Map} per window. */
+ private <W extends BoundedWindow> PCollectionView<Map<K, V>>
+ applyForSingletonFallback(PCollection<KV<K, V>> input) {
+ @SuppressWarnings("unchecked")
+ Coder<W> windowCoder = (Coder<W>)
+ input.getWindowingStrategy().getWindowFn().windowCoder();
+
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
+
+ // Coder for the WindowedValue<V> -> V unwrapping function stored in the map.
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ Coder<Function<WindowedValue<V>, V>> transformCoder =
+ (Coder) SerializableCoder.of(WindowedValueToValue.class);
+
+ Coder<TransformedMap<K, WindowedValue<V>, V>> finalValueCoder =
+ TransformedMapCoder.of(
+ transformCoder,
+ MapCoder.of(
+ inputCoder.getKeyCoder(),
+ FullWindowedValueCoder.of(inputCoder.getValueCoder(), windowCoder)));
+
+ // Default for windows with no elements: an empty transformed map.
+ TransformedMap<K, WindowedValue<V>, V> defaultValue = new TransformedMap<>(
+ WindowedValueToValue.<V>of(),
+ ImmutableMap.<K, WindowedValue<V>>of());
+
+ return BatchViewAsSingleton.<KV<K, V>,
+ TransformedMap<K, WindowedValue<V>, V>,
+ Map<K, V>,
+ W> applyForSingleton(
+ runner,
+ input,
+ new ToMapDoFn<K, V, W>(windowCoder),
+ true,
+ defaultValue,
+ finalValueCoder);
+ }
+ }
+
+ /**
+ * Specialized implementation for
+ * {@link com.google.cloud.dataflow.sdk.transforms.View.AsMultimap View.AsMultimap} for the
+ * Dataflow runner in batch mode.
+ *
+ * <p>Creates a set of {@code Ism} files sharded by the hash of the key's byte
+ * representation. Each record is structured as follows:
+ * <ul>
+ * <li>Key 1: User key K</li>
+ * <li>Key 2: Window</li>
+ * <li>Key 3: Index offset for a given key and window.</li>
+ * <li>Value: Windowed value</li>
+ * </ul>
+ *
+ * <p>Alongside the data records, there are the following metadata records:
+ * <ul>
+ * <li>Key 1: Metadata Key</li>
+ * <li>Key 2: Window</li>
+ * <li>Key 3: Index [0, size of map]</li>
+ * <li>Value: variable length long byte representation of size of map if index is 0,
+ * otherwise the byte representation of a key</li>
+ * </ul>
+ * The {@code [META, Window, 0]} record stores the number of unique keys per window, while
+ * {@code [META, Window, i]} for {@code i} in {@code [1, size of map]} stores the user's key.
+ * This allows for one to access the size of the map by looking at {@code [META, Window, 0]}
+ * and iterate over all the keys by accessing {@code [META, Window, i]} for {@code i} in
+ * {@code [1, size of map]}.
+ *
+ * <p>Note that in the case of a non-deterministic key coder, we fallback to using
+ * {@link com.google.cloud.dataflow.sdk.transforms.View.AsSingleton View.AsSingleton} printing
+ * a warning to users to specify a deterministic key coder.
+ */
+ static class BatchViewAsMultimap<K, V>
+ extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, Iterable<V>>>> {
+ /**
+ * A {@link PTransform} that groups elements by the hash of window's byte representation
+ * if the input {@link PCollection} is not within the global window. Otherwise by the hash
+ * of the window and key's byte representation. This {@link PTransform} also sorts
+ * the values by the combination of the window and key's byte representations.
+ */
+ private static class GroupByKeyHashAndSortByKeyAndWindow<K, V, W extends BoundedWindow>
+ extends PTransform<PCollection<KV<K, V>>,
+ PCollection<KV<Integer, Iterable<KV<KV<K, W>, WindowedValue<V>>>>>> {
+
+ @SystemDoFnInternal
+ private static class GroupByKeyHashAndSortByKeyAndWindowDoFn<K, V, W>
+ extends DoFn<KV<K, V>, KV<Integer, KV<KV<K, W>, WindowedValue<V>>>>
+ implements DoFn.RequiresWindowAccess {
+
+ // Used solely to compute the shard hash of the user key's encoded bytes.
+ private final IsmRecordCoder<?> coder;
+ private GroupByKeyHashAndSortByKeyAndWindowDoFn(IsmRecordCoder<?> coder) {
+ this.coder = coder;
+ }
+
+ @Override
+ public void processElement(ProcessContext c) throws Exception {
+ @SuppressWarnings("unchecked")
+ W window = (W) c.window();
+
+ // Key: hash of the user key's bytes. Sort key: (user key, window).
+ // Value: the element's value reified with timestamp, window and pane.
+ // W is unbounded on this DoFn, hence the explicit BoundedWindow cast below.
+ c.output(
+ KV.of(coder.hash(ImmutableList.of(c.element().getKey())),
+ KV.of(KV.of(c.element().getKey(), window),
+ WindowedValue.of(
+ c.element().getValue(),
+ c.timestamp(),
+ (BoundedWindow) window,
+ c.pane()))));
+ }
+ }
+
+ private final IsmRecordCoder<?> coder;
+ public GroupByKeyHashAndSortByKeyAndWindow(IsmRecordCoder<?> coder) {
+ this.coder = coder;
+ }
+
+ @Override
+ public PCollection<KV<Integer, Iterable<KV<KV<K, W>, WindowedValue<V>>>>>
+ apply(PCollection<KV<K, V>> input) {
+
+ @SuppressWarnings("unchecked")
+ Coder<W> windowCoder = (Coder<W>)
+ input.getWindowingStrategy().getWindowFn().windowCoder();
+ @SuppressWarnings("unchecked")
+ KvCoder<K, V> inputCoder = (KvCoder<K, V>) input.getCoder();
+
+ PCollection<KV<Integer, KV<KV<K, W>, WindowedValue<V>>>> keyedByHash;
+ keyedByHash = input.apply(
+ ParDo.of(new GroupByKeyHashAndSortByKeyAndWindowDoFn<K, V, W>(coder)));
+ // Explicit coder: the nested KV structure cannot be inferred.
+ keyedByHash.setCoder(
+ KvCoder.of(
+ VarIntCoder.of(),
+ KvCoder.of(KvCoder.of(inputCoder.getKeyCoder(), windowCoder),
+ FullWindowedValueCoder.of(inputCoder.getValueCoder(), windowCoder))));
+
+ return keyedByHash.apply(
+ new GroupByKeyAndSortValuesOnly<Integer, KV<K, W>, WindowedValue<V>>());
+ }
+ }
+
+ /**
+ * A {@link DoFn} which creates {@link IsmRecord}s comparing successive elements windows
+ * and keys to locate window and key boundaries. The main output {@link IsmRecord}s have:
+ * <ul>
+ * <li>Key 1: Window</li>
+ * <li>Key 2: User key K</li>
+ * <li>Key 3: Index offset for a given key and window.</li>
+ * <li>Value: Windowed value</li>
+ * </ul>
+ *
+ * <p>Additionally, we output all the unique keys per window seen to {@code outputForEntrySet}
+ * and the unique key count per window to {@code outputForSize}.
+ *
+ * <p>Finally, if this DoFn has been requested to perform unique key checking, it will
+ * throw an {@link IllegalStateException} if more than one key per window is found.
+ */
+ static class ToIsmRecordForMapLikeDoFn<K, V, W extends BoundedWindow>
+ extends DoFn<KV<Integer, Iterable<KV<KV<K, W>, WindowedValue<V>>>>,
+ IsmRecord<WindowedValue<V>>> {
+
+ private final TupleTag<KV<Integer, KV<W, Long>>> outputForSize;
+ private final TupleTag<KV<Integer, KV<W, K>>> outputForEntrySet;
+ private final Coder<W> windowCoder;
+ private final Coder<K> keyCoder;
+ private final IsmRecordCoder<WindowedValue<V>> ismCoder;
+ private final boolean uniqueKeysExpected;
+ ToIsmRecordForMapLikeDoFn(
+ TupleTag<KV<Integer, KV<W, Long>>> outputForSize,
+ TupleTag<KV<Integer, KV<W, K>>> outputForEntrySet,
+ Coder<W> windowCoder,
+ Coder<K> keyCoder,
+ IsmRecordCoder<WindowedValue<V>> ismCoder,
+ boolean uniqueKeysExpected) {
+ this.outputForSize = outputForSize;
+ this.outputForEntrySet = outputForEntrySet;
+ this.windowCoder = windowCoder;
+ this.keyCoder = keyCoder;
+ this.ismCoder = ismCoder;
+ this.uniqueKeysExpected = uniqueKeysExpected;
+ }
+
+ @Override
+ public void processElement(ProcessContext c) throws Exception {
+ long currentKeyIndex = 0;
+ // We use one based indexing while counting
+ long currentUniqueKeyCounter = 1;
+ // NOTE(review): the first next() assumes the grouped iterable is non-empty --
+ // presumably guaranteed upstream by the GBK; confirm.
+ Iterator<KV<KV<K, W>, WindowedValue<V>>> iterator = c.element().getValue().iterator();
+
+ KV<KV<K, W>, WindowedValue<V>> currentValue = iterator.next();
+ // Structural values let key/window comparisons follow the coders, not object identity.
+ Object currentKeyStructuralValue =
+ keyCoder.structuralValue(currentValue.getKey().getKey());
+ Object currentWindowStructuralValue =
+ windowCoder.structuralValue(currentValue.getKey().getValue());
+
+ // One-element lookahead: compare each value against its successor to detect
+ // window and key boundaries in the sorted stream.
+ while (iterator.hasNext()) {
+ KV<KV<K, W>, WindowedValue<V>> nextValue = iterator.next();
+ Object nextKeyStructuralValue =
+ keyCoder.structuralValue(nextValue.getKey().getKey());
+ Object nextWindowStructuralValue =
+ windowCoder.structuralValue(nextValue.getKey().getValue());
+
+ outputDataRecord(c, currentValue, currentKeyIndex);
+
+ final long nextKeyIndex;
+ final long nextUniqueKeyCounter;
+
+ // Check to see if its a new window
+ if (!currentWindowStructuralValue.equals(nextWindowStructuralValue)) {
+ // The next value is a new window, so we output for size the number of unique keys
+ // seen and the last key of the window. We also reset the next key index the unique
+ // key counter.
+ outputMetadataRecordForSize(c, currentValue, currentUniqueKeyCounter);
+ outputMetadataRecordForEntrySet(c, currentValue);
+
+ nextKeyIndex = 0;
+ nextUniqueKeyCounter = 1;
+ } else if (!currentKeyStructuralValue.equals(nextKeyStructuralValue)){
+ // It is a new key within the same window so output the key for the entry set,
+ // reset the key index and increase the count of unique keys seen within this window.
+ outputMetadataRecordForEntrySet(c, currentValue);
+
+ nextKeyIndex = 0;
+ nextUniqueKeyCounter = currentUniqueKeyCounter + 1;
+ } else if (!uniqueKeysExpected) {
+ // It is not a new key so we don't have to output the number of elements in this
+ // window or increase the unique key counter. All we do is increase the key index.
+
+ nextKeyIndex = currentKeyIndex + 1;
+ nextUniqueKeyCounter = currentUniqueKeyCounter;
+ } else {
+ // Same key, same window, but unique keys were requested (map semantics).
+ throw new IllegalStateException(String.format(
+ "Unique keys are expected but found key %s with values %s and %s in window %s.",
+ currentValue.getKey().getKey(),
+ currentValue.getValue().getValue(),
+ nextValue.getValue().getValue(),
+ currentValue.getKey().getValue()));
+ }
+
+ currentValue = nextValue;
+ currentWindowStructuralValue = nextWindowStructuralValue;
+ currentKeyStructuralValue = nextKeyStructuralValue;
+ currentKeyIndex = nextKeyIndex;
+ currentUniqueKeyCounter = nextUniqueKeyCounter;
+ }
+
+ // Flush the trailing value: it always ends both its key run and its window.
+ outputDataRecord(c, currentValue, currentKeyIndex);
+ outputMetadataRecordForSize(c, currentValue, currentUniqueKeyCounter);
+ // The last value for this hash is guaranteed to be at a window boundary
+ // so we output a record with the number of unique keys seen.
+ outputMetadataRecordForEntrySet(c, currentValue);
+ }
+
+ /** This outputs the data record. */
+ private void outputDataRecord(
+ ProcessContext c, KV<KV<K, W>, WindowedValue<V>> value, long keyIndex) {
+ IsmRecord<WindowedValue<V>> ismRecord = IsmRecord.of(
+ ImmutableList.of(
+ value.getKey().getKey(),
+ value.getKey().getValue(),
+ keyIndex),
+ value.getValue());
+ c.output(ismRecord);
+ }
+
+ /**
+ * This outputs records which will be used to compute the number of keys for a given window.
+ */
+ private void outputMetadataRecordForSize(
+ ProcessContext c, KV<KV<K, W>, WindowedValue<V>> value, long uniqueKeyCount) {
+ c.sideOutput(outputForSize,
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(),
+ value.getKey().getValue())),
+ KV.of(value.getKey().getValue(), uniqueKeyCount)));
+ }
+
+ /** This outputs records which will be used to construct the entry set. */
+ private void outputMetadataRecordForEntrySet(
+ ProcessContext c, KV<KV<K, W>, WindowedValue<V>> value) {
+ c.sideOutput(outputForEntrySet,
+ KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(),
+ value.getKey().getValue())),
+ KV.of(value.getKey().getValue(), value.getKey().getKey())));
+ }
+ }
+
+ /**
+ * A {@link DoFn} which outputs a metadata {@link IsmRecord} per window of:
+ * <ul>
+ * <li>Key 1: META key</li>
+ * <li>Key 2: window</li>
+ * <li>Key 3: 0L (constant)</li>
+ * <li>Value: sum of values for window</li>
+ * </ul>
+ *
+ * <p>This {@link DoFn} is meant to be used to compute the number of unique keys
+ * per window for map and multimap side inputs.
+ */
+ static class ToIsmMetadataRecordForSizeDoFn<K, V, W extends BoundedWindow>
+ extends DoFn<KV<Integer, Iterable<KV<W, Long>>>, IsmRecord<WindowedValue<V>>> {
+ private final Coder<W> windowCoder;
+ ToIsmMetadataRecordForSizeDoFn(Coder<W> windowCoder) {
+ this.windowCoder = windowCoder;
+ }
+
+ @Override
+ public void processElement(ProcessContext c) throws Exception {
+ // NOTE(review): the first next() assumes the grouped iterable is non-empty --
+ // presumably guaranteed upstream by the GBK; confirm.
+ Iterator<KV<W, Long>> iterator = c.element().getValue().iterator();
+ KV<W, Long> currentValue = iterator.next();
+ // Structural values let window comparisons follow the coder, not object identity.
+ Object currentWindowStructuralValue = windowCoder.structuralValue(currentValue.getKey());
+ long size = 0;
+ // One-element lookahead: sum the per-window partial counts, emitting the total
+ // whenever the next value belongs to a different window.
+ while (iterator.hasNext()) {
+ KV<W, Long> nextValue = iterator.next();
+ Object nextWindowStructuralValue = windowCoder.structuralValue(nextValue.getKey());
+
+ size += currentValue.getValue();
+ if (!currentWindowStructuralValue.equals(nextWindowStructuralValue)) {
+ c.output(IsmRecord.<WindowedValue<V>>meta(
+ ImmutableList.of(IsmFormat.getMetadataKey(), currentValue.getKey(), 0L),
+ CoderUtils.encodeToByteArray(VarLongCoder.of(), size)));
+ size = 0;
+ }
+
+ currentValue = nextValue;
+ currentWindowStructuralValue = nextWindowStructuralValue;
+ }
+
+ size += currentValue.getValue();
+ // Output the final value since it is guaranteed to be on a window boundary.
+ c.output(IsmRecord.<WindowedValue<V>>meta(
+ ImmutableList.of(IsmFormat.getMetadataKey(), currentValue.getKey(), 0L),
+ CoderUtils.encodeToByteArray(VarLongCoder.of(), size)));
+ }
+ }
+
+ /**
+ * A {@link DoFn} which outputs a metadata {@link IsmRecord} per window and key pair of:
+ * <ul>
+ * <li>Key 1: META key</li>
+ * <li>Key 2: window</li>
+ * <li>Key 3: index offset (1-based index)</li>
+ * <li>Value: key</li>
+ * </ul>
+ *
+ * <p>This {@link DoFn} is meant to be used to output index to key records
+ * per window for map and multimap side inputs.
+ *
+ * <p>Input values are expected to be sorted by window; the 1-based index resets
+ * whenever the window structural value changes.
+ */
+ static class ToIsmMetadataRecordForKeyDoFn<K, V, W extends BoundedWindow>
+ extends DoFn<KV<Integer, Iterable<KV<W, K>>>, IsmRecord<WindowedValue<V>>> {
+
+ // Encodes user keys into the record's value bytes.
+ private final Coder<K> keyCoder;
+ // Used to derive structural values so windows can be compared for equality.
+ private final Coder<W> windowCoder;
+ ToIsmMetadataRecordForKeyDoFn(Coder<K> keyCoder, Coder<W> windowCoder) {
+ this.keyCoder = keyCoder;
+ this.windowCoder = windowCoder;
+ }
+
+ @Override
+ public void processElement(ProcessContext c) throws Exception {
+ // NOTE(review): assumes a non-empty iterable — iterator.next() would throw on an
+ // empty one; presumably GroupByKey never emits a key with no values. Confirm upstream.
+ Iterator<KV<W, K>> iterator = c.element().getValue().iterator();
+ KV<W, K> currentValue = iterator.next();
+ Object currentWindowStructuralValue = windowCoder.structuralValue(currentValue.getKey());
+ // 1-based position of the current key within its window.
+ long elementsInWindow = 1;
+ while (iterator.hasNext()) {
+ KV<W, K> nextValue = iterator.next();
+ Object nextWindowStructuralValue = windowCoder.structuralValue(nextValue.getKey());
+
+ // Emit (META, window, index) -> encoded key for the element we just passed.
+ c.output(IsmRecord.<WindowedValue<V>>meta(
+ ImmutableList.of(IsmFormat.getMetadataKey(), currentValue.getKey(), elementsInWindow),
+ CoderUtils.encodeToByteArray(keyCoder, currentValue.getValue())));
+ elementsInWindow += 1;
+
+ // Window boundary: restart the 1-based index for the next window.
+ if (!currentWindowStructuralValue.equals(nextWindowStructuralValue)) {
+ elementsInWindow = 1;
+ }
+
+ currentValue = nextValue;
+ currentWindowStructuralValue = nextWindowStructuralValue;
+ }
+
+ // Output the final value since it is guaranteed to be on a window boundary.
+ c.output(IsmRecord.<WindowedValue<V>>meta(
+ ImmutableList.of(IsmFormat.getMetadataKey(), currentValue.getKey(), elementsInWindow),
+ CoderUtils.encodeToByteArray(keyCoder, currentValue.getValue())));
+ }
+ }
+
+ /**
+ * A {@link DoFn} which partitions sets of elements by window boundaries. Within each
+ * partition, the set of elements is transformed into a {@link TransformedMap}.
+ * The transformed {@code Map<K, Iterable<V>>} is backed by a
+ * {@code Map<K, Iterable<WindowedValue<V>>>} and contains a function
+ * {@code Iterable<WindowedValue<V>> -> Iterable<V>}.
+ *
+ * <p>Outputs {@link IsmRecord}s having:
+ * <ul>
+ * <li>Key 1: Window</li>
+ * <li>Value: Transformed map containing a transform that removes the encapsulation
+ * of the window around each value,
+ * {@code Map<K, Iterable<WindowedValue<V>>> -> Map<K, Iterable<V>>}.</li>
+ * </ul>
+ *
+ * <p>Input values are expected to be sorted by window so that each window's
+ * elements arrive contiguously; one record is emitted per window boundary.
+ */
+ static class ToMultimapDoFn<K, V, W extends BoundedWindow>
+ extends DoFn<KV<Integer, Iterable<KV<W, WindowedValue<KV<K, V>>>>>,
+ IsmRecord<WindowedValue<TransformedMap<K,
+ Iterable<WindowedValue<V>>,
+ Iterable<V>>>>> {
+
+ // Used to derive structural values so windows can be compared for equality.
+ private final Coder<W> windowCoder;
+ ToMultimapDoFn(Coder<W> windowCoder) {
+ this.windowCoder = windowCoder;
+ }
+
+ @Override
+ public void processElement(ProcessContext c)
+ throws Exception {
+ // Track the previous element's window so a change marks a window boundary.
+ Optional<Object> previousWindowStructuralValue = Optional.absent();
+ Optional<W> previousWindow = Optional.absent();
+ // Accumulates all (key, windowed value) pairs seen within the current window.
+ Multimap<K, WindowedValue<V>> multimap = HashMultimap.create();
+ for (KV<W, WindowedValue<KV<K, V>>> kv : c.element().getValue()) {
+ Object currentWindowStructuralValue = windowCoder.structuralValue(kv.getKey());
+ if (previousWindowStructuralValue.isPresent()
+ && !previousWindowStructuralValue.get().equals(currentWindowStructuralValue)) {
+ // Construct the transformed map containing all the elements since we
+ // are at a window boundary.
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ Map<K, Iterable<WindowedValue<V>>> resultMap = (Map) multimap.asMap();
+ c.output(IsmRecord.<WindowedValue<TransformedMap<K,
+ Iterable<WindowedValue<V>>,
+ Iterable<V>>>>of(
+ ImmutableList.of(previousWindow.get()),
+ valueInEmptyWindows(
+ new TransformedMap<>(
+ IterableWithWindowedValuesToIterable.<V>of(), resultMap))));
+ multimap = HashMultimap.create();
+ }
+
+ // Re-wrap the element so the map value carries the windowed V
+ // (drops the KV<K, V> payload, keeping V with its original windowing metadata).
+ multimap.put(kv.getValue().getValue().getKey(),
+ kv.getValue().withValue(kv.getValue().getValue().getValue()));
+ previousWindowStructuralValue = Optional.of(currentWindowStructuralValue);
+ previousWindow = Optional.of(kv.getKey());
+ }
+
+ // The last value for this hash is guaranteed to be at a window boundary
+ // so we output a transformed map containing all the elements since the last
+ // window boundary.
+ // NOTE(review): previousWindow.get() throws if the input iterable was empty;
+ // presumably GroupByKey never emits a key with no values. Confirm upstream.
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ Map<K, Iterable<WindowedValue<V>>> resultMap = (Map) multimap.asMap();
+ c.output(IsmRecord.<WindowedValue<TransformedMap<K,
+ Iterable<WindowedValue<V>>,
+ Iterable<V>>>>of(
+ ImmutableList.of(previousWindow.get()),
+ valueInEmptyWindows(
+ new TransformedMap<>(IterableWithWindowedValuesToIterable.<V>of(), resultMap))));
+ }
+ }
+
+ // Retained so the expansion can register indexed-format PCollections and
+ // record non-deterministic key coder usage (see applyInternal/applyForMapLike).
+ private final DataflowPipelineRunner runner;
+ /**
+ * Builds an instance of this class from the overridden transform.
+ */
+ @SuppressWarnings("unused") // used via reflection in DataflowPipelineRunner#apply()
+ public BatchViewAsMultimap(DataflowPipelineRunner runner, View.AsMultimap<K, V> transform) {
+ this.runner = runner;
+ }
+
+ @Override
+ public PCollectionView<Map<K, Iterable<V>>> apply(PCollection<KV<K, V>> input) {
+ // Fix the window type parameter to BoundedWindow and delegate to the generic path.
+ return this.<BoundedWindow>applyInternal(input);
+ }
+
+ /**
+ * Expands the multimap view using the ISM map-like materialization, falling back to a
+ * singleton-map materialization when the key coder is not deterministic.
+ */
+ private <W extends BoundedWindow> PCollectionView<Map<K, Iterable<V>>>
+ applyInternal(PCollection<KV<K, V>> input) {
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
+ try {
+ PCollectionView<Map<K, Iterable<V>>> view = PCollectionViews.multimapView(
+ input.getPipeline(), input.getWindowingStrategy(), inputCoder);
+
+ // applyForMapLike verifies the key coder is deterministic and throws otherwise.
+ return applyForMapLike(runner, input, view, false /* unique keys not expected */);
+ } catch (NonDeterministicException e) {
+ runner.recordViewUsesNonDeterministicKeyCoder(this);
+
+ // Since the key coder is not deterministic, we convert the map into a singleton
+ // and return a singleton view equivalent.
+ return applyForSingletonFallback(input);
+ }
+ }
+
+ /**
+ * Transforms the input {@link PCollection} into a singleton {@link Map} per window.
+ *
+ * <p>Fallback used when the key coder is not deterministic: the whole window's
+ * contents become one {@link TransformedMap} value instead of indexed ISM records.
+ */
+ private <W extends BoundedWindow> PCollectionView<Map<K, Iterable<V>>>
+ applyForSingletonFallback(PCollection<KV<K, V>> input) {
+ @SuppressWarnings("unchecked")
+ Coder<W> windowCoder = (Coder<W>)
+ input.getWindowingStrategy().getWindowFn().windowCoder();
+
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
+
+ // Coder for the Iterable<WindowedValue<V>> -> Iterable<V> transform carried by the map.
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ Coder<Function<Iterable<WindowedValue<V>>, Iterable<V>>> transformCoder =
+ (Coder) SerializableCoder.of(IterableWithWindowedValuesToIterable.class);
+
+ Coder<TransformedMap<K, Iterable<WindowedValue<V>>, Iterable<V>>> finalValueCoder =
+ TransformedMapCoder.of(
+ transformCoder,
+ MapCoder.of(
+ inputCoder.getKeyCoder(),
+ IterableCoder.of(
+ FullWindowedValueCoder.of(inputCoder.getValueCoder(), windowCoder))));
+
+ // Empty transformed map used when a window contains no elements.
+ TransformedMap<K, Iterable<WindowedValue<V>>, Iterable<V>> defaultValue =
+ new TransformedMap<>(
+ IterableWithWindowedValuesToIterable.<V>of(),
+ ImmutableMap.<K, Iterable<WindowedValue<V>>>of());
+
+ // NOTE(review): the boolean argument presumably enables the default value above —
+ // confirm against the applyForSingleton signature.
+ return BatchViewAsSingleton.<KV<K, V>,
+ TransformedMap<K, Iterable<WindowedValue<V>>, Iterable<V>>,
+ Map<K, Iterable<V>>,
+ W> applyForSingleton(
+ runner,
+ input,
+ new ToMultimapDoFn<K, V, W>(windowCoder),
+ true,
+ defaultValue,
+ finalValueCoder);
+ }
+
+ /**
+ * Shared expansion for map and multimap side inputs: groups the input by key hash,
+ * emits ISM data records plus two metadata streams (per-window unique key counts and
+ * per-window key entry sets), flattens all three and registers them as the indexed
+ * materialization backing {@code view}.
+ *
+ * @throws NonDeterministicException if the input's key coder is not deterministic
+ */
+ private static <K, V, W extends BoundedWindow, ViewT> PCollectionView<ViewT> applyForMapLike(
+ DataflowPipelineRunner runner,
+ PCollection<KV<K, V>> input,
+ PCollectionView<ViewT> view,
+ boolean uniqueKeysExpected) throws NonDeterministicException {
+
+ @SuppressWarnings("unchecked")
+ Coder<W> windowCoder = (Coder<W>)
+ input.getWindowingStrategy().getWindowFn().windowCoder();
+
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
+
+ // If our key coder is deterministic, we can use the key portion of each KV
+ // as part of a composite key containing the window, key and index.
+ inputCoder.getKeyCoder().verifyDeterministic();
+
+ IsmRecordCoder<WindowedValue<V>> ismCoder =
+ coderForMapLike(windowCoder, inputCoder.getKeyCoder(), inputCoder.getValueCoder());
+
+ // Create the various output tags representing the main output containing the data stream
+ // and the side outputs containing the metadata about the size and entry set.
+ TupleTag<IsmRecord<WindowedValue<V>>> mainOutputTag = new TupleTag<>();
+ TupleTag<KV<Integer, KV<W, Long>>> outputForSizeTag = new TupleTag<>();
+ TupleTag<KV<Integer, KV<W, K>>> outputForEntrySetTag = new TupleTag<>();
+
+ // Process all the elements grouped by key hash, and sorted by key and then window
+ // outputting to all the outputs defined above.
+ PCollectionTuple outputTuple = input
+ .apply("GBKaSVForData", new GroupByKeyHashAndSortByKeyAndWindow<K, V, W>(ismCoder))
+ .apply(ParDo.of(new ToIsmRecordForMapLikeDoFn<K, V, W>(
+ outputForSizeTag, outputForEntrySetTag,
+ windowCoder, inputCoder.getKeyCoder(), ismCoder, uniqueKeysExpected))
+ .withOutputTags(mainOutputTag,
+ TupleTagList.of(
+ ImmutableList.<TupleTag<?>>of(outputForSizeTag,
+ outputForEntrySetTag))));
+
+ // Set the coder on the main data output.
+ PCollection<IsmRecord<WindowedValue<V>>> perHashWithReifiedWindows =
+ outputTuple.get(mainOutputTag);
+ perHashWithReifiedWindows.setCoder(ismCoder);
+
+ // Set the coder on the metadata output for size and process the entries
+ // producing a [META, Window, 0L] record per window storing the number of unique keys
+ // for each window.
+ PCollection<KV<Integer, KV<W, Long>>> outputForSize = outputTuple.get(outputForSizeTag);
+ outputForSize.setCoder(
+ KvCoder.of(VarIntCoder.of(),
+ KvCoder.of(windowCoder, VarLongCoder.of())));
+ PCollection<IsmRecord<WindowedValue<V>>> windowMapSizeMetadata = outputForSize
+ .apply("GBKaSVForSize", new GroupByKeyAndSortValuesOnly<Integer, W, Long>())
+ .apply(ParDo.of(new ToIsmMetadataRecordForSizeDoFn<K, V, W>(windowCoder)));
+ windowMapSizeMetadata.setCoder(ismCoder);
+
+ // Set the coder on the metadata output destined to build the entry set and process the
+ // entries producing a [META, Window, Index] record per window key pair storing the key.
+ PCollection<KV<Integer, KV<W, K>>> outputForEntrySet =
+ outputTuple.get(outputForEntrySetTag);
+ outputForEntrySet.setCoder(
+ KvCoder.of(VarIntCoder.of(),
+ KvCoder.of(windowCoder, inputCoder.getKeyCoder())));
+ PCollection<IsmRecord<WindowedValue<V>>> windowMapKeysMetadata = outputForEntrySet
+ .apply("GBKaSVForKeys", new GroupByKeyAndSortValuesOnly<Integer, W, K>())
+ .apply(ParDo.of(
+ new ToIsmMetadataRecordForKeyDoFn<K, V, W>(inputCoder.getKeyCoder(), windowCoder)));
+ windowMapKeysMetadata.setCoder(ismCoder);
+
+ // Set that all these outputs should be materialized using an indexed format.
+ runner.addPCollectionRequiringIndexedFormat(perHashWithReifiedWindows);
+ runner.addPCollectionRequiringIndexedFormat(windowMapSizeMetadata);
+ runner.addPCollectionRequiringIndexedFormat(windowMapKeysMetadata);
+
+ // Flatten the data and both metadata streams into a single ISM-coded collection
+ // and register it as the contents of the view.
+ PCollectionList<IsmRecord<WindowedValue<V>>> outputs =
+ PCollectionList.of(ImmutableList.of(
+ perHashWithReifiedWindows, windowMapSizeMetadata, windowMapKeysMetadata));
+
+ return Pipeline.applyTransform(outputs,
+ Flatten.<IsmRecord<WindowedValue<V>>>pCollections())
+ .apply(CreatePCollectionView.<IsmRecord<WindowedValue<V>>,
+ ViewT>of(view));
+ }
+
+ @Override
+ protected String getKindString() {
+ // Display name identifying this composite transform.
+ return "BatchViewAsMultimap";
+ }
+
+ /**
+ * Returns the {@link IsmRecordCoder} shared by map and multimap side inputs. Composite
+ * key components are (metadata-capable user key, window, big-endian long index); the
+ * value is a windowed {@code V}.
+ */
+ static <V> IsmRecordCoder<WindowedValue<V>> coderForMapLike(
+ Coder<? extends BoundedWindow> windowCoder, Coder<?> keyCoder, Coder<V> valueCoder) {
+ // TODO: swap to use a variable length long coder which has values which compare
+ // the same as their byte representation compare lexicographically within the key coder
+ return IsmRecordCoder.of(
+ 1, // We use only the key for hashing when producing value records
+ 2, // Since the key is not present, we add the window to the hash when
+ // producing metadata records
+ ImmutableList.of(
+ MetadataKeyCoder.of(keyCoder),
+ windowCoder,
+ BigEndianLongCoder.of()),
+ FullWindowedValueCoder.of(valueCoder, windowCoder));
+ }
+ }
+
+ /**
+ * A {@code Map<K, V2>} backed by a {@code Map<K, V1>} and a function that transforms
+ * {@code V1 -> V2}.
+ */
+ static class TransformedMap<K, V1, V2>
+ extends ForwardingMap<K, V2> {
+ private final Function<V1, V2> transform;
+ private final Map<K, V1> originalMap;
+ private final Map<K, V2> transformedMap;
+
+ private TransformedMap(Function<V1, V2> transform, Map<K, V1> originalMap) {
+ this.transform = transform;
+ this.originalMap = Collections.unmodifiableMap(originalMap);
+ this.transformedMap =
<TRUNCATED>
[7/7] incubator-beam git commit: This closes #44
Posted by ke...@apache.org.
This closes #44
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/c1de175b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/c1de175b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/c1de175b
Branch: refs/heads/master
Commit: c1de175bdfaeb410295de8eb1e2bae0f111bf542
Parents: 1c21aa2 c451568
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Mar 24 13:42:29 2016 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Thu Mar 24 13:42:29 2016 -0700
----------------------------------------------------------------------
.../FlinkGroupAlsoByWindowWrapper.java | 11 ++++++++-
.../sdk/options/DataflowPipelineOptions.java | 26 +++-----------------
.../dataflow/sdk/options/PipelineOptions.java | 14 +++++++++++
.../sdk/runners/DataflowPipelineRunner.java | 4 +++
.../sdk/runners/DataflowPipelineRunnerTest.java | 6 ++---
5 files changed, 33 insertions(+), 28 deletions(-)
----------------------------------------------------------------------