Posted to commits@beam.apache.org by ke...@apache.org on 2017/03/17 21:46:32 UTC

[01/50] [abbrv] beam git commit: This closes #2142: Deprecate Pipeline.getOptions

Repository: beam
Updated Branches:
  refs/heads/gearpump-runner 15a8ad62a -> 555842a1a


This closes #2142: Deprecate Pipeline.getOptions


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/ef256df1
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/ef256df1
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/ef256df1

Branch: refs/heads/gearpump-runner
Commit: ef256df18e91dcce936d051a7d52077dcaf02d73
Parents: 013365f 4f3c5bd
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Mar 9 11:57:21 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Thu Mar 9 11:57:21 2017 -0800

----------------------------------------------------------------------
 sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------
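
The diffstat above (4 added lines in Pipeline.java) is consistent with a standard Java deprecation: an @Deprecated annotation plus a short javadoc note steering callers away. A minimal sketch of that pattern follows; the class is reduced to the relevant method and the javadoc wording is assumed, not taken from commit ef256df1.

import org.apache.beam.sdk.options.PipelineOptions;

// Sketch only: the shape of a typical deprecation of Pipeline.getOptions.
public class PipelineSketch {
  private final PipelineOptions options;

  PipelineSketch(PipelineOptions options) {
    this.options = options;
  }

  /**
   * @deprecated wording assumed for illustration; pipeline authors should
   *     not rely on options being available at construction time.
   */
  @Deprecated
  public PipelineOptions getOptions() {
    return options;
  }
}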



[04/50] [abbrv] beam git commit: This closes #1994

Posted by ke...@apache.org.
This closes #1994


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b79dd642
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b79dd642
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b79dd642

Branch: refs/heads/gearpump-runner
Commit: b79dd6428f328e0615deef5ab9880a69903b79ca
Parents: ef256df d01620c
Author: Davor Bonaci <da...@google.com>
Authored: Thu Mar 9 15:21:02 2017 -0800
Committer: Davor Bonaci <da...@google.com>
Committed: Thu Mar 9 15:21:02 2017 -0800

----------------------------------------------------------------------
 sdks/java/io/hadoop-input-format/README.md      | 167 ++++
 sdks/java/io/hadoop-input-format/pom.xml        | 136 +++
 .../hadoop/inputformat/HadoopInputFormatIO.java | 941 +++++++++++++++++++
 .../sdk/io/hadoop/inputformat/package-info.java |  23 +
 .../ConfigurableEmployeeInputFormat.java        | 131 +++
 .../sdk/io/hadoop/inputformat/Employee.java     |  85 ++
 .../hadoop/inputformat/EmployeeInputFormat.java | 172 ++++
 .../inputformat/HadoopInputFormatIOTest.java    | 844 +++++++++++++++++
 .../ReuseObjectsEmployeeInputFormat.java        | 176 ++++
 .../hadoop/inputformat/TestEmployeeDataSet.java |  76 ++
 10 files changed, 2751 insertions(+)
----------------------------------------------------------------------
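
Judging from the diffstat, this merge (#1994) adds the HadoopInputFormatIO connector, which reads from any Hadoop InputFormat. A hedged usage sketch, assuming the read()/withConfiguration() entry points implied by HadoopInputFormatIO.java and the configuration keys described in the new README; the InputFormat and key/value types below are illustrative:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIO;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Sketch only: reading through a Hadoop InputFormat with the new IO.
public class HifReadSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Tell the IO which InputFormat to instantiate and what it emits.
    conf.setClass("mapreduce.job.inputformat.class",
        TextInputFormat.class, org.apache.hadoop.mapreduce.InputFormat.class);
    conf.setClass("key.class", LongWritable.class, Object.class);
    conf.setClass("value.class", Text.class, Object.class);

    Pipeline p = Pipeline.create();
    PCollection<KV<LongWritable, Text>> lines =
        p.apply(HadoopInputFormatIO.<LongWritable, Text>read()
            .withConfiguration(conf));
    p.run();
  }
}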



[27/50] [abbrv] beam git commit: This closes #2217: Fix Flink State GC

Posted by ke...@apache.org.
This closes #2217: Fix Flink State GC

  Properly deal with late processing-time timers
  Introduce Flink-specific state GC implementations
  Move GC timer checking to StatefulDoFnRunner.CleanupTimer


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/75fe559a
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/75fe559a
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/75fe559a

Branch: refs/heads/gearpump-runner
Commit: 75fe559a4115661c9a138099e58a73c6f42a038f
Parents: 54390a3 dbfcf4b
Author: Kenneth Knowles <kl...@google.com>
Authored: Fri Mar 10 13:01:00 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Fri Mar 10 13:01:00 2017 -0800

----------------------------------------------------------------------
 .../apache/beam/runners/core/DoFnRunners.java   |  15 +-
 .../beam/runners/core/StatefulDoFnRunner.java   | 128 +++--------
 .../runners/core/StatefulDoFnRunnerTest.java    | 110 ++++++++-
 .../wrappers/streaming/DoFnOperator.java        | 111 ++++++++-
 .../flink/streaming/DoFnOperatorTest.java       | 225 +++++++++++++++++++
 5 files changed, 472 insertions(+), 117 deletions(-)
----------------------------------------------------------------------
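
The three changes listed in this commit hinge on one invariant: state for a window may be garbage-collected only once the watermark passes the window's maximum timestamp plus the allowed lateness, after which no element can still fall into it. A minimal sketch of that expiry rule; the names are hypothetical and do not reproduce the actual StatefulDoFnRunner.CleanupTimer signatures:

import org.joda.time.Duration;
import org.joda.time.Instant;

// Sketch only: the expiry rule behind a window-cleanup ("GC") timer.
public class WindowExpirySketch {

  /** State is safe to clear once the watermark passes this instant. */
  static Instant gcTime(Instant windowMaxTimestamp, Duration allowedLateness) {
    return windowMaxTimestamp.plus(allowedLateness);
  }

  /** True if a timer firing at this timestamp is the cleanup timer. */
  static boolean isCleanupTimer(Instant timerTimestamp,
      Instant windowMaxTimestamp, Duration allowedLateness) {
    return !timerTimestamp.isBefore(gcTime(windowMaxTimestamp, allowedLateness));
  }

  public static void main(String[] args) {
    Instant maxTs = new Instant(60000L);              // window ends at t=60s
    Duration lateness = Duration.standardSeconds(30);
    System.out.println(gcTime(maxTs, lateness));      // GC at t=90s
  }
}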



[30/50] [abbrv] beam git commit: Added a test of default PAssert failure reason

Posted by ke...@apache.org.
Added a test of default PAssert failure reason


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2485a4cc
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2485a4cc
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2485a4cc

Branch: refs/heads/gearpump-runner
Commit: 2485a4ccce18784abf520792ea36b7d3d6efc394
Parents: 0d08d2a
Author: Aviem Zur <av...@gmail.com>
Authored: Wed Mar 1 08:10:03 2017 +0200
Committer: Aviem Zur <av...@gmail.com>
Committed: Fri Mar 10 23:13:38 2017 +0200

----------------------------------------------------------------------
 .../apache/beam/sdk/testing/PAssertTest.java    | 21 ++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/2485a4cc/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
index f50adf4..9bdb1b5 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
@@ -393,8 +393,25 @@ public class PAssertTest implements Serializable {
 
     Throwable thrown = runExpectingAssertionFailure(pipeline);
 
-    assertThat(thrown.getMessage(), containsString("Vals should have been empty"));
-    assertThat(thrown.getMessage(), containsString("Expected: iterable over [] in any order"));
+    String message = thrown.getMessage();
+
+    assertThat(message, containsString("Vals should have been empty"));
+    assertThat(message, containsString("Expected: iterable over [] in any order"));
+  }
+
+  @Test
+  @Category(RunnableOnService.class)
+  public void testEmptyFalseDefaultReasonString() throws Exception {
+    PCollection<Long> vals = pipeline.apply(CountingInput.upTo(5L));
+    PAssert.that(vals).empty();
+
+    Throwable thrown = runExpectingAssertionFailure(pipeline);
+
+    String message = thrown.getMessage();
+
+    assertThat(message,
+        containsString("CountingInput.BoundedCountingInput/Read(BoundedCountingSource).out"));
+    assertThat(message, containsString("Expected: iterable over [] in any order"));
   }
 
   private static Throwable runExpectingAssertionFailure(Pipeline pipeline) {
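
The new test pins down the default failure message: when no reason string is given, PAssert falls back to the name of the asserted PCollection (here the CountingInput read output). For contrast, a short sketch of supplying an explicit reason, which then appears in the failure output instead; this assumes the PAssert.that(String, PCollection) overload that the existing "Vals should have been empty" test above exercises:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.CountingInput;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.values.PCollection;

// Sketch only: an explicit reason string makes the failure self-describing.
public class PAssertReasonSketch {
  public static void main(String[] args) {
    Pipeline pipeline = TestPipeline.create();
    PCollection<Long> vals = pipeline.apply(CountingInput.upTo(5L));
    // vals actually holds [0..4], so this assertion fails at run time,
    // and the message carries the reason rather than only the PCollection name.
    PAssert.that("Vals should have been empty", vals).empty();
    pipeline.run();
  }
}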


[08/50] [abbrv] beam git commit: Auto-generated runner api proto bindings.

Posted by ke...@apache.org.
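
The diff below checks in the Python module that protoc emits for beam_runner_api.proto. The options embedded in the serialized descriptor (java_package org.apache.beam.sdk.common.runner.v1, outer class RunnerApi) show the same schema also drives Java bindings. As a hedged illustration of what these generated messages are for, a sketch using standard protobuf builder codegen, assuming those Java bindings are on the classpath:

import java.util.Collections;
import org.apache.beam.sdk.common.runner.v1.RunnerApi;

// Sketch only: constructing a minimal runner-API Pipeline proto. Field names
// mirror the .proto: a Pipeline holds a Components table plus a root id.
public class RunnerApiSketch {
  public static void main(String[] args) {
    RunnerApi.PTransform root = RunnerApi.PTransform.newBuilder()
        .setUniqueName("MyRootTransform")
        .build();

    RunnerApi.Components components = RunnerApi.Components.newBuilder()
        // transforms is a map<string, PTransform> keyed by transform id.
        .putAllTransforms(Collections.singletonMap("root", root))
        .build();

    RunnerApi.Pipeline pipeline = RunnerApi.Pipeline.newBuilder()
        .setComponents(components)
        .setRootTransformId("root")
        .build();

    System.out.println(pipeline);
  }
}
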
http://git-wip-us.apache.org/repos/asf/beam/blob/3bb125e1/sdks/python/apache_beam/runners/api/beam_runner_api_pb2.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/api/beam_runner_api_pb2.py b/sdks/python/apache_beam/runners/api/beam_runner_api_pb2.py
new file mode 100644
index 0000000..66c331b
--- /dev/null
+++ b/sdks/python/apache_beam/runners/api/beam_runner_api_pb2.py
@@ -0,0 +1,2755 @@
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# source: beam_runner_api.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf.internal import enum_type_wrapper
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf import descriptor_pb2
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from google.protobuf import any_pb2 as google_dot_protobuf_dot_any__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+  name='beam_runner_api.proto',
+  package='org.apache.beam.runner_api.v1',
+  syntax='proto3',
+  serialized_pb=_b('\n\x15\x62\x65\x61m_runner_api.proto\x12\x1dorg.apache.beam.runner_api.v1\x1a\x19google/protobuf/any.proto\"\x8d\x07\n\nComponents\x12M\n\ntransforms\x18\x01 \x03(\x0b\x32\x39.org.apache.beam.runner_api.v1.Components.TransformsEntry\x12Q\n\x0cpcollections\x18\x02 \x03(\x0b\x32;.org.apache.beam.runner_api.v1.Components.PcollectionsEntry\x12`\n\x14windowing_strategies\x18\x03 \x03(\x0b\x32\x42.org.apache.beam.runner_api.v1.Components.WindowingStrategiesEntry\x12\x45\n\x06\x63oders\x18\x04 \x03(\x0b\x32\x35.org.apache.beam.runner_api.v1.Components.CodersEntry\x12Q\n\x0c\x65nvironments\x18\x05 \x03(\x0b\x32;.org.apache.beam.runner_api.v1.Components.EnvironmentsEntry\x1a\\\n\x0fTransformsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32).org.apache.beam.runner_api.v1.PTransform:\x02\x38\x01\x1a_\n\x11PcollectionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x39\n\x05value\x18\x02 \x01(\x0b\x32*.org.apache.beam.runner_api.v1.PCollection:\x02\
 x38\x01\x1al\n\x18WindowingStrategiesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12?\n\x05value\x18\x02 \x01(\x0b\x32\x30.org.apache.beam.runner_api.v1.WindowingStrategy:\x02\x38\x01\x1aS\n\x0b\x43odersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x33\n\x05value\x18\x02 \x01(\x0b\x32$.org.apache.beam.runner_api.v1.Coder:\x02\x38\x01\x1a_\n\x11\x45nvironmentsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x39\n\x05value\x18\x02 \x01(\x0b\x32*.org.apache.beam.runner_api.v1.Environment:\x02\x38\x01\"\xe4\x06\n\x15MessageWithComponents\x12=\n\ncomponents\x18\x01 \x01(\x0b\x32).org.apache.beam.runner_api.v1.Components\x12\x35\n\x05\x63oder\x18\x02 \x01(\x0b\x32$.org.apache.beam.runner_api.v1.CoderH\x00\x12H\n\x0f\x63ombine_payload\x18\x03 \x01(\x0b\x32-.org.apache.beam.runner_api.v1.CombinePayloadH\x00\x12\x44\n\rfunction_spec\x18\x04 \x01(\x0b\x32+.org.apache.beam.runner_api.v1.FunctionSpecH\x00\x12\x45\n\x0epar_do_payload\x18\x06 \x01(\x0b\x32+.org.apache.beam.runner_api.v1.ParDoPayloadH\x00\x12?\
 n\nptransform\x18\x07 \x01(\x0b\x32).org.apache.beam.runner_api.v1.PTransformH\x00\x12\x41\n\x0bpcollection\x18\x08 \x01(\x0b\x32*.org.apache.beam.runner_api.v1.PCollectionH\x00\x12\x42\n\x0cread_payload\x18\t \x01(\x0b\x32*.org.apache.beam.runner_api.v1.ReadPayloadH\x00\x12>\n\nside_input\x18\x0b \x01(\x0b\x32(.org.apache.beam.runner_api.v1.SideInputH\x00\x12O\n\x13window_into_payload\x18\x0c \x01(\x0b\x32\x30.org.apache.beam.runner_api.v1.WindowIntoPayloadH\x00\x12N\n\x12windowing_strategy\x18\r \x01(\x0b\x32\x30.org.apache.beam.runner_api.v1.WindowingStrategyH\x00\x12M\n\x12urn_with_parameter\x18\x0e \x01(\x0b\x32/.org.apache.beam.runner_api.v1.UrnWithParameterH\x00\x42\x06\n\x04root\"\xa6\x01\n\x08Pipeline\x12=\n\ncomponents\x18\x01 \x01(\x0b\x32).org.apache.beam.runner_api.v1.Components\x12\x19\n\x11root_transform_id\x18\x02 \x01(\t\x12@\n\x0c\x64isplay_data\x18\x03 \x01(\x0b\x32*.org.apache.beam.runner_api.v1.DisplayData\"\xa8\x03\n\nPTransform\x12\x13\n\x0bunique_name\x18\x05
  \x01(\t\x12=\n\x04spec\x18\x01 \x01(\x0b\x32/.org.apache.beam.runner_api.v1.UrnWithParameter\x12\x15\n\rsubtransforms\x18\x02 \x03(\t\x12\x45\n\x06inputs\x18\x03 \x03(\x0b\x32\x35.org.apache.beam.runner_api.v1.PTransform.InputsEntry\x12G\n\x07outputs\x18\x04 \x03(\x0b\x32\x36.org.apache.beam.runner_api.v1.PTransform.OutputsEntry\x12@\n\x0c\x64isplay_data\x18\x06 \x01(\x0b\x32*.org.apache.beam.runner_api.v1.DisplayData\x1a-\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a.\n\x0cOutputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xd3\x01\n\x0bPCollection\x12\x13\n\x0bunique_name\x18\x01 \x01(\t\x12\x10\n\x08\x63oder_id\x18\x02 \x01(\t\x12<\n\nis_bounded\x18\x03 \x01(\x0e\x32(.org.apache.beam.runner_api.v1.IsBounded\x12\x1d\n\x15windowing_strategy_id\x18\x04 \x01(\t\x12@\n\x0c\x64isplay_data\x18\x05 \x01(\x0b\x32*.org.apache.beam.runner_api.v1.DisplayData\"\xb5\x03\n\x0cParDoPayload\x12:\n\x05
 \x64o_fn\x18\x01 \x01(\x0b\x32+.org.apache.beam.runner_api.v1.FunctionSpec\x12<\n\nparameters\x18\x02 \x03(\x0b\x32(.org.apache.beam.runner_api.v1.Parameter\x12P\n\x0bside_inputs\x18\x03 \x03(\x0b\x32;.org.apache.beam.runner_api.v1.ParDoPayload.SideInputsEntry\x12=\n\x0bstate_specs\x18\x04 \x03(\x0b\x32(.org.apache.beam.runner_api.v1.StateSpec\x12=\n\x0btimer_specs\x18\x05 \x03(\x0b\x32(.org.apache.beam.runner_api.v1.TimerSpec\x1a[\n\x0fSideInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32(.org.apache.beam.runner_api.v1.SideInput:\x02\x38\x01\"\x8b\x01\n\tParameter\x12;\n\x04type\x18\x01 \x01(\x0e\x32-.org.apache.beam.runner_api.v1.Parameter.Type\"A\n\x04Type\x12\n\n\x06WINDOW\x10\x00\x12\x14\n\x10PIPELINE_OPTIONS\x10\x01\x12\x17\n\x13RESTRICTION_TRACKER\x10\x02\"\x0b\n\tStateSpec\"\x0b\n\tTimerSpec\"\x88\x01\n\x0bReadPayload\x12;\n\x06source\x18\x01 \x01(\x0b\x32+.org.apache.beam.runner_api.v1.FunctionSpec\x12<\n\nis_bounded\x18\x02 \x01(\x0e\x32
 (.org.apache.beam.runner_api.v1.IsBounded\"S\n\x11WindowIntoPayload\x12>\n\twindow_fn\x18\x01 \x01(\x0b\x32+.org.apache.beam.runner_api.v1.FunctionSpec\"\xde\x02\n\x0e\x43ombinePayload\x12?\n\ncombine_fn\x18\x01 \x01(\x0b\x32+.org.apache.beam.runner_api.v1.FunctionSpec\x12\x1c\n\x14\x61\x63\x63umulator_coder_id\x18\x02 \x01(\t\x12<\n\nparameters\x18\x03 \x03(\x0b\x32(.org.apache.beam.runner_api.v1.Parameter\x12R\n\x0bside_inputs\x18\x04 \x03(\x0b\x32=.org.apache.beam.runner_api.v1.CombinePayload.SideInputsEntry\x1a[\n\x0fSideInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32(.org.apache.beam.runner_api.v1.SideInput:\x02\x38\x01\"_\n\x05\x43oder\x12\x39\n\x04spec\x18\x01 \x01(\x0b\x32+.org.apache.beam.runner_api.v1.FunctionSpec\x12\x1b\n\x13\x63omponent_coder_ids\x18\x02 \x03(\t\"\xd7\x03\n\x11WindowingStrategy\x12>\n\twindow_fn\x18\x01 \x01(\x0b\x32+.org.apache.beam.runner_api.v1.FunctionSpec\x12@\n\x0cmerge_status\x18\x02 \x01(\x0e\x32*.org.apache
 .beam.runner_api.v1.MergeStatus\x12\x17\n\x0fwindow_coder_id\x18\x03 \x01(\t\x12\x37\n\x07trigger\x18\x04 \x01(\x0b\x32&.org.apache.beam.runner_api.v1.Trigger\x12J\n\x11\x61\x63\x63umulation_mode\x18\x05 \x01(\x0e\x32/.org.apache.beam.runner_api.v1.AccumulationMode\x12>\n\x0boutput_time\x18\x06 \x01(\x0e\x32).org.apache.beam.runner_api.v1.OutputTime\x12H\n\x10\x63losing_behavior\x18\x07 \x01(\x0e\x32..org.apache.beam.runner_api.v1.ClosingBehavior\x12\x18\n\x10\x61llowed_lateness\x18\x08 \x01(\x03\"\xac\r\n\x07Trigger\x12\x44\n\tafter_all\x18\x01 \x01(\x0b\x32/.org.apache.beam.runner_api.v1.Trigger.AfterAllH\x00\x12\x44\n\tafter_any\x18\x02 \x01(\x0b\x32/.org.apache.beam.runner_api.v1.Trigger.AfterAnyH\x00\x12\x46\n\nafter_each\x18\x03 \x01(\x0b\x32\x30.org.apache.beam.runner_api.v1.Trigger.AfterEachH\x00\x12U\n\x12\x61\x66ter_end_of_widow\x18\x04 \x01(\x0b\x32\x37.org.apache.beam.runner_api.v1.Trigger.AfterEndOfWindowH\x00\x12[\n\x15\x61\x66ter_processing_time\x18\x05 \x01(\x0b\x32:
 .org.apache.beam.runner_api.v1.Trigger.AfterProcessingTimeH\x00\x12t\n\"after_synchronized_processing_time\x18\x06 \x01(\x0b\x32\x46.org.apache.beam.runner_api.v1.Trigger.AfterSynchronizedProcessingTimeH\x00\x12?\n\x06\x61lways\x18\x0c \x01(\x0b\x32-.org.apache.beam.runner_api.v1.Trigger.AlwaysH\x00\x12\x41\n\x07\x64\x65\x66\x61ult\x18\x07 \x01(\x0b\x32..org.apache.beam.runner_api.v1.Trigger.DefaultH\x00\x12L\n\relement_count\x18\x08 \x01(\x0b\x32\x33.org.apache.beam.runner_api.v1.Trigger.ElementCountH\x00\x12=\n\x05never\x18\t \x01(\x0b\x32,.org.apache.beam.runner_api.v1.Trigger.NeverH\x00\x12\x46\n\nor_finally\x18\n \x01(\x0b\x32\x30.org.apache.beam.runner_api.v1.Trigger.OrFinallyH\x00\x12?\n\x06repeat\x18\x0b \x01(\x0b\x32-.org.apache.beam.runner_api.v1.Trigger.RepeatH\x00\x1aG\n\x08\x41\x66terAll\x12;\n\x0bsubtriggers\x18\x01 \x03(\x0b\x32&.org.apache.beam.runner_api.v1.Trigger\x1aG\n\x08\x41\x66terAny\x12;\n\x0bsubtriggers\x18\x01 \x03(\x0b\x32&.org.apache.beam.runner_api.v1.Tr
 igger\x1aH\n\tAfterEach\x12;\n\x0bsubtriggers\x18\x01 \x03(\x0b\x32&.org.apache.beam.runner_api.v1.Trigger\x1a\x8f\x01\n\x10\x41\x66terEndOfWindow\x12=\n\rearly_firings\x18\x01 \x01(\x0b\x32&.org.apache.beam.runner_api.v1.Trigger\x12<\n\x0clate_firings\x18\x02 \x01(\x0b\x32&.org.apache.beam.runner_api.v1.Trigger\x1a\x66\n\x13\x41\x66terProcessingTime\x12O\n\x14timestamp_transforms\x18\x01 \x03(\x0b\x32\x31.org.apache.beam.runner_api.v1.TimestampTransform\x1a!\n\x1f\x41\x66terSynchronizedProcessingTime\x1a\t\n\x07\x44\x65\x66\x61ult\x1a%\n\x0c\x45lementCount\x12\x15\n\relement_count\x18\x01 \x01(\x05\x1a\x07\n\x05Never\x1a\x08\n\x06\x41lways\x1az\n\tOrFinally\x12\x34\n\x04main\x18\x01 \x01(\x0b\x32&.org.apache.beam.runner_api.v1.Trigger\x12\x37\n\x07\x66inally\x18\x02 \x01(\x0b\x32&.org.apache.beam.runner_api.v1.Trigger\x1a\x44\n\x06Repeat\x12:\n\nsubtrigger\x18\x01 \x01(\x0b\x32&.org.apache.beam.runner_api.v1.TriggerB\t\n\x07trigger\"\x8e\x02\n\x12TimestampTransform\x12H\n\x05\x64\x
 65lay\x18\x01 \x01(\x0b\x32\x37.org.apache.beam.runner_api.v1.TimestampTransform.DelayH\x00\x12M\n\x08\x61lign_to\x18\x02 \x01(\x0b\x32\x39.org.apache.beam.runner_api.v1.TimestampTransform.AlignToH\x00\x1a\x1d\n\x05\x44\x65lay\x12\x14\n\x0c\x64\x65lay_millis\x18\x01 \x01(\x03\x1a)\n\x07\x41lignTo\x12\x0e\n\x06period\x18\x03 \x01(\x03\x12\x0e\n\x06offset\x18\x04 \x01(\x03\x42\x15\n\x13timestamp_transform\"\xda\x01\n\tSideInput\x12G\n\x0e\x61\x63\x63\x65ss_pattern\x18\x01 \x01(\x0b\x32/.org.apache.beam.runner_api.v1.UrnWithParameter\x12<\n\x07view_fn\x18\x02 \x01(\x0b\x32+.org.apache.beam.runner_api.v1.FunctionSpec\x12\x46\n\x11window_mapping_fn\x18\x03 \x01(\x0b\x32+.org.apache.beam.runner_api.v1.FunctionSpec\"\x1a\n\x0b\x45nvironment\x12\x0b\n\x03url\x18\x01 \x01(\t\"e\n\x0c\x46unctionSpec\x12=\n\x04spec\x18\x01 \x01(\x0b\x32/.org.apache.beam.runner_api.v1.UrnWithParameter\x12\x16\n\x0e\x65nvironment_id\x18\x02 \x01(\t\"H\n\x10UrnWithParameter\x12\x0b\n\x03urn\x18\x01 \x01(\t\x12\'\
 n\tparameter\x18\x02 \x01(\x0b\x32\x14.google.protobuf.Any\"\xf7\x03\n\x0b\x44isplayData\x12>\n\x05items\x18\x01 \x03(\x0b\x32/.org.apache.beam.runner_api.v1.DisplayData.Item\x1a\x46\n\nIdentifier\x12\x14\n\x0ctransform_id\x18\x01 \x01(\t\x12\x15\n\rtransform_urn\x18\x02 \x01(\t\x12\x0b\n\x03key\x18\x03 \x01(\t\x1a\xf9\x01\n\x04Item\x12\x41\n\x02id\x18\x01 \x01(\x0b\x32\x35.org.apache.beam.runner_api.v1.DisplayData.Identifier\x12=\n\x04type\x18\x02 \x01(\x0e\x32/.org.apache.beam.runner_api.v1.DisplayData.Type\x12#\n\x05value\x18\x03 \x01(\x0b\x32\x14.google.protobuf.Any\x12)\n\x0bshort_value\x18\x04 \x01(\x0b\x32\x14.google.protobuf.Any\x12\r\n\x05label\x18\x05 \x01(\t\x12\x10\n\x08link_url\x18\x06 \x01(\t\"d\n\x04Type\x12\n\n\x06STRING\x10\x00\x12\x0b\n\x07INTEGER\x10\x01\x12\t\n\x05\x46LOAT\x10\x02\x12\x0b\n\x07\x42OOLEAN\x10\x03\x12\r\n\tTIMESTAMP\x10\x04\x12\x0c\n\x08\x44URATION\x10\x05\x12\x0e\n\nJAVA_CLASS\x10\x06*\'\n\tIsBounded\x12\x0b\n\x07\x42OUNDED\x10\x00\x12\r\n\tUNBOUN
 DED\x10\x01*C\n\x0bMergeStatus\x12\x0f\n\x0bNON_MERGING\x10\x00\x12\x0f\n\x0bNEEDS_MERGE\x10\x01\x12\x12\n\x0e\x41LREADY_MERGED\x10\x02*4\n\x10\x41\x63\x63umulationMode\x12\x0e\n\nDISCARDING\x10\x00\x12\x10\n\x0c\x41\x43\x43UMULATING\x10\x01*8\n\x0f\x43losingBehavior\x12\x0f\n\x0b\x45MIT_ALWAYS\x10\x00\x12\x14\n\x10\x45MIT_IF_NONEMPTY\x10\x01*I\n\nOutputTime\x12\x11\n\rEND_OF_WINDOW\x10\x00\x12\x12\n\x0eLATEST_IN_PANE\x10\x01\x12\x14\n\x10\x45\x41RLIEST_IN_PANE\x10\x02*S\n\nTimeDomain\x12\x0e\n\nEVENT_TIME\x10\x00\x12\x13\n\x0fPROCESSING_TIME\x10\x01\x12 \n\x1cSYNCHRONIZED_PROCESSING_TIME\x10\x02\x42\x31\n$org.apache.beam.sdk.common.runner.v1B\tRunnerApib\x06proto3')
+  ,
+  dependencies=[google_dot_protobuf_dot_any__pb2.DESCRIPTOR,])
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+_ISBOUNDED = _descriptor.EnumDescriptor(
+  name='IsBounded',
+  full_name='org.apache.beam.runner_api.v1.IsBounded',
+  filename=None,
+  file=DESCRIPTOR,
+  values=[
+    _descriptor.EnumValueDescriptor(
+      name='BOUNDED', index=0, number=0,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='UNBOUNDED', index=1, number=1,
+      options=None,
+      type=None),
+  ],
+  containing_type=None,
+  options=None,
+  serialized_start=7348,
+  serialized_end=7387,
+)
+_sym_db.RegisterEnumDescriptor(_ISBOUNDED)
+
+IsBounded = enum_type_wrapper.EnumTypeWrapper(_ISBOUNDED)
+_MERGESTATUS = _descriptor.EnumDescriptor(
+  name='MergeStatus',
+  full_name='org.apache.beam.runner_api.v1.MergeStatus',
+  filename=None,
+  file=DESCRIPTOR,
+  values=[
+    _descriptor.EnumValueDescriptor(
+      name='NON_MERGING', index=0, number=0,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='NEEDS_MERGE', index=1, number=1,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='ALREADY_MERGED', index=2, number=2,
+      options=None,
+      type=None),
+  ],
+  containing_type=None,
+  options=None,
+  serialized_start=7389,
+  serialized_end=7456,
+)
+_sym_db.RegisterEnumDescriptor(_MERGESTATUS)
+
+MergeStatus = enum_type_wrapper.EnumTypeWrapper(_MERGESTATUS)
+_ACCUMULATIONMODE = _descriptor.EnumDescriptor(
+  name='AccumulationMode',
+  full_name='org.apache.beam.runner_api.v1.AccumulationMode',
+  filename=None,
+  file=DESCRIPTOR,
+  values=[
+    _descriptor.EnumValueDescriptor(
+      name='DISCARDING', index=0, number=0,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='ACCUMULATING', index=1, number=1,
+      options=None,
+      type=None),
+  ],
+  containing_type=None,
+  options=None,
+  serialized_start=7458,
+  serialized_end=7510,
+)
+_sym_db.RegisterEnumDescriptor(_ACCUMULATIONMODE)
+
+AccumulationMode = enum_type_wrapper.EnumTypeWrapper(_ACCUMULATIONMODE)
+_CLOSINGBEHAVIOR = _descriptor.EnumDescriptor(
+  name='ClosingBehavior',
+  full_name='org.apache.beam.runner_api.v1.ClosingBehavior',
+  filename=None,
+  file=DESCRIPTOR,
+  values=[
+    _descriptor.EnumValueDescriptor(
+      name='EMIT_ALWAYS', index=0, number=0,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='EMIT_IF_NONEMPTY', index=1, number=1,
+      options=None,
+      type=None),
+  ],
+  containing_type=None,
+  options=None,
+  serialized_start=7512,
+  serialized_end=7568,
+)
+_sym_db.RegisterEnumDescriptor(_CLOSINGBEHAVIOR)
+
+ClosingBehavior = enum_type_wrapper.EnumTypeWrapper(_CLOSINGBEHAVIOR)
+_OUTPUTTIME = _descriptor.EnumDescriptor(
+  name='OutputTime',
+  full_name='org.apache.beam.runner_api.v1.OutputTime',
+  filename=None,
+  file=DESCRIPTOR,
+  values=[
+    _descriptor.EnumValueDescriptor(
+      name='END_OF_WINDOW', index=0, number=0,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='LATEST_IN_PANE', index=1, number=1,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='EARLIEST_IN_PANE', index=2, number=2,
+      options=None,
+      type=None),
+  ],
+  containing_type=None,
+  options=None,
+  serialized_start=7570,
+  serialized_end=7643,
+)
+_sym_db.RegisterEnumDescriptor(_OUTPUTTIME)
+
+OutputTime = enum_type_wrapper.EnumTypeWrapper(_OUTPUTTIME)
+_TIMEDOMAIN = _descriptor.EnumDescriptor(
+  name='TimeDomain',
+  full_name='org.apache.beam.runner_api.v1.TimeDomain',
+  filename=None,
+  file=DESCRIPTOR,
+  values=[
+    _descriptor.EnumValueDescriptor(
+      name='EVENT_TIME', index=0, number=0,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='PROCESSING_TIME', index=1, number=1,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='SYNCHRONIZED_PROCESSING_TIME', index=2, number=2,
+      options=None,
+      type=None),
+  ],
+  containing_type=None,
+  options=None,
+  serialized_start=7645,
+  serialized_end=7728,
+)
+_sym_db.RegisterEnumDescriptor(_TIMEDOMAIN)
+
+TimeDomain = enum_type_wrapper.EnumTypeWrapper(_TIMEDOMAIN)
+BOUNDED = 0
+UNBOUNDED = 1
+NON_MERGING = 0
+NEEDS_MERGE = 1
+ALREADY_MERGED = 2
+DISCARDING = 0
+ACCUMULATING = 1
+EMIT_ALWAYS = 0
+EMIT_IF_NONEMPTY = 1
+END_OF_WINDOW = 0
+LATEST_IN_PANE = 1
+EARLIEST_IN_PANE = 2
+EVENT_TIME = 0
+PROCESSING_TIME = 1
+SYNCHRONIZED_PROCESSING_TIME = 2
+
+
+_PARAMETER_TYPE = _descriptor.EnumDescriptor(
+  name='Type',
+  full_name='org.apache.beam.runner_api.v1.Parameter.Type',
+  filename=None,
+  file=DESCRIPTOR,
+  values=[
+    _descriptor.EnumValueDescriptor(
+      name='WINDOW', index=0, number=0,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='PIPELINE_OPTIONS', index=1, number=1,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='RESTRICTION_TRACKER', index=2, number=2,
+      options=None,
+      type=None),
+  ],
+  containing_type=None,
+  options=None,
+  serialized_start=3191,
+  serialized_end=3256,
+)
+_sym_db.RegisterEnumDescriptor(_PARAMETER_TYPE)
+
+_DISPLAYDATA_TYPE = _descriptor.EnumDescriptor(
+  name='Type',
+  full_name='org.apache.beam.runner_api.v1.DisplayData.Type',
+  filename=None,
+  file=DESCRIPTOR,
+  values=[
+    _descriptor.EnumValueDescriptor(
+      name='STRING', index=0, number=0,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='INTEGER', index=1, number=1,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='FLOAT', index=2, number=2,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='BOOLEAN', index=3, number=3,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='TIMESTAMP', index=4, number=4,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='DURATION', index=5, number=5,
+      options=None,
+      type=None),
+    _descriptor.EnumValueDescriptor(
+      name='JAVA_CLASS', index=6, number=6,
+      options=None,
+      type=None),
+  ],
+  containing_type=None,
+  options=None,
+  serialized_start=7246,
+  serialized_end=7346,
+)
+_sym_db.RegisterEnumDescriptor(_DISPLAYDATA_TYPE)
+
+
+_COMPONENTS_TRANSFORMSENTRY = _descriptor.Descriptor(
+  name='TransformsEntry',
+  full_name='org.apache.beam.runner_api.v1.Components.TransformsEntry',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='key', full_name='org.apache.beam.runner_api.v1.Components.TransformsEntry.key', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='value', full_name='org.apache.beam.runner_api.v1.Components.TransformsEntry.value', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')),
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=512,
+  serialized_end=604,
+)
+
+_COMPONENTS_PCOLLECTIONSENTRY = _descriptor.Descriptor(
+  name='PcollectionsEntry',
+  full_name='org.apache.beam.runner_api.v1.Components.PcollectionsEntry',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='key', full_name='org.apache.beam.runner_api.v1.Components.PcollectionsEntry.key', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='value', full_name='org.apache.beam.runner_api.v1.Components.PcollectionsEntry.value', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')),
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=606,
+  serialized_end=701,
+)
+
+_COMPONENTS_WINDOWINGSTRATEGIESENTRY = _descriptor.Descriptor(
+  name='WindowingStrategiesEntry',
+  full_name='org.apache.beam.runner_api.v1.Components.WindowingStrategiesEntry',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='key', full_name='org.apache.beam.runner_api.v1.Components.WindowingStrategiesEntry.key', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='value', full_name='org.apache.beam.runner_api.v1.Components.WindowingStrategiesEntry.value', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')),
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=703,
+  serialized_end=811,
+)
+
+_COMPONENTS_CODERSENTRY = _descriptor.Descriptor(
+  name='CodersEntry',
+  full_name='org.apache.beam.runner_api.v1.Components.CodersEntry',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='key', full_name='org.apache.beam.runner_api.v1.Components.CodersEntry.key', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='value', full_name='org.apache.beam.runner_api.v1.Components.CodersEntry.value', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')),
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=813,
+  serialized_end=896,
+)
+
+_COMPONENTS_ENVIRONMENTSENTRY = _descriptor.Descriptor(
+  name='EnvironmentsEntry',
+  full_name='org.apache.beam.runner_api.v1.Components.EnvironmentsEntry',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='key', full_name='org.apache.beam.runner_api.v1.Components.EnvironmentsEntry.key', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='value', full_name='org.apache.beam.runner_api.v1.Components.EnvironmentsEntry.value', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')),
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=898,
+  serialized_end=993,
+)
+
+_COMPONENTS = _descriptor.Descriptor(
+  name='Components',
+  full_name='org.apache.beam.runner_api.v1.Components',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='transforms', full_name='org.apache.beam.runner_api.v1.Components.transforms', index=0,
+      number=1, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='pcollections', full_name='org.apache.beam.runner_api.v1.Components.pcollections', index=1,
+      number=2, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='windowing_strategies', full_name='org.apache.beam.runner_api.v1.Components.windowing_strategies', index=2,
+      number=3, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='coders', full_name='org.apache.beam.runner_api.v1.Components.coders', index=3,
+      number=4, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='environments', full_name='org.apache.beam.runner_api.v1.Components.environments', index=4,
+      number=5, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[_COMPONENTS_TRANSFORMSENTRY, _COMPONENTS_PCOLLECTIONSENTRY, _COMPONENTS_WINDOWINGSTRATEGIESENTRY, _COMPONENTS_CODERSENTRY, _COMPONENTS_ENVIRONMENTSENTRY, ],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=84,
+  serialized_end=993,
+)
+
+
+_MESSAGEWITHCOMPONENTS = _descriptor.Descriptor(
+  name='MessageWithComponents',
+  full_name='org.apache.beam.runner_api.v1.MessageWithComponents',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='components', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.components', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='coder', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.coder', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='combine_payload', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.combine_payload', index=2,
+      number=3, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='function_spec', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.function_spec', index=3,
+      number=4, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='par_do_payload', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.par_do_payload', index=4,
+      number=6, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='ptransform', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.ptransform', index=5,
+      number=7, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='pcollection', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.pcollection', index=6,
+      number=8, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='read_payload', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.read_payload', index=7,
+      number=9, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='side_input', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.side_input', index=8,
+      number=11, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='window_into_payload', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.window_into_payload', index=9,
+      number=12, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='windowing_strategy', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.windowing_strategy', index=10,
+      number=13, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='urn_with_parameter', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.urn_with_parameter', index=11,
+      number=14, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+    _descriptor.OneofDescriptor(
+      name='root', full_name='org.apache.beam.runner_api.v1.MessageWithComponents.root',
+      index=0, containing_type=None, fields=[]),
+  ],
+  serialized_start=996,
+  serialized_end=1864,
+)
+
+
+_PIPELINE = _descriptor.Descriptor(
+  name='Pipeline',
+  full_name='org.apache.beam.runner_api.v1.Pipeline',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='components', full_name='org.apache.beam.runner_api.v1.Pipeline.components', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='root_transform_id', full_name='org.apache.beam.runner_api.v1.Pipeline.root_transform_id', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='display_data', full_name='org.apache.beam.runner_api.v1.Pipeline.display_data', index=2,
+      number=3, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=1867,
+  serialized_end=2033,
+)
+
+
+_PTRANSFORM_INPUTSENTRY = _descriptor.Descriptor(
+  name='InputsEntry',
+  full_name='org.apache.beam.runner_api.v1.PTransform.InputsEntry',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='key', full_name='org.apache.beam.runner_api.v1.PTransform.InputsEntry.key', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='value', full_name='org.apache.beam.runner_api.v1.PTransform.InputsEntry.value', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')),
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=2367,
+  serialized_end=2412,
+)
+
+_PTRANSFORM_OUTPUTSENTRY = _descriptor.Descriptor(
+  name='OutputsEntry',
+  full_name='org.apache.beam.runner_api.v1.PTransform.OutputsEntry',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='key', full_name='org.apache.beam.runner_api.v1.PTransform.OutputsEntry.key', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='value', full_name='org.apache.beam.runner_api.v1.PTransform.OutputsEntry.value', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')),
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=2414,
+  serialized_end=2460,
+)
+
+_PTRANSFORM = _descriptor.Descriptor(
+  name='PTransform',
+  full_name='org.apache.beam.runner_api.v1.PTransform',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='unique_name', full_name='org.apache.beam.runner_api.v1.PTransform.unique_name', index=0,
+      number=5, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='spec', full_name='org.apache.beam.runner_api.v1.PTransform.spec', index=1,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='subtransforms', full_name='org.apache.beam.runner_api.v1.PTransform.subtransforms', index=2,
+      number=2, type=9, cpp_type=9, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='inputs', full_name='org.apache.beam.runner_api.v1.PTransform.inputs', index=3,
+      number=3, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='outputs', full_name='org.apache.beam.runner_api.v1.PTransform.outputs', index=4,
+      number=4, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='display_data', full_name='org.apache.beam.runner_api.v1.PTransform.display_data', index=5,
+      number=6, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[_PTRANSFORM_INPUTSENTRY, _PTRANSFORM_OUTPUTSENTRY, ],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=2036,
+  serialized_end=2460,
+)
+
+
+_PCOLLECTION = _descriptor.Descriptor(
+  name='PCollection',
+  full_name='org.apache.beam.runner_api.v1.PCollection',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='unique_name', full_name='org.apache.beam.runner_api.v1.PCollection.unique_name', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='coder_id', full_name='org.apache.beam.runner_api.v1.PCollection.coder_id', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='is_bounded', full_name='org.apache.beam.runner_api.v1.PCollection.is_bounded', index=2,
+      number=3, type=14, cpp_type=8, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='windowing_strategy_id', full_name='org.apache.beam.runner_api.v1.PCollection.windowing_strategy_id', index=3,
+      number=4, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='display_data', full_name='org.apache.beam.runner_api.v1.PCollection.display_data', index=4,
+      number=5, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=2463,
+  serialized_end=2674,
+)
+
+
+_PARDOPAYLOAD_SIDEINPUTSENTRY = _descriptor.Descriptor(
+  name='SideInputsEntry',
+  full_name='org.apache.beam.runner_api.v1.ParDoPayload.SideInputsEntry',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='key', full_name='org.apache.beam.runner_api.v1.ParDoPayload.SideInputsEntry.key', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='value', full_name='org.apache.beam.runner_api.v1.ParDoPayload.SideInputsEntry.value', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')),
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=3023,
+  serialized_end=3114,
+)
+
+_PARDOPAYLOAD = _descriptor.Descriptor(
+  name='ParDoPayload',
+  full_name='org.apache.beam.runner_api.v1.ParDoPayload',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='do_fn', full_name='org.apache.beam.runner_api.v1.ParDoPayload.do_fn', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='parameters', full_name='org.apache.beam.runner_api.v1.ParDoPayload.parameters', index=1,
+      number=2, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='side_inputs', full_name='org.apache.beam.runner_api.v1.ParDoPayload.side_inputs', index=2,
+      number=3, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='state_specs', full_name='org.apache.beam.runner_api.v1.ParDoPayload.state_specs', index=3,
+      number=4, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='timer_specs', full_name='org.apache.beam.runner_api.v1.ParDoPayload.timer_specs', index=4,
+      number=5, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[_PARDOPAYLOAD_SIDEINPUTSENTRY, ],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=2677,
+  serialized_end=3114,
+)
+
+
+_PARAMETER = _descriptor.Descriptor(
+  name='Parameter',
+  full_name='org.apache.beam.runner_api.v1.Parameter',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='type', full_name='org.apache.beam.runner_api.v1.Parameter.type', index=0,
+      number=1, type=14, cpp_type=8, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+    _PARAMETER_TYPE,
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=3117,
+  serialized_end=3256,
+)
+
+
+_STATESPEC = _descriptor.Descriptor(
+  name='StateSpec',
+  full_name='org.apache.beam.runner_api.v1.StateSpec',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=3258,
+  serialized_end=3269,
+)
+
+
+_TIMERSPEC = _descriptor.Descriptor(
+  name='TimerSpec',
+  full_name='org.apache.beam.runner_api.v1.TimerSpec',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=3271,
+  serialized_end=3282,
+)
+
+
+_READPAYLOAD = _descriptor.Descriptor(
+  name='ReadPayload',
+  full_name='org.apache.beam.runner_api.v1.ReadPayload',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='source', full_name='org.apache.beam.runner_api.v1.ReadPayload.source', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='is_bounded', full_name='org.apache.beam.runner_api.v1.ReadPayload.is_bounded', index=1,
+      number=2, type=14, cpp_type=8, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=3285,
+  serialized_end=3421,
+)
+
+
+_WINDOWINTOPAYLOAD = _descriptor.Descriptor(
+  name='WindowIntoPayload',
+  full_name='org.apache.beam.runner_api.v1.WindowIntoPayload',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='window_fn', full_name='org.apache.beam.runner_api.v1.WindowIntoPayload.window_fn', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=3423,
+  serialized_end=3506,
+)
+
+
+_COMBINEPAYLOAD_SIDEINPUTSENTRY = _descriptor.Descriptor(
+  name='SideInputsEntry',
+  full_name='org.apache.beam.runner_api.v1.CombinePayload.SideInputsEntry',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='key', full_name='org.apache.beam.runner_api.v1.CombinePayload.SideInputsEntry.key', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='value', full_name='org.apache.beam.runner_api.v1.CombinePayload.SideInputsEntry.value', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=_descriptor._ParseOptions(descriptor_pb2.MessageOptions(), _b('8\001')),
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=3023,
+  serialized_end=3114,
+)
+
+_COMBINEPAYLOAD = _descriptor.Descriptor(
+  name='CombinePayload',
+  full_name='org.apache.beam.runner_api.v1.CombinePayload',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='combine_fn', full_name='org.apache.beam.runner_api.v1.CombinePayload.combine_fn', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='accumulator_coder_id', full_name='org.apache.beam.runner_api.v1.CombinePayload.accumulator_coder_id', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='parameters', full_name='org.apache.beam.runner_api.v1.CombinePayload.parameters', index=2,
+      number=3, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='side_inputs', full_name='org.apache.beam.runner_api.v1.CombinePayload.side_inputs', index=3,
+      number=4, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[_COMBINEPAYLOAD_SIDEINPUTSENTRY, ],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=3509,
+  serialized_end=3859,
+)
+
+
+_CODER = _descriptor.Descriptor(
+  name='Coder',
+  full_name='org.apache.beam.runner_api.v1.Coder',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='spec', full_name='org.apache.beam.runner_api.v1.Coder.spec', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='component_coder_ids', full_name='org.apache.beam.runner_api.v1.Coder.component_coder_ids', index=1,
+      number=2, type=9, cpp_type=9, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=3861,
+  serialized_end=3956,
+)
+
+
+_WINDOWINGSTRATEGY = _descriptor.Descriptor(
+  name='WindowingStrategy',
+  full_name='org.apache.beam.runner_api.v1.WindowingStrategy',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='window_fn', full_name='org.apache.beam.runner_api.v1.WindowingStrategy.window_fn', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='merge_status', full_name='org.apache.beam.runner_api.v1.WindowingStrategy.merge_status', index=1,
+      number=2, type=14, cpp_type=8, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='window_coder_id', full_name='org.apache.beam.runner_api.v1.WindowingStrategy.window_coder_id', index=2,
+      number=3, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='trigger', full_name='org.apache.beam.runner_api.v1.WindowingStrategy.trigger', index=3,
+      number=4, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='accumulation_mode', full_name='org.apache.beam.runner_api.v1.WindowingStrategy.accumulation_mode', index=4,
+      number=5, type=14, cpp_type=8, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='output_time', full_name='org.apache.beam.runner_api.v1.WindowingStrategy.output_time', index=5,
+      number=6, type=14, cpp_type=8, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='closing_behavior', full_name='org.apache.beam.runner_api.v1.WindowingStrategy.closing_behavior', index=6,
+      number=7, type=14, cpp_type=8, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='allowed_lateness', full_name='org.apache.beam.runner_api.v1.WindowingStrategy.allowed_lateness', index=7,
+      number=8, type=3, cpp_type=2, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=3959,
+  serialized_end=4430,
+)
+
+
+_TRIGGER_AFTERALL = _descriptor.Descriptor(
+  name='AfterAll',
+  full_name='org.apache.beam.runner_api.v1.Trigger.AfterAll',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='subtriggers', full_name='org.apache.beam.runner_api.v1.Trigger.AfterAll.subtriggers', index=0,
+      number=1, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=5364,
+  serialized_end=5435,
+)
+
+_TRIGGER_AFTERANY = _descriptor.Descriptor(
+  name='AfterAny',
+  full_name='org.apache.beam.runner_api.v1.Trigger.AfterAny',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='subtriggers', full_name='org.apache.beam.runner_api.v1.Trigger.AfterAny.subtriggers', index=0,
+      number=1, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=5437,
+  serialized_end=5508,
+)
+
+_TRIGGER_AFTEREACH = _descriptor.Descriptor(
+  name='AfterEach',
+  full_name='org.apache.beam.runner_api.v1.Trigger.AfterEach',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='subtriggers', full_name='org.apache.beam.runner_api.v1.Trigger.AfterEach.subtriggers', index=0,
+      number=1, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=5510,
+  serialized_end=5582,
+)
+
+_TRIGGER_AFTERENDOFWINDOW = _descriptor.Descriptor(
+  name='AfterEndOfWindow',
+  full_name='org.apache.beam.runner_api.v1.Trigger.AfterEndOfWindow',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='early_firings', full_name='org.apache.beam.runner_api.v1.Trigger.AfterEndOfWindow.early_firings', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='late_firings', full_name='org.apache.beam.runner_api.v1.Trigger.AfterEndOfWindow.late_firings', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=5585,
+  serialized_end=5728,
+)
+
+_TRIGGER_AFTERPROCESSINGTIME = _descriptor.Descriptor(
+  name='AfterProcessingTime',
+  full_name='org.apache.beam.runner_api.v1.Trigger.AfterProcessingTime',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='timestamp_transforms', full_name='org.apache.beam.runner_api.v1.Trigger.AfterProcessingTime.timestamp_transforms', index=0,
+      number=1, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=5730,
+  serialized_end=5832,
+)
+
+_TRIGGER_AFTERSYNCHRONIZEDPROCESSINGTIME = _descriptor.Descriptor(
+  name='AfterSynchronizedProcessingTime',
+  full_name='org.apache.beam.runner_api.v1.Trigger.AfterSynchronizedProcessingTime',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=5834,
+  serialized_end=5867,
+)
+
+_TRIGGER_DEFAULT = _descriptor.Descriptor(
+  name='Default',
+  full_name='org.apache.beam.runner_api.v1.Trigger.Default',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=5869,
+  serialized_end=5878,
+)
+
+_TRIGGER_ELEMENTCOUNT = _descriptor.Descriptor(
+  name='ElementCount',
+  full_name='org.apache.beam.runner_api.v1.Trigger.ElementCount',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='element_count', full_name='org.apache.beam.runner_api.v1.Trigger.ElementCount.element_count', index=0,
+      number=1, type=5, cpp_type=1, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=5880,
+  serialized_end=5917,
+)
+
+_TRIGGER_NEVER = _descriptor.Descriptor(
+  name='Never',
+  full_name='org.apache.beam.runner_api.v1.Trigger.Never',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=5919,
+  serialized_end=5926,
+)
+
+_TRIGGER_ALWAYS = _descriptor.Descriptor(
+  name='Always',
+  full_name='org.apache.beam.runner_api.v1.Trigger.Always',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=5928,
+  serialized_end=5936,
+)
+
+_TRIGGER_ORFINALLY = _descriptor.Descriptor(
+  name='OrFinally',
+  full_name='org.apache.beam.runner_api.v1.Trigger.OrFinally',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='main', full_name='org.apache.beam.runner_api.v1.Trigger.OrFinally.main', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='finally', full_name='org.apache.beam.runner_api.v1.Trigger.OrFinally.finally', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=5938,
+  serialized_end=6060,
+)
+
+_TRIGGER_REPEAT = _descriptor.Descriptor(
+  name='Repeat',
+  full_name='org.apache.beam.runner_api.v1.Trigger.Repeat',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='subtrigger', full_name='org.apache.beam.runner_api.v1.Trigger.Repeat.subtrigger', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=6062,
+  serialized_end=6130,
+)
+
+_TRIGGER = _descriptor.Descriptor(
+  name='Trigger',
+  full_name='org.apache.beam.runner_api.v1.Trigger',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='after_all', full_name='org.apache.beam.runner_api.v1.Trigger.after_all', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='after_any', full_name='org.apache.beam.runner_api.v1.Trigger.after_any', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='after_each', full_name='org.apache.beam.runner_api.v1.Trigger.after_each', index=2,
+      number=3, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='after_end_of_widow', full_name='org.apache.beam.runner_api.v1.Trigger.after_end_of_widow', index=3,
+      number=4, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='after_processing_time', full_name='org.apache.beam.runner_api.v1.Trigger.after_processing_time', index=4,
+      number=5, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='after_synchronized_processing_time', full_name='org.apache.beam.runner_api.v1.Trigger.after_synchronized_processing_time', index=5,
+      number=6, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='always', full_name='org.apache.beam.runner_api.v1.Trigger.always', index=6,
+      number=12, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='default', full_name='org.apache.beam.runner_api.v1.Trigger.default', index=7,
+      number=7, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='element_count', full_name='org.apache.beam.runner_api.v1.Trigger.element_count', index=8,
+      number=8, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='never', full_name='org.apache.beam.runner_api.v1.Trigger.never', index=9,
+      number=9, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='or_finally', full_name='org.apache.beam.runner_api.v1.Trigger.or_finally', index=10,
+      number=10, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='repeat', full_name='org.apache.beam.runner_api.v1.Trigger.repeat', index=11,
+      number=11, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[_TRIGGER_AFTERALL, _TRIGGER_AFTERANY, _TRIGGER_AFTEREACH, _TRIGGER_AFTERENDOFWINDOW, _TRIGGER_AFTERPROCESSINGTIME, _TRIGGER_AFTERSYNCHRONIZEDPROCESSINGTIME, _TRIGGER_DEFAULT, _TRIGGER_ELEMENTCOUNT, _TRIGGER_NEVER, _TRIGGER_ALWAYS, _TRIGGER_ORFINALLY, _TRIGGER_REPEAT, ],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+    _descriptor.OneofDescriptor(
+      name='trigger', full_name='org.apache.beam.runner_api.v1.Trigger.trigger',
+      index=0, containing_type=None, fields=[]),
+  ],
+  serialized_start=4433,
+  serialized_end=6141,
+)
+
+
+_TIMESTAMPTRANSFORM_DELAY = _descriptor.Descriptor(
+  name='Delay',
+  full_name='org.apache.beam.runner_api.v1.TimestampTransform.Delay',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='delay_millis', full_name='org.apache.beam.runner_api.v1.TimestampTransform.Delay.delay_millis', index=0,
+      number=1, type=3, cpp_type=2, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=6319,
+  serialized_end=6348,
+)
+
+_TIMESTAMPTRANSFORM_ALIGNTO = _descriptor.Descriptor(
+  name='AlignTo',
+  full_name='org.apache.beam.runner_api.v1.TimestampTransform.AlignTo',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='period', full_name='org.apache.beam.runner_api.v1.TimestampTransform.AlignTo.period', index=0,
+      number=3, type=3, cpp_type=2, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='offset', full_name='org.apache.beam.runner_api.v1.TimestampTransform.AlignTo.offset', index=1,
+      number=4, type=3, cpp_type=2, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=6350,
+  serialized_end=6391,
+)
+
+_TIMESTAMPTRANSFORM = _descriptor.Descriptor(
+  name='TimestampTransform',
+  full_name='org.apache.beam.runner_api.v1.TimestampTransform',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='delay', full_name='org.apache.beam.runner_api.v1.TimestampTransform.delay', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='align_to', full_name='org.apache.beam.runner_api.v1.TimestampTransform.align_to', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[_TIMESTAMPTRANSFORM_DELAY, _TIMESTAMPTRANSFORM_ALIGNTO, ],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+    _descriptor.OneofDescriptor(
+      name='timestamp_transform', full_name='org.apache.beam.runner_api.v1.TimestampTransform.timestamp_transform',
+      index=0, containing_type=None, fields=[]),
+  ],
+  serialized_start=6144,
+  serialized_end=6414,
+)
+
+
+_SIDEINPUT = _descriptor.Descriptor(
+  name='SideInput',
+  full_name='org.apache.beam.runner_api.v1.SideInput',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='access_pattern', full_name='org.apache.beam.runner_api.v1.SideInput.access_pattern', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='view_fn', full_name='org.apache.beam.runner_api.v1.SideInput.view_fn', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='window_mapping_fn', full_name='org.apache.beam.runner_api.v1.SideInput.window_mapping_fn', index=2,
+      number=3, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=6417,
+  serialized_end=6635,
+)
+
+
+_ENVIRONMENT = _descriptor.Descriptor(
+  name='Environment',
+  full_name='org.apache.beam.runner_api.v1.Environment',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='url', full_name='org.apache.beam.runner_api.v1.Environment.url', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=6637,
+  serialized_end=6663,
+)
+
+
+_FUNCTIONSPEC = _descriptor.Descriptor(
+  name='FunctionSpec',
+  full_name='org.apache.beam.runner_api.v1.FunctionSpec',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='spec', full_name='org.apache.beam.runner_api.v1.FunctionSpec.spec', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='environment_id', full_name='org.apache.beam.runner_api.v1.FunctionSpec.environment_id', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=6665,
+  serialized_end=6766,
+)
+
+
+_URNWITHPARAMETER = _descriptor.Descriptor(
+  name='UrnWithParameter',
+  full_name='org.apache.beam.runner_api.v1.UrnWithParameter',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='urn', full_name='org.apache.beam.runner_api.v1.UrnWithParameter.urn', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='parameter', full_name='org.apache.beam.runner_api.v1.UrnWithParameter.parameter', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=6768,
+  serialized_end=6840,
+)
+
+
+_DISPLAYDATA_IDENTIFIER = _descriptor.Descriptor(
+  name='Identifier',
+  full_name='org.apache.beam.runner_api.v1.DisplayData.Identifier',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='transform_id', full_name='org.apache.beam.runner_api.v1.DisplayData.Identifier.transform_id', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='transform_urn', full_name='org.apache.beam.runner_api.v1.DisplayData.Identifier.transform_urn', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='key', full_name='org.apache.beam.runner_api.v1.DisplayData.Identifier.key', index=2,
+      number=3, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=6922,
+  serialized_end=6992,
+)
+
+_DISPLAYDATA_ITEM = _descriptor.Descriptor(
+  name='Item',
+  full_name='org.apache.beam.runner_api.v1.DisplayData.Item',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='id', full_name='org.apache.beam.runner_api.v1.DisplayData.Item.id', index=0,
+      number=1, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='type', full_name='org.apache.beam.runner_api.v1.DisplayData.Item.type', index=1,
+      number=2, type=14, cpp_type=8, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='value', full_name='org.apache.beam.runner_api.v1.DisplayData.Item.value', index=2,
+      number=3, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='short_value', full_name='org.apache.beam.runner_api.v1.DisplayData.Item.short_value', index=3,
+      number=4, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='label', full_name='org.apache.beam.runner_api.v1.DisplayData.Item.label', index=4,
+      number=5, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+    _descriptor.FieldDescriptor(
+      name='link_url', full_name='org.apache.beam.runner_api.v1.DisplayData.Item.link_url', index=5,
+      number=6, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=6995,
+  serialized_end=7244,
+)
+
+_DISPLAYDATA = _descriptor.Descriptor(
+  name='DisplayData',
+  full_name='org.apache.beam.runner_api.v1.DisplayData',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='items', full_name='org.apache.beam.runner_api.v1.DisplayData.items', index=0,
+      number=1, type=11, cpp_type=10, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
+  ],
+  extensions=[
+  ],
+  nested_types=[_DISPLAYDATA_IDENTIFIER, _DISPLAYDATA_ITEM, ],
+  enum_types=[
+    _DISPLAYDATA_TYPE,
+  ],
+  options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=6843,
+  serialized_end=7346,
+)
+
+_COMPONENTS_TRANSFORMSENTRY.fields_by_name['value'].message_type = _PTRANSFORM
+_COMPONENTS_TRANSFORMSENTRY.containing_type = _COMPONENTS
+_COMPONENTS_PCOLLECTIONSENTRY.fields_by_name['value'].message_type = _PCOLLECTION
+_COMPONENTS_PCOLLECTIONSENTRY.containing_type = _COMPONENTS
+_COMPONENTS_WINDOWINGSTRATEGIESENTRY.fields_by_name['value'].message_type = _WINDOWINGSTRATEGY
+_COMPONENTS_WINDOWINGSTRATEGIESENTRY.containing_type = _COMPONENTS
+_COMPONENTS_CODERSENTRY.fields_by_name['value'].message_type = _CODER
+_COMPONENTS_CODERSENTRY.containing_type = _COMPONENTS
+_COMPONENTS_ENVIRONMENTSENTRY.fields_by_name['value'].message_type = _ENVIRONMENT
+_COMPONENTS_ENVIRONMENTSENTRY.containing_type = _COMPONENTS
+_COMPONENTS.fields_by_name['transforms'].message_type = _COMPONENTS_TRANSFORMSENTRY
+_COMPONENTS.fields_by_name['pcollections'].message_type = _COMPONENTS_PCOLLECTIONSENTRY
+_COMPONENTS.fields_by_name['windowing_strategies'].message_type = _COMPONENTS_WINDOWINGSTRATEGIESENTRY
+_COMPONENTS.fields_by_name['coders'].message_type = _COMPONENTS_CODERSENTRY
+_COMPONENTS.fields_by_name['environments'].message_type = _COMPONENTS_ENVIRONMENTSENTRY
+_MESSAGEWITHCOMPONENTS.fields_by_name['components'].message_type = _COMPONENTS
+_MESSAGEWITHCOMPONENTS.fields_by_name['coder'].message_type = _CODER
+_MESSAGEWITHCOMPONENTS.fields_by_name['combine_payload'].message_type = _COMBINEPAYLOAD
+_MESSAGEWITHCOMPONENTS.fields_by_name['function_spec'].message_type = _FUNCTIONSPEC
+_MESSAGEWITHCOMPONENTS.fields_by_name['par_do_payload'].message_type = _PARDOPAYLOAD
+_MESSAGEWITHCOMPONENTS.fields_by_name['ptransform'].message_type = _PTRANSFORM
+_MESSAGEWITHCOMPONENTS.fields_by_name['pcollection'].message_type = _PCOLLECTION
+_MESSAGEWITHCOMPONENTS.fields_by_name['read_payload'].message_type = _READPAYLOAD
+_MESSAGEWITHCOMPONENTS.fields_by_name['side_input'].message_type = _SIDEINPUT
+_MESSAGEWITHCOMPONENTS.fields_by_name['window_into_payload'].message_type = _WINDOWINTOPAYLOAD
+_MESSAGEWITHCOMPONENTS.fields_by_name['windowing_strategy'].message_type = _WINDOWINGSTRATEGY
+_MESSAGEWITHCOMPONENTS.fields_by_name['urn_with_parameter'].message_type = _URNWITHPARAMETER
+_MESSAGEWITHCOMPONENTS.oneofs_by_name['root'].fields.append(
+  _MESSAGEWITHCOMPONENTS.fields_by_name['coder'])
+_MESSAGEWITHCOMPONENTS.fields_by_name['coder'].containing_oneof = _MESSAGEWITHCOMPONENTS.oneofs_by_name['root']
+_MESSAGEWITHCOMPONENTS.oneofs_by_name['root'].fields.append(
+  _MESSAGEWITHCOMPONENTS.fields_by_name['combine_payload'])
+_MESSAGEWITHCOMPONENTS.fields_by_name['combine_payload'].containing_oneof = _MESSAGEWITHCOMPONENTS.oneofs_by_name['root']
+_MESSAGEWITHCOMPONENTS.oneofs_by_name['root'].fields.append(
+  _MESSAGEWITHCOMPONENTS.fields_by_name['function_spec'])
+_MESSAGEWITHCOMPONENTS.fields_by_name['function_spec'].containing_oneof = _MESSAGEWITHCOMPONENTS.oneofs_by_name['root']
+_MESSAGEWITHCOMPONENTS.oneofs_by_name['root'].fields.append(
+  _MESSAGEWITHCOMPONENTS.fields_by_name['par_do_payload'])
+_MESSAGEWITHCOMPONENTS.fields_by_name['par_do_payload'].containing_oneof = _MESSAGEWITHCOMPONENTS.oneofs_by_name['root']
+_MESSAGEWITHCOMPONENTS.oneofs_by_name['root'].fields.append(
+  _MESSAGEWITHCOMPONENTS.fields_by_name['ptransform'])
+_MESSAGEWITHCOMPONENTS.fields_by_name['ptransform'].containing_oneof = _MESSAGEWITHCOMPONENTS.oneofs_by_name['root']
+_MESSAGEWITHCOMPONENTS.oneofs_by_name['root'].fields.append(
+  _MESSAGEWITHCOMPONENTS.fields_by_name['pcollection'])
+_MESSAGEWITHCOMPONENTS.fields_by_name['pcollection'].containing_oneof = _MESSAGEWITHCOMPONENTS.oneofs_by_name['root']
+_MESSAGEWITHCOMPONENTS.oneofs_by_name['root'].fields.append(
+  _MESSAGEWITHCOMPONENTS.fields_by_name['read_payload'])
+_MESSAGEWITHCOMPONENTS.fields_by_name['read_payload'].containing_oneof = _MESSAGEWITHCOMPONENTS.oneofs_by_name['root']
+_MESSAGEWITHCOMPONENTS.oneofs_by_name['root'].f

<TRUNCATED>

[43/50] [abbrv] beam git commit: [BEAM-1629] Init metrics/aggregators accumulators before traversing pipeline

Posted by ke...@apache.org.
[BEAM-1629] Init metrics/aggregators accumulators before traversing pipeline


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/874c8d0d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/874c8d0d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/874c8d0d

Branch: refs/heads/gearpump-runner
Commit: 874c8d0da65568b01cd5f184e303d39c7810a8bf
Parents: d167153
Author: Aviem Zur <av...@gmail.com>
Authored: Mon Mar 6 20:48:48 2017 +0200
Committer: Stas Levin <st...@apache.org>
Committed: Sun Mar 12 10:02:23 2017 +0200

----------------------------------------------------------------------
 .../spark/SparkNativePipelineVisitor.java       |  4 --
 .../beam/runners/spark/SparkPipelineResult.java |  8 +--
 .../apache/beam/runners/spark/SparkRunner.java  | 65 ++++++++++----------
 .../beam/runners/spark/SparkRunnerDebugger.java | 30 ++++++---
 .../beam/runners/spark/TestSparkRunner.java     |  4 +-
 .../aggregators/AggregatorsAccumulator.java     | 44 +++++++++----
 .../spark/aggregators/SparkAggregators.java     | 40 ++----------
 .../spark/metrics/AggregatorMetricSource.java   | 11 ++--
 .../spark/metrics/MetricsAccumulator.java       | 38 ++++++++----
 .../spark/metrics/SparkBeamMetricSource.java    | 11 ++--
 .../spark/metrics/SparkMetricsContainer.java    | 17 ++---
 .../spark/translation/TransformTranslator.java  | 13 ++--
 .../SparkRunnerStreamingContextFactory.java     |  3 +
 .../streaming/StreamingTransformTranslator.java | 10 +--
 .../metrics/sink/NamedAggregatorsTest.java      | 15 +----
 .../ResumeFromCheckpointStreamingTest.java      |  4 +-
 16 files changed, 156 insertions(+), 161 deletions(-)
----------------------------------------------------------------------

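The crux of the change: the metrics and aggregators accumulators are JVM-wide singletons, and translators expect them to exist by the time the pipeline graph is traversed. A minimal sketch of the idempotent, double-checked-locking initialization that the diffs below adopt (the class name AccumulatorHolder is hypothetical and the visibility of AggAccumParam is assumed; the real classes are AggregatorsAccumulator and MetricsAccumulator):

import org.apache.beam.runners.spark.aggregators.AggAccumParam;
import org.apache.beam.runners.spark.aggregators.NamedAggregators;
import org.apache.spark.Accumulator;
import org.apache.spark.api.java.JavaSparkContext;

// Hypothetical holder illustrating the pattern; not the actual Beam class.
public final class AccumulatorHolder {
  private static volatile Accumulator<NamedAggregators> instance = null;

  private AccumulatorHolder() {}

  // Idempotent: safe to call from both SparkRunnerStreamingContextFactory and SparkRunner.
  public static void init(JavaSparkContext jsc) {
    if (instance == null) {
      synchronized (AccumulatorHolder.class) {
        if (instance == null) {
          instance = jsc.sc().accumulator(new NamedAggregators(), new AggAccumParam());
        }
      }
    }
  }

  // Fails fast if a translator asks for the accumulator before init(...) has run.
  public static Accumulator<NamedAggregators> getInstance() {
    if (instance == null) {
      throw new IllegalStateException("init(...) must run before the pipeline is traversed");
    }
    return instance;
  }
}
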

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java
index 056da97..c2784a2 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java
@@ -19,7 +19,6 @@
 package org.apache.beam.runners.spark;
 
 import com.google.common.base.Joiner;
-import com.google.common.base.Optional;
 import com.google.common.base.Predicate;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
@@ -27,11 +26,9 @@ import java.lang.reflect.Field;
 import java.lang.reflect.InvocationTargetException;
 import java.util.ArrayList;
 import java.util.List;
-import org.apache.beam.runners.spark.metrics.MetricsAccumulator;
 import org.apache.beam.runners.spark.translation.EvaluationContext;
 import org.apache.beam.runners.spark.translation.SparkPipelineTranslator;
 import org.apache.beam.runners.spark.translation.TransformEvaluator;
-import org.apache.beam.runners.spark.translation.streaming.Checkpoint;
 import org.apache.beam.sdk.io.Read;
 import org.apache.beam.sdk.runners.TransformHierarchy;
 import org.apache.beam.sdk.transforms.MapElements;
@@ -55,7 +52,6 @@ public class SparkNativePipelineVisitor extends SparkRunner.Evaluator {
   SparkNativePipelineVisitor(SparkPipelineTranslator translator, EvaluationContext ctxt) {
     super(translator, ctxt);
     this.transforms = new ArrayList<>();
-    MetricsAccumulator.init(ctxt.getSparkContext(), Optional.<Checkpoint.CheckpointDir>absent());
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineResult.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineResult.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineResult.java
index ddc1964..ed1e0c8 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineResult.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineResult.java
@@ -27,7 +27,6 @@ import java.util.concurrent.TimeoutException;
 import org.apache.beam.runners.spark.aggregators.SparkAggregators;
 import org.apache.beam.runners.spark.metrics.SparkMetricResults;
 import org.apache.beam.runners.spark.translation.SparkContextFactory;
-import org.apache.beam.sdk.AggregatorRetrievalException;
 import org.apache.beam.sdk.AggregatorValues;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.PipelineResult;
@@ -84,13 +83,12 @@ public abstract class SparkPipelineResult implements PipelineResult {
       throws TimeoutException, ExecutionException, InterruptedException;
 
   public <T> T getAggregatorValue(final String name, final Class<T> resultType) {
-    return SparkAggregators.valueOf(name, resultType, javaSparkContext);
+    return SparkAggregators.valueOf(name, resultType);
   }
 
   @Override
-  public <T> AggregatorValues<T> getAggregatorValues(final Aggregator<?, T> aggregator)
-      throws AggregatorRetrievalException {
-    return SparkAggregators.valueOf(aggregator, javaSparkContext);
+  public <T> AggregatorValues<T> getAggregatorValues(final Aggregator<?, T> aggregator) {
+    return SparkAggregators.valueOf(aggregator);
   }
 
   @Override

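With the JavaSparkContext argument gone from SparkAggregators.valueOf, aggregator values are read straight off the result object. A hedged sketch of doing so after a run (the aggregator name "myCounter" is hypothetical, and the cast assumes the pipeline ran on the SparkRunner):

import org.apache.beam.runners.spark.SparkPipelineResult;
import org.apache.beam.sdk.Pipeline;

class AggregatorReadSketch {
  // Assumes the pipeline uses the SparkRunner and updates an Aggregator named
  // "myCounter" (a hypothetical name) whose value type is Long.
  static Long readCounter(Pipeline pipeline) {
    SparkPipelineResult result = (SparkPipelineResult) pipeline.run();
    result.waitUntilFinish();
    return result.getAggregatorValue("myCounter", Long.class);
  }
}
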
http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
index a706f00..de648fc 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
@@ -18,7 +18,6 @@
 
 package org.apache.beam.runners.spark;
 
-import com.google.common.base.Optional;
 import com.google.common.collect.Iterables;
 import java.util.Arrays;
 import java.util.Collection;
@@ -27,8 +26,6 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import org.apache.beam.runners.spark.aggregators.AggregatorsAccumulator;
-import org.apache.beam.runners.spark.aggregators.NamedAggregators;
-import org.apache.beam.runners.spark.aggregators.SparkAggregators;
 import org.apache.beam.runners.spark.io.CreateStream;
 import org.apache.beam.runners.spark.metrics.AggregatorMetricSource;
 import org.apache.beam.runners.spark.metrics.CompositeSource;
@@ -59,7 +56,6 @@ import org.apache.beam.sdk.values.PInput;
 import org.apache.beam.sdk.values.POutput;
 import org.apache.beam.sdk.values.PValue;
 import org.apache.beam.sdk.values.TaggedPValue;
-import org.apache.spark.Accumulator;
 import org.apache.spark.SparkEnv$;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.metrics.MetricsSystem;
@@ -141,31 +137,6 @@ public final class SparkRunner extends PipelineRunner<SparkPipelineResult> {
     mOptions = options;
   }
 
-  private void registerMetrics(final SparkPipelineOptions opts, final JavaSparkContext jsc) {
-    Optional<CheckpointDir> maybeCheckpointDir =
-        opts.isStreaming() ? Optional.of(new CheckpointDir(opts.getCheckpointDir()))
-            : Optional.<CheckpointDir>absent();
-    final Accumulator<NamedAggregators> aggregatorsAccumulator =
-        SparkAggregators.getOrCreateNamedAggregators(jsc, maybeCheckpointDir);
-    // Instantiate metrics accumulator
-    MetricsAccumulator.init(jsc, maybeCheckpointDir);
-    final NamedAggregators initialValue = aggregatorsAccumulator.value();
-    if (opts.getEnableSparkMetricSinks()) {
-      final MetricsSystem metricsSystem = SparkEnv$.MODULE$.get().metricsSystem();
-      String appName = opts.getAppName();
-      final AggregatorMetricSource aggregatorMetricSource =
-          new AggregatorMetricSource(appName, initialValue);
-      final SparkBeamMetricSource metricsSource =
-          new SparkBeamMetricSource(appName);
-      final CompositeSource compositeSource =
-          new CompositeSource(appName,
-              metricsSource.metricRegistry(), aggregatorMetricSource.metricRegistry());
-      // re-register the metrics in case of context re-use
-      metricsSystem.removeSource(compositeSource);
-      metricsSystem.registerSource(compositeSource);
-    }
-  }
-
   @Override
   public SparkPipelineResult run(final Pipeline pipeline) {
     LOG.info("Executing pipeline using the SparkRunner.");
@@ -203,11 +174,16 @@ public final class SparkRunner extends PipelineRunner<SparkPipelineResult> {
       // register Watermarks listener to broadcast the advanced WMs.
       jssc.addStreamingListener(new JavaStreamingListenerWrapper(new WatermarksListener(jssc)));
 
+      // We call initAccumulators here even though SparkRunnerStreamingContextFactory also
+      // calls it, because the factory is not invoked when resuming from a checkpoint. (When
+      // not resuming from a checkpoint, initAccumulators runs twice, which is fine since it
+      // is idempotent.)
+      initAccumulators(mOptions, jssc.sparkContext());
+
       startPipeline = executorService.submit(new Runnable() {
 
         @Override
         public void run() {
-          registerMetrics(mOptions, jssc.sparkContext());
           LOG.info("Starting streaming pipeline execution.");
           jssc.start();
         }
@@ -218,11 +194,12 @@ public final class SparkRunner extends PipelineRunner<SparkPipelineResult> {
       final JavaSparkContext jsc = SparkContextFactory.getSparkContext(mOptions);
       final EvaluationContext evaluationContext = new EvaluationContext(jsc, pipeline);
 
+      initAccumulators(mOptions, jsc);
+
       startPipeline = executorService.submit(new Runnable() {
 
         @Override
         public void run() {
-          registerMetrics(mOptions, jsc);
           pipeline.traverseTopologically(new Evaluator(new TransformTranslator.Translator(),
                                                        evaluationContext));
           evaluationContext.computeOutputs();
@@ -233,9 +210,35 @@ public final class SparkRunner extends PipelineRunner<SparkPipelineResult> {
       result = new SparkPipelineResult.BatchMode(startPipeline, jsc);
     }
 
+    if (mOptions.getEnableSparkMetricSinks()) {
+      registerMetricsSource(mOptions.getAppName());
+    }
+
     return result;
   }
 
+  private void registerMetricsSource(String appName) {
+    final MetricsSystem metricsSystem = SparkEnv$.MODULE$.get().metricsSystem();
+    final AggregatorMetricSource aggregatorMetricSource =
+        new AggregatorMetricSource(null, AggregatorsAccumulator.getInstance().value());
+    final SparkBeamMetricSource metricsSource = new SparkBeamMetricSource(null);
+    final CompositeSource compositeSource =
+        new CompositeSource(appName + ".Beam", metricsSource.metricRegistry(),
+            aggregatorMetricSource.metricRegistry());
+    // re-register the metrics in case of context re-use
+    metricsSystem.removeSource(compositeSource);
+    metricsSystem.registerSource(compositeSource);
+  }
+
+  /**
+   * Initialize the metrics and aggregators accumulators. This method is idempotent.
+   */
+  public static void initAccumulators(SparkPipelineOptions opts, JavaSparkContext jsc) {
+    // Init metrics accumulators
+    MetricsAccumulator.init(opts, jsc);
+    AggregatorsAccumulator.init(opts, jsc);
+  }
+
   /**
    * Detect the translation mode for the pipeline and change options in case streaming
    * translation is needed.

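Condensed from the batch branch of SparkRunner.run above, the ordering contract is: create the accumulators on the driver first, then submit the traversal, so no translator can observe an uninitialized accumulator. A simplified sketch under that reading (class and method names are hypothetical; error handling omitted):

package org.apache.beam.runners.spark;  // placed in the runner's package for visibility

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.beam.runners.spark.translation.EvaluationContext;
import org.apache.beam.runners.spark.translation.TransformTranslator;
import org.apache.beam.sdk.Pipeline;
import org.apache.spark.api.java.JavaSparkContext;

class BatchOrderingSketch {
  // Hypothetical condensation of the batch branch of SparkRunner.run(...) above.
  static Future<?> start(final SparkPipelineOptions options, final Pipeline pipeline,
      final JavaSparkContext jsc, ExecutorService executorService) {
    final EvaluationContext evaluationContext = new EvaluationContext(jsc, pipeline);

    // 1) Create the singleton accumulators on the driver (idempotent).
    SparkRunner.initAccumulators(options, jsc);

    // 2) Only then translate and run: translators may rely on the accumulators existing.
    return executorService.submit(new Runnable() {
      @Override
      public void run() {
        pipeline.traverseTopologically(
            new SparkRunner.Evaluator(new TransformTranslator.Translator(), evaluationContext));
        evaluationContext.computeOutputs();
      }
    });
  }
}
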
http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunnerDebugger.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunnerDebugger.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunnerDebugger.java
index 395acff..7f7aefc 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunnerDebugger.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunnerDebugger.java
@@ -20,12 +20,14 @@ package org.apache.beam.runners.spark;
 
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeoutException;
+
 import org.apache.beam.runners.spark.translation.EvaluationContext;
 import org.apache.beam.runners.spark.translation.SparkPipelineTranslator;
 import org.apache.beam.runners.spark.translation.TransformTranslator;
 import org.apache.beam.runners.spark.translation.streaming.StreamingTransformTranslator;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsValidator;
 import org.apache.beam.sdk.runners.PipelineRunner;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.streaming.api.java.JavaStreamingContext;
@@ -53,23 +55,34 @@ public final class SparkRunnerDebugger extends PipelineRunner<SparkPipelineResul
 
   private static final Logger LOG = LoggerFactory.getLogger(SparkRunnerDebugger.class);
 
-  private SparkRunnerDebugger() {}
+  private final SparkPipelineOptions options;
+
+  private SparkRunnerDebugger(SparkPipelineOptions options) {
+    this.options = options;
+  }
 
-  @SuppressWarnings("unused")
   public static SparkRunnerDebugger fromOptions(PipelineOptions options) {
-    return new SparkRunnerDebugger();
+    if (options instanceof TestSparkPipelineOptions) {
+      TestSparkPipelineOptions testSparkPipelineOptions =
+          PipelineOptionsValidator.validate(TestSparkPipelineOptions.class, options);
+      return new SparkRunnerDebugger(testSparkPipelineOptions);
+    } else {
+      SparkPipelineOptions sparkPipelineOptions =
+          PipelineOptionsValidator.validate(SparkPipelineOptions.class, options);
+      return new SparkRunnerDebugger(sparkPipelineOptions);
+    }
   }
 
   @Override
   public SparkPipelineResult run(Pipeline pipeline) {
-    SparkPipelineResult result;
-
-    SparkPipelineOptions options = (SparkPipelineOptions) pipeline.getOptions();
-
     JavaSparkContext jsc = new JavaSparkContext("local[1]", "Debug_Pipeline");
     JavaStreamingContext jssc =
         new JavaStreamingContext(jsc, new org.apache.spark.streaming.Duration(1000));
+
+    SparkRunner.initAccumulators(options, jsc);
+
     TransformTranslator.Translator translator = new TransformTranslator.Translator();
+
     SparkNativePipelineVisitor visitor;
     if (options.isStreaming()
         || options instanceof TestSparkPipelineOptions
@@ -82,8 +95,11 @@ public final class SparkRunnerDebugger extends PipelineRunner<SparkPipelineResul
       EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, jssc);
       visitor = new SparkNativePipelineVisitor(translator, ctxt);
     }
+
     pipeline.traverseTopologically(visitor);
+
     jsc.stop();
+
     String debugString = visitor.getDebugString();
     LOG.info("Translated Native Spark pipeline:\n" + debugString);
     return new DebugSparkPipelineResult(debugString);

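Since fromOptions now validates and stores the options instead of casting pipeline.getOptions() at run time, the debugger is driven like any other runner. A minimal usage sketch (the Create.of input is arbitrary):

import org.apache.beam.runners.spark.SparkPipelineOptions;
import org.apache.beam.runners.spark.SparkRunnerDebugger;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class DebuggerUsageSketch {
  public static void main(String[] args) {
    SparkPipelineOptions options =
        PipelineOptionsFactory.create().as(SparkPipelineOptions.class);
    options.setRunner(SparkRunnerDebugger.class);

    Pipeline pipeline = Pipeline.create(options);
    pipeline.apply(Create.of(1, 2, 3));  // any small transform graph will do

    pipeline.run();  // traverses the graph and logs "Translated Native Spark pipeline:"
  }
}
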
http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java
index d321f99..e436422 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/TestSparkRunner.java
@@ -34,7 +34,7 @@ import org.apache.beam.runners.core.UnboundedReadFromBoundedSource;
 import org.apache.beam.runners.core.construction.PTransformMatchers;
 import org.apache.beam.runners.core.construction.ReplacementOutputs;
 import org.apache.beam.runners.spark.aggregators.AggregatorsAccumulator;
-import org.apache.beam.runners.spark.metrics.SparkMetricsContainer;
+import org.apache.beam.runners.spark.metrics.MetricsAccumulator;
 import org.apache.beam.runners.spark.stateful.SparkTimerInternals;
 import org.apache.beam.runners.spark.util.GlobalWatermarkHolder;
 import org.apache.beam.sdk.Pipeline;
@@ -115,7 +115,7 @@ public final class TestSparkRunner extends PipelineRunner<SparkPipelineResult> {
 
     // clear state of Aggregators, Metrics and Watermarks, if any exist.
     AggregatorsAccumulator.clear();
-    SparkMetricsContainer.clear();
+    MetricsAccumulator.clear();
     GlobalWatermarkHolder.clear();
 
     LOG.info("About to run test pipeline " + testSparkPipelineOptions.getJobName());

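Because the accumulators are process-wide singletons, a test JVM that runs several pipelines must reset them between runs, which is why TestSparkRunner now clears MetricsAccumulator alongside the aggregators and the watermark holder. Extending the hypothetical AccumulatorHolder sketch above, such a clear() typically just drops the instance (the real method bodies are not part of this diff):

// Continuation of the hypothetical AccumulatorHolder sketch; not the actual Beam code.
// @VisibleForTesting is com.google.common.annotations.VisibleForTesting, as imported in
// AggregatorsAccumulator below.
@VisibleForTesting
public static void clear() {
  synchronized (AccumulatorHolder.class) {
    instance = null;  // the next init(...) recreates the accumulator from scratch
  }
}
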
http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/AggregatorsAccumulator.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/AggregatorsAccumulator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/AggregatorsAccumulator.java
index 261c327..b8fc81b 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/AggregatorsAccumulator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/AggregatorsAccumulator.java
@@ -21,6 +21,7 @@ package org.apache.beam.runners.spark.aggregators;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Optional;
 import java.io.IOException;
+import org.apache.beam.runners.spark.SparkPipelineOptions;
 import org.apache.beam.runners.spark.translation.streaming.Checkpoint;
 import org.apache.beam.runners.spark.translation.streaming.Checkpoint.CheckpointDir;
 import org.apache.hadoop.fs.FileSystem;
@@ -40,30 +41,48 @@ import org.slf4j.LoggerFactory;
 public class AggregatorsAccumulator {
   private static final Logger LOG = LoggerFactory.getLogger(AggregatorsAccumulator.class);
 
+  private static final String ACCUMULATOR_NAME = "Beam.Aggregators";
   private static final String ACCUMULATOR_CHECKPOINT_FILENAME = "aggregators";
 
-  private static volatile Accumulator<NamedAggregators> instance;
+  private static volatile Accumulator<NamedAggregators> instance = null;
   private static volatile FileSystem fileSystem;
   private static volatile Path checkpointFilePath;
 
-  @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
-  static Accumulator<NamedAggregators> getInstance(
-      JavaSparkContext jsc,
-      Optional<CheckpointDir> checkpointDir) {
+  /**
+   * Initializes the aggregators accumulator if it has not been initialized yet.
+   * This method is idempotent.
+   */
+  public static void init(SparkPipelineOptions opts, JavaSparkContext jsc) {
     if (instance == null) {
       synchronized (AggregatorsAccumulator.class) {
         if (instance == null) {
-          instance = jsc.sc().accumulator(new NamedAggregators(), new AggAccumParam());
-          if (checkpointDir.isPresent()) {
-            recoverValueFromCheckpoint(jsc, checkpointDir.get());
+          Optional<CheckpointDir> maybeCheckpointDir =
+              opts.isStreaming() ? Optional.of(new CheckpointDir(opts.getCheckpointDir()))
+                  : Optional.<CheckpointDir>absent();
+          Accumulator<NamedAggregators> accumulator =
+              jsc.sc().accumulator(new NamedAggregators(), ACCUMULATOR_NAME, new AggAccumParam());
+          if (maybeCheckpointDir.isPresent()) {
+            Optional<NamedAggregators> maybeRecoveredValue =
+                recoverValueFromCheckpoint(jsc, maybeCheckpointDir.get());
+            if (maybeRecoveredValue.isPresent()) {
+              accumulator.setValue(maybeRecoveredValue.get());
+            }
           }
+          instance = accumulator;
         }
       }
+      LOG.info("Instantiated aggregators accumulator: " + instance.value());
+    }
+  }
+
+  public static Accumulator<NamedAggregators> getInstance() {
+    if (instance == null) {
+      throw new IllegalStateException("Aggregators accumulator has not been instantiated");
+    } else {
+      return instance;
     }
-    return instance;
   }
 
-  private static void recoverValueFromCheckpoint(
+  private static Optional<NamedAggregators> recoverValueFromCheckpoint(
       JavaSparkContext jsc,
       CheckpointDir checkpointDir) {
     try {
@@ -72,14 +91,15 @@ public class AggregatorsAccumulator {
       fileSystem = checkpointFilePath.getFileSystem(jsc.hadoopConfiguration());
       NamedAggregators recoveredValue = Checkpoint.readObject(fileSystem, checkpointFilePath);
       if (recoveredValue != null) {
-        LOG.info("Recovered accumulators from checkpoint: " + recoveredValue);
-        instance.setValue(recoveredValue);
+        LOG.info("Recovered aggregators from checkpoint");
+        return Optional.of(recoveredValue);
       } else {
         LOG.info("No accumulator checkpoint found.");
       }
     } catch (Exception e) {
       throw new RuntimeException("Failure while reading accumulator checkpoint.", e);
     }
+    return Optional.absent();
   }
 
   private static void checkpoint() throws IOException {
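
For context, a minimal sketch of how the new idempotent init/getInstance pair is
meant to be used by a runner (illustrative only, not part of the patch; assumes a
SparkPipelineOptions instance "options" and a JavaSparkContext "jsc" are in scope):

    // Safe to call repeatedly; only the first call creates the accumulator.
    AggregatorsAccumulator.init(options, jsc);
    // Translators can now fetch the singleton without holding a SparkContext.
    Accumulator<NamedAggregators> aggregators = AggregatorsAccumulator.getInstance();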

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/SparkAggregators.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/SparkAggregators.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/SparkAggregators.java
index 131b761..1da196b 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/SparkAggregators.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/SparkAggregators.java
@@ -18,19 +18,16 @@
 
 package org.apache.beam.runners.spark.aggregators;
 
-import com.google.common.base.Optional;
 import com.google.common.collect.ImmutableList;
 import java.util.Collection;
 import java.util.Map;
 import org.apache.beam.runners.core.AggregatorFactory;
 import org.apache.beam.runners.core.ExecutionContext;
 import org.apache.beam.runners.spark.translation.SparkRuntimeContext;
-import org.apache.beam.runners.spark.translation.streaming.Checkpoint.CheckpointDir;
 import org.apache.beam.sdk.AggregatorValues;
 import org.apache.beam.sdk.transforms.Aggregator;
 import org.apache.beam.sdk.transforms.Combine;
 import org.apache.spark.Accumulator;
-import org.apache.spark.api.java.JavaSparkContext;
 
 /**
  * A utility class for handling Beam {@link Aggregator}s.
@@ -64,41 +61,14 @@ public class SparkAggregators {
   }
 
   /**
-   * Retrieves the {@link NamedAggregators} instance using the provided Spark context.
-   *
-   * @param jsc a Spark context to be used in order to retrieve the name
-   * {@link NamedAggregators} instance
-   */
-  public static Accumulator<NamedAggregators> getNamedAggregators(JavaSparkContext jsc) {
-    return getOrCreateNamedAggregators(jsc, Optional.<CheckpointDir>absent());
-  }
-
-  /**
-   * Retrieves or creates the {@link NamedAggregators} instance using the provided Spark context.
-   *
-   * @param jsc a Spark context to be used in order to retrieve the name
-   * {@link NamedAggregators} instance
-   * @param checkpointDir checkpoint dir (optional, for streaming pipelines)
-   * @return a {@link NamedAggregators} instance
-   */
-  @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
-  public static Accumulator<NamedAggregators> getOrCreateNamedAggregators(
-      JavaSparkContext jsc,
-      Optional<CheckpointDir> checkpointDir) {
-    return AggregatorsAccumulator.getInstance(jsc, checkpointDir);
-  }
-
-  /**
    * Retrieves the value of an aggregator from a SparkContext instance.
    *
    * @param aggregator The aggregator whose value to retrieve
-   * @param javaSparkContext The SparkContext instance
    * @param <T> The type of the aggregator's output
    * @return The value of the aggregator
    */
-  public static <T> AggregatorValues<T> valueOf(final Aggregator<?, T> aggregator,
-                                                final JavaSparkContext javaSparkContext) {
-    return valueOf(getNamedAggregators(javaSparkContext), aggregator);
+  public static <T> AggregatorValues<T> valueOf(final Aggregator<?, T> aggregator) {
+    return valueOf(AggregatorsAccumulator.getInstance(), aggregator);
   }
 
   /**
@@ -109,10 +79,8 @@ public class SparkAggregators {
    * @param <T>            Type of object to be returned.
    * @return The value of the aggregator.
    */
-  public static <T> T valueOf(final String name,
-                              final Class<T> typeClass,
-                              final JavaSparkContext javaSparkContext) {
-    return valueOf(getNamedAggregators(javaSparkContext), name, typeClass);
+  public static <T> T valueOf(final String name, final Class<T> typeClass) {
+    return valueOf(AggregatorsAccumulator.getInstance(), name, typeClass);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/AggregatorMetricSource.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/AggregatorMetricSource.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/AggregatorMetricSource.java
index b3880e8..919e6f2 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/AggregatorMetricSource.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/AggregatorMetricSource.java
@@ -28,19 +28,20 @@ import org.apache.spark.metrics.source.Source;
  * wrapping an underlying {@link NamedAggregators} instance.
  */
 public class AggregatorMetricSource implements Source {
+  private static final String METRIC_NAME = "Aggregators";
 
-  private final String sourceName;
+  private final String name;
 
   private final MetricRegistry metricRegistry = new MetricRegistry();
 
-  public AggregatorMetricSource(final String appName, final NamedAggregators aggregators) {
-    sourceName = appName;
-    metricRegistry.register("Beam.Aggregators", AggregatorMetric.of(aggregators));
+  public AggregatorMetricSource(final String name, final NamedAggregators aggregators) {
+    this.name = name;
+    metricRegistry.register(METRIC_NAME, AggregatorMetric.of(aggregators));
   }
 
   @Override
   public String sourceName() {
-    return sourceName;
+    return name;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/MetricsAccumulator.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/MetricsAccumulator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/MetricsAccumulator.java
index 9d48289..1153db6 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/MetricsAccumulator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/MetricsAccumulator.java
@@ -21,6 +21,7 @@ package org.apache.beam.runners.spark.metrics;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Optional;
 import java.io.IOException;
+import org.apache.beam.runners.spark.SparkPipelineOptions;
 import org.apache.beam.runners.spark.translation.streaming.Checkpoint;
 import org.apache.beam.runners.spark.translation.streaming.Checkpoint.CheckpointDir;
 import org.apache.hadoop.fs.FileSystem;
@@ -40,27 +41,37 @@ import org.slf4j.LoggerFactory;
 public class MetricsAccumulator {
   private static final Logger LOG = LoggerFactory.getLogger(MetricsAccumulator.class);
 
+  private static final String ACCUMULATOR_NAME = "Beam.Metrics";
   private static final String ACCUMULATOR_CHECKPOINT_FILENAME = "metrics";
 
   private static volatile Accumulator<SparkMetricsContainer> instance = null;
   private static volatile FileSystem fileSystem;
   private static volatile Path checkpointFilePath;
 
-  @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
-  public static void init(
-      JavaSparkContext jsc,
-      Optional<CheckpointDir> checkpointDir) {
+  /**
+   * Initializes the metrics accumulator if it has not been initialized yet.
+   * This method is idempotent.
+   */
+  public static void init(SparkPipelineOptions opts, JavaSparkContext jsc) {
     if (instance == null) {
       synchronized (MetricsAccumulator.class) {
         if (instance == null) {
-          SparkMetricsContainer initialValue = new SparkMetricsContainer();
-          instance = jsc.sc().accumulator(initialValue, "Beam.Metrics",
-              new MetricsAccumulatorParam());
-          if (checkpointDir.isPresent()) {
-            recoverValueFromCheckpoint(jsc, checkpointDir.get());
+          Optional<CheckpointDir> maybeCheckpointDir =
+              opts.isStreaming() ? Optional.of(new CheckpointDir(opts.getCheckpointDir()))
+                  : Optional.<CheckpointDir>absent();
+          Accumulator<SparkMetricsContainer> accumulator =
+              jsc.sc().accumulator(new SparkMetricsContainer(), ACCUMULATOR_NAME,
+                  new MetricsAccumulatorParam());
+          if (maybeCheckpointDir.isPresent()) {
+            Optional<SparkMetricsContainer> maybeRecoveredValue =
+                recoverValueFromCheckpoint(jsc, maybeCheckpointDir.get());
+            if (maybeRecoveredValue.isPresent()) {
+              accumulator.setValue(maybeRecoveredValue.get());
+            }
           }
+          instance = accumulator;
         }
       }
+      LOG.info("Instantiated metrics accumulator: " + instance.value());
     }
   }
 
@@ -72,7 +83,7 @@ public class MetricsAccumulator {
     }
   }
 
-  private static void recoverValueFromCheckpoint(
+  private static Optional<SparkMetricsContainer> recoverValueFromCheckpoint(
       JavaSparkContext jsc,
       CheckpointDir checkpointDir) {
     try {
@@ -81,18 +92,19 @@ public class MetricsAccumulator {
       fileSystem = checkpointFilePath.getFileSystem(jsc.hadoopConfiguration());
       SparkMetricsContainer recoveredValue = Checkpoint.readObject(fileSystem, checkpointFilePath);
       if (recoveredValue != null) {
-        LOG.info("Recovered metrics from checkpoint: " + recoveredValue);
-        instance.setValue(recoveredValue);
+        LOG.info("Recovered metrics from checkpoint.");
+        return Optional.of(recoveredValue);
       } else {
         LOG.info("No metrics checkpoint found.");
       }
     } catch (Exception e) {
       throw new RuntimeException("Failure while reading metrics checkpoint.", e);
     }
+    return Optional.absent();
   }
 
   @VisibleForTesting
-  static void clear() {
+  public static void clear() {
     synchronized (MetricsAccumulator.class) {
       instance = null;
     }

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/SparkBeamMetricSource.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/SparkBeamMetricSource.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/SparkBeamMetricSource.java
index 24231c3..9cab66d 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/SparkBeamMetricSource.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/SparkBeamMetricSource.java
@@ -28,19 +28,20 @@ import org.apache.spark.metrics.source.Source;
  * wrapping an underlying {@link SparkMetricsContainer} instance.
  */
 public class SparkBeamMetricSource implements Source {
+  private static final String METRIC_NAME = "Metrics";
 
-  private final String sourceName;
+  private final String name;
 
   private final MetricRegistry metricRegistry = new MetricRegistry();
 
-  public SparkBeamMetricSource(final String appName) {
-    sourceName = appName;
-    metricRegistry.register("Beam.Metrics", new SparkBeamMetric());
+  public SparkBeamMetricSource(final String name) {
+    this.name = name;
+    metricRegistry.register(METRIC_NAME, new SparkBeamMetric());
   }
 
   @Override
   public String sourceName() {
-    return sourceName;
+    return name;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/SparkMetricsContainer.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/SparkMetricsContainer.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/SparkMetricsContainer.java
index 7a4b222..d376ce3 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/SparkMetricsContainer.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/metrics/SparkMetricsContainer.java
@@ -18,7 +18,6 @@
 
 package org.apache.beam.runners.spark.metrics;
 
-import com.google.common.annotations.VisibleForTesting;
 import com.google.common.cache.CacheBuilder;
 import com.google.common.cache.CacheLoader;
 import com.google.common.cache.LoadingCache;
@@ -66,11 +65,15 @@ public class SparkMetricsContainer implements Serializable {
   }
 
   static Collection<MetricUpdate<Long>> getCounters() {
-    return getInstance().counters.values();
+    SparkMetricsContainer sparkMetricsContainer = getInstance();
+    sparkMetricsContainer.materialize();
+    return sparkMetricsContainer.counters.values();
   }
 
   static Collection<MetricUpdate<DistributionData>> getDistributions() {
-    return getInstance().distributions.values();
+    SparkMetricsContainer sparkMetricsContainer = getInstance();
+    sparkMetricsContainer.materialize();
+    return sparkMetricsContainer.distributions.values();
   }
 
   SparkMetricsContainer update(SparkMetricsContainer other) {
@@ -141,12 +144,4 @@ public class SparkMetricsContainer implements Serializable {
     }
     return sb.toString();
   }
-
-  @VisibleForTesting
-  public static void clear() {
-    try {
-      MetricsAccumulator.clear();
-    } catch (IllegalStateException ignored) {
-    }
-  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
index 44b4039..8d1b82e 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
@@ -37,8 +37,8 @@ import org.apache.avro.mapred.AvroKey;
 import org.apache.avro.mapreduce.AvroJob;
 import org.apache.avro.mapreduce.AvroKeyInputFormat;
 import org.apache.beam.runners.core.SystemReduceFn;
+import org.apache.beam.runners.spark.aggregators.AggregatorsAccumulator;
 import org.apache.beam.runners.spark.aggregators.NamedAggregators;
-import org.apache.beam.runners.spark.aggregators.SparkAggregators;
 import org.apache.beam.runners.spark.coders.CoderHelpers;
 import org.apache.beam.runners.spark.io.SourceRDD;
 import org.apache.beam.runners.spark.io.hadoop.HadoopIO;
@@ -138,8 +138,7 @@ public final class TransformTranslator {
             ((BoundedDataset<KV<K, V>>) context.borrowDataset(transform)).getRDD();
         @SuppressWarnings("unchecked")
         final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder();
-        final Accumulator<NamedAggregators> accum =
-            SparkAggregators.getNamedAggregators(context.getSparkContext());
+        final Accumulator<NamedAggregators> accum = AggregatorsAccumulator.getInstance();
         @SuppressWarnings("unchecked")
         final WindowingStrategy<?, W> windowingStrategy =
             (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
@@ -362,9 +361,7 @@ public final class TransformTranslator {
             ((BoundedDataset<InputT>) context.borrowDataset(transform)).getRDD();
         WindowingStrategy<?, ?> windowingStrategy =
             context.getInput(transform).getWindowingStrategy();
-        JavaSparkContext jsc = context.getSparkContext();
-        Accumulator<NamedAggregators> aggAccum =
-            SparkAggregators.getNamedAggregators(jsc);
+        Accumulator<NamedAggregators> aggAccum = AggregatorsAccumulator.getInstance();
         Accumulator<SparkMetricsContainer> metricsAccum =
             MetricsAccumulator.getInstance();
         Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs =
@@ -395,9 +392,7 @@ public final class TransformTranslator {
             ((BoundedDataset<InputT>) context.borrowDataset(transform)).getRDD();
         WindowingStrategy<?, ?> windowingStrategy =
             context.getInput(transform).getWindowingStrategy();
-        JavaSparkContext jsc = context.getSparkContext();
-        Accumulator<NamedAggregators> aggAccum =
-            SparkAggregators.getNamedAggregators(jsc);
+        Accumulator<NamedAggregators> aggAccum = AggregatorsAccumulator.getInstance();
         Accumulator<SparkMetricsContainer> metricsAccum =
             MetricsAccumulator.getInstance();
         JavaPairRDD<TupleTag<?>, WindowedValue<?>> all = inRDD

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/SparkRunnerStreamingContextFactory.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/SparkRunnerStreamingContextFactory.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/SparkRunnerStreamingContextFactory.java
index ffa8e69..7048be6 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/SparkRunnerStreamingContextFactory.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/SparkRunnerStreamingContextFactory.java
@@ -79,6 +79,9 @@ public class SparkRunnerStreamingContextFactory implements JavaStreamingContextF
     JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
     JavaStreamingContext jssc = new JavaStreamingContext(jsc, batchDuration);
 
+    // We must first init accumulators since translators expect them to be instantiated.
+    SparkRunner.initAccumulators(options, jsc);
+
     ctxt = new EvaluationContext(jsc, pipeline, jssc);
     pipeline.traverseTopologically(new SparkRunner.Evaluator(translator, ctxt));
     ctxt.computeOutputs();

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java
index 8a05fbb..2744169 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java
@@ -32,8 +32,8 @@ import java.util.Map;
 import java.util.Queue;
 import java.util.concurrent.LinkedBlockingQueue;
 import javax.annotation.Nonnull;
+import org.apache.beam.runners.spark.aggregators.AggregatorsAccumulator;
 import org.apache.beam.runners.spark.aggregators.NamedAggregators;
-import org.apache.beam.runners.spark.aggregators.SparkAggregators;
 import org.apache.beam.runners.spark.coders.CoderHelpers;
 import org.apache.beam.runners.spark.io.ConsoleIO;
 import org.apache.beam.runners.spark.io.CreateStream;
@@ -92,7 +92,6 @@ import org.apache.spark.streaming.api.java.JavaPairDStream;
 import org.apache.spark.streaming.api.java.JavaStreamingContext;
 
 
-
 /**
  * Supports translation between a Beam transform, and Spark's operations on DStreams.
  */
@@ -394,8 +393,7 @@ public final class StreamingTransformTranslator {
           public JavaRDD<WindowedValue<OutputT>> call(JavaRDD<WindowedValue<InputT>> rdd) throws
               Exception {
             final JavaSparkContext jsc = new JavaSparkContext(rdd.context());
-            final Accumulator<NamedAggregators> aggAccum =
-                SparkAggregators.getNamedAggregators(jsc);
+            final Accumulator<NamedAggregators> aggAccum = AggregatorsAccumulator.getInstance();
             final Accumulator<SparkMetricsContainer> metricsAccum =
                 MetricsAccumulator.getInstance();
             final Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs =
@@ -444,9 +442,7 @@ public final class StreamingTransformTranslator {
           public JavaPairRDD<TupleTag<?>, WindowedValue<?>> call(
               JavaRDD<WindowedValue<InputT>> rdd) throws Exception {
             String stepName = context.getCurrentTransform().getFullName();
-            JavaSparkContext jsc = new JavaSparkContext(rdd.context());
-            final Accumulator<NamedAggregators> aggAccum =
-                SparkAggregators.getNamedAggregators(jsc);
+            final Accumulator<NamedAggregators> aggAccum = AggregatorsAccumulator.getInstance();
             final Accumulator<SparkMetricsContainer> metricsAccum =
                 MetricsAccumulator.getInstance();
             final Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> sideInputs =

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/test/java/org/apache/beam/runners/spark/aggregators/metrics/sink/NamedAggregatorsTest.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/aggregators/metrics/sink/NamedAggregatorsTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/aggregators/metrics/sink/NamedAggregatorsTest.java
index a192807..dbd8cac 100644
--- a/runners/spark/src/test/java/org/apache/beam/runners/spark/aggregators/metrics/sink/NamedAggregatorsTest.java
+++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/aggregators/metrics/sink/NamedAggregatorsTest.java
@@ -27,14 +27,11 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Set;
 import org.apache.beam.runners.spark.PipelineRule;
-import org.apache.beam.runners.spark.SparkPipelineOptions;
 import org.apache.beam.runners.spark.aggregators.ClearAggregatorsRule;
 import org.apache.beam.runners.spark.aggregators.SparkAggregators;
 import org.apache.beam.runners.spark.examples.WordCount;
-import org.apache.beam.runners.spark.translation.SparkContextFactory;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
 import org.apache.beam.sdk.testing.PAssert;
 import org.apache.beam.sdk.transforms.Create;
 import org.apache.beam.sdk.transforms.MapElements;
@@ -86,24 +83,18 @@ public class NamedAggregatorsTest {
 
   @Test
   public void testNamedAggregators() throws Exception {
-
-    // don't reuse context in this test, as is tends to mess up Spark's MetricsSystem thread-safety
-    System.setProperty("beam.spark.test.reuseSparkContext", "false");
-
     assertThat(InMemoryMetrics.valueOf("emptyLines"), is(nullValue()));
 
     runPipeline();
 
     assertThat(InMemoryMetrics.<Double>valueOf("emptyLines"), is(1d));
-
   }
 
   @Test
   public void testNonExistingAggregatorName() throws Exception {
-    final SparkPipelineOptions options = PipelineOptionsFactory.as(SparkPipelineOptions.class);
-    final Long valueOf =
-        SparkAggregators.valueOf(
-            "myMissingAggregator", Long.class, SparkContextFactory.getSparkContext(options));
+    runPipeline();
+
+    final Long valueOf = SparkAggregators.valueOf("myMissingAggregator", Long.class);
 
     assertThat(valueOf, is(nullValue()));
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/874c8d0d/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/ResumeFromCheckpointStreamingTest.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/ResumeFromCheckpointStreamingTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/ResumeFromCheckpointStreamingTest.java
index bc22980..ce502d6 100644
--- a/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/ResumeFromCheckpointStreamingTest.java
+++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/translation/streaming/ResumeFromCheckpointStreamingTest.java
@@ -40,7 +40,7 @@ import org.apache.beam.runners.spark.SparkPipelineResult;
 import org.apache.beam.runners.spark.TestSparkPipelineOptions;
 import org.apache.beam.runners.spark.aggregators.AggregatorsAccumulator;
 import org.apache.beam.runners.spark.coders.CoderHelpers;
-import org.apache.beam.runners.spark.metrics.SparkMetricsContainer;
+import org.apache.beam.runners.spark.metrics.MetricsAccumulator;
 import org.apache.beam.runners.spark.translation.streaming.utils.EmbeddedKafkaCluster;
 import org.apache.beam.runners.spark.util.GlobalWatermarkHolder;
 import org.apache.beam.sdk.Pipeline;
@@ -171,7 +171,7 @@ public class ResumeFromCheckpointStreamingTest {
 
     //- clear state.
     AggregatorsAccumulator.clear();
-    SparkMetricsContainer.clear();
+    MetricsAccumulator.clear();
     GlobalWatermarkHolder.clear();
 
     //- write a bit more.


[21/50] [abbrv] beam git commit: [BEAM-1661] This closes #2205

Posted by ke...@apache.org.
[BEAM-1661] This closes #2205


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/fdba784a
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/fdba784a
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/fdba784a

Branch: refs/heads/gearpump-runner
Commit: fdba784a8ec61778642633da664cb7e8762ce47e
Parents: c12d432 818e521
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Fri Mar 10 15:25:38 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Mar 10 15:25:38 2017 +0100

----------------------------------------------------------------------
 sdks/java/io/jdbc/pom.xml | 46 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
----------------------------------------------------------------------



[13/50] [abbrv] beam git commit: Move pipeline context and add more tests.

Posted by ke...@apache.org.
Move pipeline context and add more tests.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/deff128f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/deff128f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/deff128f

Branch: refs/heads/gearpump-runner
Commit: deff128ff07ccc67956e1d5a94a64f2a31b224c8
Parents: b2da21e
Author: Robert Bradshaw <ro...@gmail.com>
Authored: Thu Mar 9 09:21:33 2017 -0800
Committer: Robert Bradshaw <ro...@gmail.com>
Committed: Thu Mar 9 20:29:02 2017 -0800

----------------------------------------------------------------------
 sdks/python/apache_beam/coders/coders.py        | 93 ++++++++++++++++++++
 sdks/python/apache_beam/pipeline.py             | 62 -------------
 .../apache_beam/runners/pipeline_context.py     | 88 ++++++++++++++++++
 .../runners/pipeline_context_test.py            | 49 +++++++++++
 sdks/python/apache_beam/transforms/core.py      |  1 +
 .../apache_beam/transforms/trigger_test.py      | 18 +---
 sdks/python/apache_beam/transforms/window.py    |  2 +-
 .../apache_beam/transforms/window_test.py       |  6 +-
 sdks/python/apache_beam/utils/urns.py           |  2 +-
 9 files changed, 238 insertions(+), 83 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/deff128f/sdks/python/apache_beam/coders/coders.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py
index fd72af8..9f5a97a 100644
--- a/sdks/python/apache_beam/coders/coders.py
+++ b/sdks/python/apache_beam/coders/coders.py
@@ -266,6 +266,12 @@ class BytesCoder(FastCoder):
   def is_deterministic(self):
     return True
 
+  def __eq__(self, other):
+    return type(self) == type(other)
+
+  def __hash__(self):
+    return hash(type(self))
+
 
 class VarIntCoder(FastCoder):
   """Variable-length integer coder."""
@@ -276,6 +282,12 @@ class VarIntCoder(FastCoder):
   def is_deterministic(self):
     return True
 
+  def __eq__(self, other):
+    return type(self) == type(other)
+
+  def __hash__(self):
+    return hash(type(self))
+
 
 class FloatCoder(FastCoder):
   """A coder used for floating-point values."""
@@ -286,6 +298,12 @@ class FloatCoder(FastCoder):
   def is_deterministic(self):
     return True
 
+  def __eq__(self, other):
+    return type(self) == type(other)
+
+  def __hash__(self):
+    return hash(type(self))
+
 
 class TimestampCoder(FastCoder):
   """A coder used for timeutil.Timestamp values."""
@@ -296,6 +314,12 @@ class TimestampCoder(FastCoder):
   def is_deterministic(self):
     return True
 
+  def __eq__(self, other):
+    return type(self) == type(other)
+
+  def __hash__(self):
+    return hash(type(self))
+
 
 class SingletonCoder(FastCoder):
   """A coder that always encodes exactly one value."""
@@ -309,6 +333,12 @@ class SingletonCoder(FastCoder):
   def is_deterministic(self):
     return True
 
+  def __eq__(self, other):
+    return type(self) == type(other) and self._value == other._value
+
+  def __hash__(self):
+    return hash(self._value)
+
 
 def maybe_dill_dumps(o):
   """Pickle using cPickle or the Dill pickler as a fallback."""
@@ -365,6 +395,12 @@ class _PickleCoderBase(FastCoder):
   def value_coder(self):
     return self
 
+  def __eq__(self, other):
+    return type(self) == type(other)
+
+  def __hash__(self):
+    return hash(type(self))
+
 
 class PickleCoder(_PickleCoderBase):
   """Coder using Python's pickle functionality."""
@@ -446,6 +482,12 @@ class FastPrimitivesCoder(FastCoder):
   def value_coder(self):
     return self
 
+  def __eq__(self, other):
+    return type(self) == type(other)
+
+  def __hash__(self):
+    return hash(type(self))
+
 
 class Base64PickleCoder(Coder):
   """Coder of objects by Python pickle, then base64 encoding."""
@@ -503,6 +545,13 @@ class ProtoCoder(FastCoder):
     # a Map.
     return False
 
+  def __eq__(self, other):
+    return (type(self) == type(other)
+            and self.proto_message_type == other.proto_message_type)
+
+  def __hash__(self):
+    return hash(self.proto_message_type)
+
   @staticmethod
   def from_type_hint(typehint, unused_registry):
     if issubclass(typehint, google.protobuf.message.Message):
@@ -563,6 +612,13 @@ class TupleCoder(FastCoder):
   def __repr__(self):
     return 'TupleCoder[%s]' % ', '.join(str(c) for c in self._coders)
 
+  def __eq__(self, other):
+    return (type(self) == type(other)
+            and self._coders == other._coders)
+
+  def __hash__(self):
+    return hash(self._coders)
+
 
 class TupleSequenceCoder(FastCoder):
   """Coder of homogeneous tuple objects."""
@@ -586,6 +642,13 @@ class TupleSequenceCoder(FastCoder):
   def __repr__(self):
     return 'TupleSequenceCoder[%r]' % self._elem_coder
 
+  def __eq__(self, other):
+    return (type(self) == type(other)
+            and self._elem_coder == other._elem_coder)
+
+  def __hash__(self):
+    return hash((type(self), self._elem_coder))
+
 
 class IterableCoder(FastCoder):
   """Coder of iterables of homogeneous objects."""
@@ -619,6 +682,13 @@ class IterableCoder(FastCoder):
   def __repr__(self):
     return 'IterableCoder[%r]' % self._elem_coder
 
+  def __eq__(self, other):
+    return (type(self) == type(other)
+            and self._elem_coder == other._elem_coder)
+
+  def __hash__(self):
+    return hash((type(self), self._elem_coder))
+
 
 class WindowCoder(PickleCoder):
   """Coder for windows in windowed values."""
@@ -663,6 +733,12 @@ class IntervalWindowCoder(FastCoder):
         '@type': 'kind:interval_window',
     }
 
+  def __eq__(self, other):
+    return type(self) == type(other)
+
+  def __hash__(self):
+    return hash(type(self))
+
 
 class WindowedValueCoder(FastCoder):
   """Coder for windowed values."""
@@ -709,6 +785,16 @@ class WindowedValueCoder(FastCoder):
   def __repr__(self):
     return 'WindowedValueCoder[%s]' % self.wrapped_value_coder
 
+  def __eq__(self, other):
+    return (type(self) == type(other)
+            and self.wrapped_value_coder == other.wrapped_value_coder
+            and self.timestamp_coder == other.timestamp_coder
+            and self.window_coder == other.window_coder)
+
+  def __hash__(self):
+    return hash(
+        (self.wrapped_value_coder, self.timestamp_coder, self.window_coder))
+
 
 class LengthPrefixCoder(FastCoder):
   """Coder which prefixes the length of the encoded object in the stream."""
@@ -740,3 +826,10 @@ class LengthPrefixCoder(FastCoder):
 
   def __repr__(self):
     return 'LengthPrefixCoder[%r]' % self._value_coder
+
+  def __eq__(self, other):
+    return (type(self) == type(other)
+            and self._value_coder == other._value_coder)
+
+  def __hash__(self):
+    return hash((type(self), self._value_coder))
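
Why the new __eq__/__hash__ overrides matter (an illustrative note, not part of
the commit): the pipeline context deduplicates components by using them as
dictionary keys, which only works if equal coders compare and hash alike:

    from apache_beam import coders

    # Structurally identical coders now compare equal and hash identically,
    # so a dict keyed by coder instances deduplicates them.
    assert coders.BytesCoder() == coders.BytesCoder()
    refs = {coders.BytesCoder(): 'ref_Coder_1'}  # 'ref_Coder_1' is a made-up id
    assert refs[coders.BytesCoder()] == 'ref_Coder_1'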

http://git-wip-us.apache.org/repos/asf/beam/blob/deff128f/sdks/python/apache_beam/pipeline.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py
index 9edcf9b..7db39a9 100644
--- a/sdks/python/apache_beam/pipeline.py
+++ b/sdks/python/apache_beam/pipeline.py
@@ -52,14 +52,11 @@ import os
 import shutil
 import tempfile
 
-from apache_beam import coders
 from apache_beam import pvalue
 from apache_beam import typehints
 from apache_beam.internal import pickler
 from apache_beam.runners import create_runner
 from apache_beam.runners import PipelineRunner
-from apache_beam.runners.api import beam_runner_api_pb2
-from apache_beam.transforms import core
 from apache_beam.transforms import ptransform
 from apache_beam.typehints import TypeCheckError
 from apache_beam.utils.pipeline_options import PipelineOptions
@@ -443,62 +440,3 @@ class AppliedPTransform(object):
         if v not in visited:
           visited.add(v)
           visitor.visit_value(v, self)
-
-
-class PipelineContextMap(object):
-  """This is a bi-directional map between objects and ids.
-
-  Under the hood it encodes and decodes these objects into runner API
-  representations.
-  """
-  def __init__(self, context, obj_type, proto_map=None):
-    self._pipeline_context = context
-    self._obj_type = obj_type
-    self._obj_to_id = {}
-    self._id_to_obj = {}
-    self._id_to_proto = proto_map if proto_map else {}
-    self._counter = 0
-
-  def _unique_ref(self):
-    self._counter += 1
-    return "ref_%s_%s" % (self._obj_type.__name__, self._counter)
-
-  def populate_map(self, proto_map):
-    for id, obj in self._id_to_obj:
-      proto_map[id] = self._id_to_proto[id]
-
-  def get_id(self, obj):
-    if obj not in self._obj_to_id:
-      id = self._unique_ref()
-      self._id_to_obj[id] = obj
-      self._obj_to_id[obj] = id
-      self._id_to_proto[id] = obj.to_runner_api(self._pipeline_context)
-    return self._obj_to_id[obj]
-
-  def get_by_id(self, id):
-    if id not in self._id_to_obj:
-      self._id_to_obj[id] = self._obj_type.from_runner_api(
-        self._id_to_proto[id], self._pipeline_context)
-    return self._id_to_obj[id]
-
-
-class PipelineContext(object):
-
-  _COMPONENT_TYPES = {
-    'transforms': AppliedPTransform,
-    'pcollections': pvalue.PCollection,
-    'coders': coders.Coder,
-    'windowing_strategies': core.Windowing,
-    # TODO: environment
-  }
-
-  def __init__(self, context_proto=None):
-    for name, cls in self._COMPONENT_TYPES.items():
-      setattr(self, name,
-              PipelineContextMap(self, cls, getattr(context_proto, name, None)))
-
-  def to_runner_api(self):
-    context_proto = beam_runner_api_pb2.Components()
-    for name, cls in self._COMPONENT_TYPES:
-      getattr(self, name).populate_map(getattr(context_proto, name))
-    return context_proto

http://git-wip-us.apache.org/repos/asf/beam/blob/deff128f/sdks/python/apache_beam/runners/pipeline_context.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/pipeline_context.py b/sdks/python/apache_beam/runners/pipeline_context.py
new file mode 100644
index 0000000..4f82774
--- /dev/null
+++ b/sdks/python/apache_beam/runners/pipeline_context.py
@@ -0,0 +1,88 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from apache_beam import pipeline
+from apache_beam import pvalue
+from apache_beam import coders
+from apache_beam.runners.api import beam_runner_api_pb2
+from apache_beam.transforms import core
+
+
+class _PipelineContextMap(object):
+  """This is a bi-directional map between objects and ids.
+
+  Under the hood it encodes and decodes these objects into runner API
+  representations.
+  """
+  def __init__(self, context, obj_type, proto_map=None):
+    self._pipeline_context = context
+    self._obj_type = obj_type
+    self._obj_to_id = {}
+    self._id_to_obj = {}
+    self._id_to_proto = proto_map if proto_map else {}
+    self._counter = 0
+
+  def _unique_ref(self):
+    self._counter += 1
+    return "ref_%s_%s" % (self._obj_type.__name__, self._counter)
+
+  def populate_map(self, proto_map):
+    for id, proto in self._id_to_proto.items():
+      proto_map[id].CopyFrom(proto)
+
+  def get_id(self, obj):
+    if obj not in self._obj_to_id:
+      id = self._unique_ref()
+      self._id_to_obj[id] = obj
+      self._obj_to_id[obj] = id
+      self._id_to_proto[id] = obj.to_runner_api(self._pipeline_context)
+    return self._obj_to_id[obj]
+
+  def get_by_id(self, id):
+    if id not in self._id_to_obj:
+      self._id_to_obj[id] = self._obj_type.from_runner_api(
+          self._id_to_proto[id], self._pipeline_context)
+    return self._id_to_obj[id]
+
+
+class PipelineContext(object):
+  """Used for accessing and constructing the referenced objects of a Pipeline.
+  """
+
+  _COMPONENT_TYPES = {
+      'transforms': pipeline.AppliedPTransform,
+      'pcollections': pvalue.PCollection,
+      'coders': coders.Coder,
+      'windowing_strategies': core.Windowing,
+      # TODO: environment
+  }
+
+  def __init__(self, context_proto=None):
+    for name, cls in self._COMPONENT_TYPES.items():
+      setattr(
+          self, name, _PipelineContextMap(
+              self, cls, getattr(context_proto, name, None)))
+
+  @staticmethod
+  def from_runner_api(proto):
+    return PipelineContext(proto)
+
+  def to_runner_api(self):
+    context_proto = beam_runner_api_pb2.Components()
+    for name in self._COMPONENT_TYPES:
+      getattr(self, name).populate_map(getattr(context_proto, name))
+    return context_proto

http://git-wip-us.apache.org/repos/asf/beam/blob/deff128f/sdks/python/apache_beam/runners/pipeline_context_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/pipeline_context_test.py b/sdks/python/apache_beam/runners/pipeline_context_test.py
new file mode 100644
index 0000000..6091ed8
--- /dev/null
+++ b/sdks/python/apache_beam/runners/pipeline_context_test.py
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Unit tests for the windowing classes."""
+
+import unittest
+
+from apache_beam import coders
+from apache_beam.runners import pipeline_context
+
+
+class PipelineContextTest(unittest.TestCase):
+
+  def test_deduplication(self):
+    context = pipeline_context.PipelineContext()
+    bytes_coder_ref = context.coders.get_id(coders.BytesCoder())
+    bytes_coder_ref2 = context.coders.get_id(coders.BytesCoder())
+    self.assertEqual(bytes_coder_ref, bytes_coder_ref2)
+
+  def test_serialization(self):
+    context = pipeline_context.PipelineContext()
+    float_coder_ref = context.coders.get_id(coders.FloatCoder())
+    bytes_coder_ref = context.coders.get_id(coders.BytesCoder())
+    proto = context.to_runner_api()
+    context2 = pipeline_context.PipelineContext.from_runner_api(proto)
+    self.assertEqual(
+        coders.FloatCoder(),
+        context2.coders.get_by_id(float_coder_ref))
+    self.assertEqual(
+        coders.BytesCoder(),
+        context2.coders.get_by_id(bytes_coder_ref))
+
+
+if __name__ == '__main__':
+  unittest.main()

http://git-wip-us.apache.org/repos/asf/beam/blob/deff128f/sdks/python/apache_beam/transforms/core.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py
index 1fc63b2..3251671 100644
--- a/sdks/python/apache_beam/transforms/core.py
+++ b/sdks/python/apache_beam/transforms/core.py
@@ -1235,6 +1235,7 @@ class Windowing(object):
         trigger=self.triggerfn.to_runner_api(context),
         accumulation_mode=self.accumulation_mode,
         output_time=self.output_time_fn,
+        # TODO(robertwb): Support EMIT_IF_NONEMPTY
         closing_behavior=beam_runner_api_pb2.EMIT_ALWAYS,
         allowed_lateness=0)
 

http://git-wip-us.apache.org/repos/asf/beam/blob/deff128f/sdks/python/apache_beam/transforms/trigger_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/trigger_test.py b/sdks/python/apache_beam/transforms/trigger_test.py
index cc9e0f5..827aa33 100644
--- a/sdks/python/apache_beam/transforms/trigger_test.py
+++ b/sdks/python/apache_beam/transforms/trigger_test.py
@@ -25,6 +25,7 @@ import unittest
 import yaml
 
 import apache_beam as beam
+from apache_beam.runners import pipeline_context
 from apache_beam.test_pipeline import TestPipeline
 from apache_beam.transforms import trigger
 from apache_beam.transforms.core import Windowing
@@ -392,22 +393,7 @@ class RunnerApiTest(unittest.TestCase):
         AfterWatermark(early=AfterCount(1000), late=AfterCount(1)),
         Repeatedly(AfterCount(100)),
         trigger.OrFinally(AfterCount(3), AfterCount(10))):
-      context = beam.pipeline.PipelineContext()
-      self.assertEqual(
-          trigger_fn,
-          TriggerFn.from_runner_api(trigger_fn.to_runner_api(context), context))
-
-  def test_windowing_strategy_encoding(self):
-    for trigger_fn in (
-        DefaultTrigger(),
-        AfterAll(AfterCount(1), AfterCount(10)),
-        AfterFirst(AfterCount(10), AfterCount(100)),
-        AfterEach(AfterCount(100), AfterCount(1000)),
-        AfterWatermark(early=AfterCount(1000)),
-        AfterWatermark(early=AfterCount(1000), late=AfterCount(1)),
-        Repeatedly(AfterCount(100)),
-        trigger.OrFinally(AfterCount(3), AfterCount(10))):
-      context = beam.pipeline.PipelineContext()
+      context = pipeline_context.PipelineContext()
       self.assertEqual(
           trigger_fn,
           TriggerFn.from_runner_api(trigger_fn.to_runner_api(context), context))

http://git-wip-us.apache.org/repos/asf/beam/blob/deff128f/sdks/python/apache_beam/transforms/window.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/window.py b/sdks/python/apache_beam/transforms/window.py
index c763a96..3878dff 100644
--- a/sdks/python/apache_beam/transforms/window.py
+++ b/sdks/python/apache_beam/transforms/window.py
@@ -73,6 +73,7 @@ class OutputTimeFn(object):
   OUTPUT_AT_EOW = beam_runner_api_pb2.END_OF_WINDOW
   OUTPUT_AT_EARLIEST = beam_runner_api_pb2.EARLIEST_IN_PANE
   OUTPUT_AT_LATEST = beam_runner_api_pb2.LATEST_IN_PANE
+  # TODO(robertwb): Add this to the runner API or remove it.
   OUTPUT_AT_EARLIEST_TRANSFORMED = 'OUTPUT_AT_EARLIEST_TRANSFORMED'
 
   @staticmethod
@@ -167,7 +168,6 @@ class WindowFn(object):
     return pickler.loads(fn_parameter.value)
 
   def to_runner_api_parameter(self, context):
-    raise TypeError(self)
     return (urns.PICKLED_WINDOW_FN,
             wrappers_pb2.BytesValue(value=pickler.dumps(self)))
 

http://git-wip-us.apache.org/repos/asf/beam/blob/deff128f/sdks/python/apache_beam/transforms/window_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/window_test.py b/sdks/python/apache_beam/transforms/window_test.py
index c79739a..99be02c 100644
--- a/sdks/python/apache_beam/transforms/window_test.py
+++ b/sdks/python/apache_beam/transforms/window_test.py
@@ -19,7 +19,7 @@
 
 import unittest
 
-from apache_beam import pipeline
+from apache_beam.runners import pipeline_context
 from apache_beam.test_pipeline import TestPipeline
 from apache_beam.transforms import CombinePerKey
 from apache_beam.transforms import combiners
@@ -238,7 +238,7 @@ class RunnerApiTest(unittest.TestCase):
                       FixedWindows(37),
                       SlidingWindows(2, 389),
                       Sessions(5077)):
-      context = pipeline.PipelineContext()
+      context = pipeline_context.PipelineContext()
       self.assertEqual(
           window_fn,
           WindowFn.from_runner_api(window_fn.to_runner_api(context), context))
@@ -251,7 +251,7 @@ class RunnerApiTest(unittest.TestCase):
         Windowing(SlidingWindows(10, 15, 21), AfterCount(28),
                   output_time_fn=OutputTimeFn.OUTPUT_AT_LATEST,
                   accumulation_mode=AccumulationMode.DISCARDING)):
-      context = pipeline.PipelineContext()
+      context = pipeline_context.PipelineContext()
       self.assertEqual(
           windowing,
           Windowing.from_runner_api(windowing.to_runner_api(context), context))

http://git-wip-us.apache.org/repos/asf/beam/blob/deff128f/sdks/python/apache_beam/utils/urns.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/utils/urns.py b/sdks/python/apache_beam/utils/urns.py
index 186c99c..936e2cb 100644
--- a/sdks/python/apache_beam/utils/urns.py
+++ b/sdks/python/apache_beam/utils/urns.py
@@ -21,4 +21,4 @@ FIXED_WINDOWS_FN = "beam:window_fn:fixed_windows:v0.1"
 SLIDING_WINDOWS_FN = "beam:window_fn:sliding_windows:v0.1"
 SESSION_WINDOWS_FN = "beam:window_fn:session_windows:v0.1"
 
-PICKLED_CODER = "dataflow:coder:pickled_python:v0.1"
+PICKLED_CODER = "beam:coder:pickled_python:v0.1"


[15/50] [abbrv] beam git commit: Introduce Flink-specific state GC implementations

Posted by ke...@apache.org.
Introduce Flink-specific state GC implementations

We now set the GC timer for window.maxTimestamp() + 1 to ensure that a
user timer set for window.maxTimestamp() still has all state.

This also adds tests for late data dropping and state GC specifically
for the Flink DoFnOperator.
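
An illustrative sketch of the timing rule above (not code from this patch; the
"__gc" timer id and windowCoder are placeholders), using the TimerInternals API
that appears elsewhere in this commit:

    // Fire GC strictly after the window's max timestamp so a user timer set
    // exactly at window.maxTimestamp() still observes its state.
    Instant gcTime = window.maxTimestamp().plus(1);
    timerInternals.setTimer(
        StateNamespaces.window(windowCoder, window),
        "__gc", gcTime, TimeDomain.EVENT_TIME);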


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1a8e1f74
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1a8e1f74
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1a8e1f74

Branch: refs/heads/gearpump-runner
Commit: 1a8e1f7463cbc7c6b5edfe1dbbc98502e5612511
Parents: bf6d274
Author: Aljoscha Krettek <al...@gmail.com>
Authored: Fri Mar 10 11:07:00 2017 +0100
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri Mar 10 11:09:04 2017 +0100

----------------------------------------------------------------------
 .../apache/beam/runners/core/DoFnRunners.java   |  15 +-
 .../beam/runners/core/StatefulDoFnRunner.java   |  87 -------
 .../runners/core/StatefulDoFnRunnerTest.java    | 110 ++++++++-
 .../wrappers/streaming/DoFnOperator.java        | 111 ++++++++-
 .../flink/streaming/DoFnOperatorTest.java       | 225 +++++++++++++++++++
 5 files changed, 439 insertions(+), 109 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/1a8e1f74/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunners.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunners.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunners.java
index 9455eea..a1b7c8b 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunners.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunners.java
@@ -21,9 +21,6 @@ import java.util.List;
 import org.apache.beam.runners.core.ExecutionContext.StepContext;
 import org.apache.beam.runners.core.StatefulDoFnRunner.CleanupTimer;
 import org.apache.beam.runners.core.StatefulDoFnRunner.StateCleaner;
-import org.apache.beam.runners.core.StatefulDoFnRunner.StateInternalsStateCleaner;
-import org.apache.beam.runners.core.StatefulDoFnRunner.TimeInternalsCleanupTimer;
-import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.transforms.Aggregator;
 import org.apache.beam.sdk.transforms.DoFn;
@@ -135,18 +132,13 @@ public class DoFnRunners {
           DoFnRunner<InputT, OutputT> doFnRunner,
           StepContext stepContext,
           AggregatorFactory aggregatorFactory,
-          WindowingStrategy<?, ?> windowingStrategy) {
+          WindowingStrategy<?, ?> windowingStrategy,
+          CleanupTimer cleanupTimer,
+          StateCleaner<W> stateCleaner) {
     Aggregator<Long, Long> droppedDueToLateness = aggregatorFactory.createAggregatorForDoFn(
         fn.getClass(), stepContext, StatefulDoFnRunner.DROPPED_DUE_TO_LATENESS_COUNTER,
         Sum.ofLongs());
 
-    CleanupTimer cleanupTimer =
-        new TimeInternalsCleanupTimer(stepContext.timerInternals(), windowingStrategy);
-
-    Coder<W> windowCoder = (Coder<W>) windowingStrategy.getWindowFn().windowCoder();
-    StateCleaner<W> stateCleaner =
-        new StateInternalsStateCleaner<>(fn, stepContext.stateInternals(), windowCoder);
-
     return new StatefulDoFnRunner<>(
         doFnRunner,
         windowingStrategy,
@@ -154,5 +146,4 @@ public class DoFnRunners {
         stateCleaner,
         droppedDueToLateness);
   }
-
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/1a8e1f74/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
index 926345e..c672902 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
@@ -17,12 +17,8 @@
  */
 package org.apache.beam.runners.core;
 
-import java.util.Map;
-import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.transforms.Aggregator;
 import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
-import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.transforms.windowing.NonMergingWindowFn;
 import org.apache.beam.sdk.transforms.windowing.WindowFn;
@@ -30,8 +26,6 @@ import org.apache.beam.sdk.util.TimeDomain;
 import org.apache.beam.sdk.util.WindowTracing;
 import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.util.state.State;
-import org.apache.beam.sdk.util.state.StateSpec;
 import org.joda.time.Instant;
 
 /**
@@ -45,7 +39,6 @@ import org.joda.time.Instant;
 public class StatefulDoFnRunner<InputT, OutputT, W extends BoundedWindow>
     implements DoFnRunner<InputT, OutputT> {
 
-  public static final String GC_TIMER_ID = "__StatefulParDoGcTimerId";
   public static final String DROPPED_DUE_TO_LATENESS_COUNTER = "StatefulParDoDropped";
 
   private final DoFnRunner<InputT, OutputT> doFnRunner;
@@ -167,84 +160,4 @@ public class StatefulDoFnRunner<InputT, OutputT, W extends BoundedWindow>
 
     void clearForWindow(W window);
   }
-
-  /**
-   * A {@link CleanupTimer} implemented by TimerInternals.
-   */
-  public static class TimeInternalsCleanupTimer implements CleanupTimer {
-
-    private final TimerInternals timerInternals;
-    private final WindowingStrategy<?, ?> windowingStrategy;
-    private final Coder<BoundedWindow> windowCoder;
-
-    public TimeInternalsCleanupTimer(
-        TimerInternals timerInternals,
-        WindowingStrategy<?, ?> windowingStrategy) {
-      this.windowingStrategy = windowingStrategy;
-      WindowFn<?, ?> windowFn = windowingStrategy.getWindowFn();
-      windowCoder = (Coder<BoundedWindow>) windowFn.windowCoder();
-      this.timerInternals = timerInternals;
-    }
-
-    @Override
-    public Instant currentInputWatermarkTime() {
-      return timerInternals.currentInputWatermarkTime();
-    }
-
-    @Override
-    public void setForWindow(BoundedWindow window) {
-      Instant gcTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness());
-      timerInternals.setTimer(StateNamespaces.window(windowCoder, window),
-          GC_TIMER_ID, gcTime, TimeDomain.EVENT_TIME);
-    }
-
-    @Override
-    public boolean isForWindow(
-        String timerId,
-        BoundedWindow window,
-        Instant timestamp,
-        TimeDomain timeDomain) {
-      boolean isEventTimer = timeDomain.equals(TimeDomain.EVENT_TIME);
-      Instant gcTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness());
-      return isEventTimer && GC_TIMER_ID.equals(timerId) && gcTime.equals(timestamp);
-    }
-  }
-
-  /**
-   * A {@link StateCleaner} implemented by StateInternals.
-   */
-  public static class StateInternalsStateCleaner<W extends BoundedWindow>
-      implements StateCleaner<W> {
-
-    private final DoFn<?, ?> fn;
-    private final DoFnSignature signature;
-    private final StateInternals<?> stateInternals;
-    private final Coder<W> windowCoder;
-
-    public StateInternalsStateCleaner(
-        DoFn<?, ?> fn,
-        StateInternals<?> stateInternals,
-        Coder<W> windowCoder) {
-      this.fn = fn;
-      this.signature = DoFnSignatures.getSignature(fn.getClass());
-      this.stateInternals = stateInternals;
-      this.windowCoder = windowCoder;
-    }
-
-    @Override
-    public void clearForWindow(W window) {
-      for (Map.Entry<String, DoFnSignature.StateDeclaration> entry :
-          signature.stateDeclarations().entrySet()) {
-        try {
-          StateSpec<?, ?> spec = (StateSpec<?, ?>) entry.getValue().field().get(fn);
-          State state = stateInternals.state(StateNamespaces.window(windowCoder, window),
-              StateTags.tagForSpec(entry.getKey(), (StateSpec) spec));
-          state.clear();
-        } catch (IllegalAccessException e) {
-          throw new RuntimeException(e);
-        }
-      }
-    }
-  }
-
 }
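
The CleanupTimer and StateCleaner interfaces remain in StatefulDoFnRunner so that each runner can supply its own GC scheduling. A skeletal CleanupTimer, assuming only the three methods visible in this patch; the bodies are placeholders for illustration, not Beam's implementation:

    StatefulDoFnRunner.CleanupTimer neverCleans =
        new StatefulDoFnRunner.CleanupTimer() {
          @Override
          public Instant currentInputWatermarkTime() {
            // report the watermark as not yet advanced
            return BoundedWindow.TIMESTAMP_MIN_VALUE;
          }

          @Override
          public void setForWindow(BoundedWindow window) {
            // a real implementation schedules a GC timer for this window
          }

          @Override
          public boolean isForWindow(
              String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
            // never claims a firing timer as a GC timer
            return false;
          }
        };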

http://git-wip-us.apache.org/repos/asf/beam/blob/1a8e1f74/runners/core-java/src/test/java/org/apache/beam/runners/core/StatefulDoFnRunnerTest.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/StatefulDoFnRunnerTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/StatefulDoFnRunnerTest.java
index 54ac77e..fd6a73c 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/StatefulDoFnRunnerTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/StatefulDoFnRunnerTest.java
@@ -24,6 +24,7 @@ import static org.mockito.Mockito.when;
 
 import com.google.common.base.MoreObjects;
 import java.util.Collections;
+import java.util.Map;
 import org.apache.beam.runners.core.BaseExecutionContext.StepContext;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.VarIntCoder;
@@ -31,14 +32,18 @@ import org.apache.beam.sdk.transforms.Aggregator;
 import org.apache.beam.sdk.transforms.Combine;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.Sum;
+import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
+import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.transforms.windowing.FixedWindows;
 import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
 import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
 import org.apache.beam.sdk.util.NullSideInputReader;
 import org.apache.beam.sdk.util.TimeDomain;
 import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.util.state.State;
 import org.apache.beam.sdk.util.state.StateSpec;
 import org.apache.beam.sdk.util.state.StateSpecs;
 import org.apache.beam.sdk.util.state.ValueState;
@@ -114,7 +119,14 @@ public class StatefulDoFnRunnerTest {
     DoFn<KV<String, Integer>, Integer> fn = new MyDoFn();
 
     DoFnRunner<KV<String, Integer>, Integer> runner = DoFnRunners.defaultStatefulDoFnRunner(
-        fn, getDoFnRunner(fn), mockStepContext, aggregatorFactory, WINDOWING_STRATEGY);
+        fn,
+        getDoFnRunner(fn),
+        mockStepContext,
+        aggregatorFactory,
+        WINDOWING_STRATEGY,
+        new TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY),
+        new StateInternalsStateCleaner<>(
+            fn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));
 
     runner.startBundle();
 
@@ -125,13 +137,6 @@ public class StatefulDoFnRunnerTest {
         WindowedValue.of(KV.of("hello", 1), timestamp, window, PaneInfo.NO_FIRING));
     assertEquals(1L, droppedDueToLateness.sum);
 
-    runner.onTimer("processTimer", window, timestamp, TimeDomain.PROCESSING_TIME);
-    assertEquals(2L, droppedDueToLateness.sum);
-
-    runner.onTimer("synchronizedProcessTimer", window, timestamp,
-        TimeDomain.SYNCHRONIZED_PROCESSING_TIME);
-    assertEquals(3L, droppedDueToLateness.sum);
-
     runner.finishBundle();
   }
 
@@ -143,7 +148,14 @@ public class StatefulDoFnRunnerTest {
     StateTag<Object, ValueState<Integer>> stateTag = StateTags.tagForSpec(fn.stateId, fn.intState);
 
     DoFnRunner<KV<String, Integer>, Integer> runner = DoFnRunners.defaultStatefulDoFnRunner(
-        fn, getDoFnRunner(fn), mockStepContext, aggregatorFactory, WINDOWING_STRATEGY);
+        fn,
+        getDoFnRunner(fn),
+        mockStepContext,
+        aggregatorFactory,
+        WINDOWING_STRATEGY,
+        new TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY),
+        new StateInternalsStateCleaner<>(
+            fn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));
 
     Instant elementTime = new Instant(1);
 
@@ -252,4 +264,84 @@ public class StatefulDoFnRunnerTest {
     }
   }
 
+  /**
+   * A {@link StatefulDoFnRunner.CleanupTimer} implemented by TimerInternals.
+   */
+  public static class TimeInternalsCleanupTimer implements StatefulDoFnRunner.CleanupTimer {
+
+    public static final String GC_TIMER_ID = "__StatefulParDoGcTimerId";
+
+    private final TimerInternals timerInternals;
+    private final WindowingStrategy<?, ?> windowingStrategy;
+    private final Coder<BoundedWindow> windowCoder;
+
+    public TimeInternalsCleanupTimer(
+        TimerInternals timerInternals,
+        WindowingStrategy<?, ?> windowingStrategy) {
+      this.windowingStrategy = windowingStrategy;
+      WindowFn<?, ?> windowFn = windowingStrategy.getWindowFn();
+      windowCoder = (Coder<BoundedWindow>) windowFn.windowCoder();
+      this.timerInternals = timerInternals;
+    }
+
+    @Override
+    public Instant currentInputWatermarkTime() {
+      return timerInternals.currentInputWatermarkTime();
+    }
+
+    @Override
+    public void setForWindow(BoundedWindow window) {
+      Instant gcTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness());
+      timerInternals.setTimer(StateNamespaces.window(windowCoder, window),
+          GC_TIMER_ID, gcTime, TimeDomain.EVENT_TIME);
+    }
+
+    @Override
+    public boolean isForWindow(
+        String timerId,
+        BoundedWindow window,
+        Instant timestamp,
+        TimeDomain timeDomain) {
+      boolean isEventTimer = timeDomain.equals(TimeDomain.EVENT_TIME);
+      Instant gcTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness());
+      return isEventTimer && GC_TIMER_ID.equals(timerId) && gcTime.equals(timestamp);
+    }
+  }
+
+  /**
+   * A {@link StatefulDoFnRunner.StateCleaner} implemented by StateInternals.
+   */
+  public static class StateInternalsStateCleaner<W extends BoundedWindow>
+      implements StatefulDoFnRunner.StateCleaner<W> {
+
+    private final DoFn<?, ?> fn;
+    private final DoFnSignature signature;
+    private final StateInternals<?> stateInternals;
+    private final Coder<W> windowCoder;
+
+    public StateInternalsStateCleaner(
+        DoFn<?, ?> fn,
+        StateInternals<?> stateInternals,
+        Coder<W> windowCoder) {
+      this.fn = fn;
+      this.signature = DoFnSignatures.getSignature(fn.getClass());
+      this.stateInternals = stateInternals;
+      this.windowCoder = windowCoder;
+    }
+
+    @Override
+    public void clearForWindow(W window) {
+      for (Map.Entry<String, DoFnSignature.StateDeclaration> entry :
+          signature.stateDeclarations().entrySet()) {
+        try {
+          StateSpec<?, ?> spec = (StateSpec<?, ?>) entry.getValue().field().get(fn);
+          State state = stateInternals.state(StateNamespaces.window(windowCoder, window),
+              StateTags.tagForSpec(entry.getKey(), (StateSpec) spec));
+          state.clear();
+        } catch (IllegalAccessException e) {
+          throw new RuntimeException(e);
+        }
+      }
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/1a8e1f74/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
index c4622ba..a8ce680 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
@@ -43,6 +43,7 @@ import org.apache.beam.runners.core.StateNamespaces;
 import org.apache.beam.runners.core.StateNamespaces.WindowNamespace;
 import org.apache.beam.runners.core.StateTag;
 import org.apache.beam.runners.core.StateTags;
+import org.apache.beam.runners.core.StatefulDoFnRunner;
 import org.apache.beam.runners.core.TimerInternals;
 import org.apache.beam.runners.core.TimerInternals.TimerData;
 import org.apache.beam.runners.flink.translation.types.CoderTypeSerializer;
@@ -61,13 +62,18 @@ import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.join.RawUnionValue;
 import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
 import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
+import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
+import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
 import org.apache.beam.sdk.util.NullSideInputReader;
 import org.apache.beam.sdk.util.SideInputReader;
 import org.apache.beam.sdk.util.TimeDomain;
 import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.util.WindowingStrategy;
 import org.apache.beam.sdk.util.state.BagState;
+import org.apache.beam.sdk.util.state.State;
+import org.apache.beam.sdk.util.state.StateSpec;
 import org.apache.beam.sdk.values.PCollectionView;
 import org.apache.beam.sdk.values.TupleTag;
 import org.apache.flink.core.memory.DataInputViewStreamWrapper;
@@ -286,6 +292,7 @@ public class DoFnOperator<InputT, FnOutputT, OutputT>
       //
       // for some K, V
 
+
       doFnRunner = DoFnRunners.lateDataDroppingRunner(
           (DoFnRunner) doFnRunner,
           stepContext,
@@ -293,8 +300,27 @@ public class DoFnOperator<InputT, FnOutputT, OutputT>
           ((GroupAlsoByWindowViaWindowSetNewDoFn) doFn).getDroppedDueToLatenessAggregator());
     } else if (keyCoder != null) {
       // It is a stateful DoFn
+
+      StatefulDoFnRunner.CleanupTimer cleanupTimer =
+          new TimeInternalsCleanupTimer(stepContext.timerInternals(), windowingStrategy);
+
+      // we don't know the window type
+      @SuppressWarnings({"unchecked", "rawtypes"})
+      Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
+
+      @SuppressWarnings({"unchecked", "rawtypes"})
+      StatefulDoFnRunner.StateCleaner<?> stateCleaner =
+          new StateInternalsStateCleaner<>(
+              doFn, stepContext.stateInternals(), windowCoder);
+
       doFnRunner = DoFnRunners.defaultStatefulDoFnRunner(
-          doFn, doFnRunner, stepContext, aggregatorFactory, windowingStrategy);
+          doFn,
+          doFnRunner,
+          stepContext,
+          aggregatorFactory,
+          windowingStrategy,
+          cleanupTimer,
+          stateCleaner);
     }
 
     pushbackDoFnRunner =
@@ -746,7 +772,90 @@ public class DoFnOperator<InputT, FnOutputT, OutputT>
     public Instant currentOutputWatermarkTime() {
       return new Instant(currentOutputWatermark);
     }
+  }
+
+
+  /**
+   * A {@link StatefulDoFnRunner.CleanupTimer} implemented by TimerInternals.
+   */
+  public static class TimeInternalsCleanupTimer implements StatefulDoFnRunner.CleanupTimer {
+
+    public static final String GC_TIMER_ID = "__StatefulParDoGcTimerId";
+
+    private final TimerInternals timerInternals;
+    private final WindowingStrategy<?, ?> windowingStrategy;
+    private final Coder<BoundedWindow> windowCoder;
+
+    public TimeInternalsCleanupTimer(
+        TimerInternals timerInternals,
+        WindowingStrategy<?, ?> windowingStrategy) {
+      this.windowingStrategy = windowingStrategy;
+      WindowFn<?, ?> windowFn = windowingStrategy.getWindowFn();
+      windowCoder = (Coder<BoundedWindow>) windowFn.windowCoder();
+      this.timerInternals = timerInternals;
+    }
 
+    @Override
+    public Instant currentInputWatermarkTime() {
+      return timerInternals.currentInputWatermarkTime();
+    }
+
+    @Override
+    public void setForWindow(BoundedWindow window) {
+      Instant gcTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness());
+      // make sure this fires after any window.maxTimestamp() timers
+      gcTime = gcTime.plus(1L);
+      timerInternals.setTimer(StateNamespaces.window(windowCoder, window),
+          GC_TIMER_ID, gcTime, TimeDomain.EVENT_TIME);
+    }
+
+    @Override
+    public boolean isForWindow(
+        String timerId,
+        BoundedWindow window,
+        Instant timestamp,
+        TimeDomain timeDomain) {
+      boolean isEventTimer = timeDomain.equals(TimeDomain.EVENT_TIME);
+      Instant gcTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness());
+      gcTime = gcTime.plus(1L);
+      return isEventTimer && GC_TIMER_ID.equals(timerId) && gcTime.equals(timestamp);
+    }
   }
 
+  /**
+   * A {@link StatefulDoFnRunner.StateCleaner} implemented by StateInternals.
+   */
+  public static class StateInternalsStateCleaner<W extends BoundedWindow>
+      implements StatefulDoFnRunner.StateCleaner<W> {
+
+    private final DoFn<?, ?> fn;
+    private final DoFnSignature signature;
+    private final StateInternals<?> stateInternals;
+    private final Coder<W> windowCoder;
+
+    public StateInternalsStateCleaner(
+        DoFn<?, ?> fn,
+        StateInternals<?> stateInternals,
+        Coder<W> windowCoder) {
+      this.fn = fn;
+      this.signature = DoFnSignatures.getSignature(fn.getClass());
+      this.stateInternals = stateInternals;
+      this.windowCoder = windowCoder;
+    }
+
+    @Override
+    public void clearForWindow(W window) {
+      for (Map.Entry<String, DoFnSignature.StateDeclaration> entry :
+          signature.stateDeclarations().entrySet()) {
+        try {
+          StateSpec<?, ?> spec = (StateSpec<?, ?>) entry.getValue().field().get(fn);
+          State state = stateInternals.state(StateNamespaces.window(windowCoder, window),
+              StateTags.tagForSpec(entry.getKey(), (StateSpec) spec));
+          state.clear();
+        } catch (IllegalAccessException e) {
+          throw new RuntimeException(e);
+        }
+      }
+    }
+  }
 }
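
The one-millisecond bump is the substantive difference between this Flink-specific CleanupTimer and the core-java one removed above: user timers are typically set at window.maxTimestamp() (as in the test below), so pushing the GC time strictly past that instant keeps state alive until every user timer for the window has fired. Illustrative arithmetic, assuming FixedWindows of 10ms and the default zero allowed lateness:

    // window [0, 10) => window.maxTimestamp() == 9
    Instant gcTime = window.maxTimestamp()                  // 9
        .plus(windowingStrategy.getAllowedLateness())       // + 0   => 9
        .plus(1L);                                          // + 1ms => 10, strictly after user timers at 9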

http://git-wip-us.apache.org/repos/asf/beam/blob/1a8e1f74/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java
index 7d14a87..bbd3428 100644
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java
+++ b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java
@@ -17,7 +17,9 @@
  */
 package org.apache.beam.runners.flink.streaming;
 
+import static org.hamcrest.Matchers.emptyIterable;
 import static org.hamcrest.collection.IsIterableContainingInOrder.contains;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertThat;
 
 import com.google.common.base.Function;
@@ -29,9 +31,12 @@ import java.util.Collections;
 import java.util.HashMap;
 import javax.annotation.Nullable;
 import org.apache.beam.runners.flink.FlinkPipelineOptions;
+import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
 import org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator;
 import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.KvCoder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.VarIntCoder;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
 import org.apache.beam.sdk.testing.PCollectionViewTesting;
 import org.apache.beam.sdk.transforms.DoFn;
@@ -40,14 +45,23 @@ import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.transforms.windowing.FixedWindows;
 import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
 import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.TimeDomain;
+import org.apache.beam.sdk.util.Timer;
+import org.apache.beam.sdk.util.TimerSpec;
+import org.apache.beam.sdk.util.TimerSpecs;
 import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.util.state.StateSpec;
+import org.apache.beam.sdk.util.state.StateSpecs;
+import org.apache.beam.sdk.util.state.ValueState;
+import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollectionView;
 import org.apache.beam.sdk.values.TupleTag;
 
 import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
 import org.apache.flink.api.java.functions.KeySelector;
 import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
+import org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness;
 import org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness;
 import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;
 import org.apache.flink.streaming.util.TwoInputStreamOperatorTestHarness;
@@ -169,6 +183,217 @@ public class DoFnOperatorTest {
     testHarness.close();
   }
 
+  @Test
+  public void testLateDroppingForStatefulFn() throws Exception {
+
+    WindowingStrategy<Object, IntervalWindow> windowingStrategy =
+        WindowingStrategy.of(FixedWindows.of(new Duration(10)));
+
+    DoFn<Integer, String> fn = new DoFn<Integer, String>() {
+
+      @StateId("state")
+      private final StateSpec<Object, ValueState<String>> stateSpec =
+          StateSpecs.value(StringUtf8Coder.of());
+
+      @ProcessElement
+      public void processElement(ProcessContext context) {
+        context.output(context.element().toString());
+      }
+    };
+
+    WindowedValue.FullWindowedValueCoder<Integer> windowedValueCoder =
+        WindowedValue.getFullCoder(
+            VarIntCoder.of(),
+            windowingStrategy.getWindowFn().windowCoder());
+
+    TupleTag<String> outputTag = new TupleTag<>("main-output");
+
+    DoFnOperator<Integer, String, WindowedValue<String>> doFnOperator = new DoFnOperator<>(
+        fn,
+        windowedValueCoder,
+        outputTag,
+        Collections.<TupleTag<?>>emptyList(),
+        new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<String>>(),
+        windowingStrategy,
+        new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
+        Collections.<PCollectionView<?>>emptyList(), /* side inputs */
+        PipelineOptionsFactory.as(FlinkPipelineOptions.class),
+        VarIntCoder.of() /* key coder */);
+
+    OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness =
+        new KeyedOneInputStreamOperatorTestHarness<>(
+            doFnOperator,
+            new KeySelector<WindowedValue<Integer>, Integer>() {
+              @Override
+              public Integer getKey(WindowedValue<Integer> integerWindowedValue) throws Exception {
+                return integerWindowedValue.getValue();
+              }
+            },
+            new CoderTypeInformation<>(VarIntCoder.of()));
+
+    testHarness.open();
+
+    testHarness.processWatermark(0);
+
+    IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
+
+    // this should not be late
+    testHarness.processElement(
+        new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
+
+    assertThat(
+        this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+        contains(WindowedValue.of("13", new Instant(0), window1, PaneInfo.NO_FIRING)));
+
+    testHarness.getOutput().clear();
+
+    testHarness.processWatermark(9);
+
+    // this should still not be considered late
+    testHarness.processElement(
+        new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
+
+    assertThat(
+        this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+        contains(WindowedValue.of("17", new Instant(0), window1, PaneInfo.NO_FIRING)));
+
+    testHarness.getOutput().clear();
+
+    testHarness.processWatermark(10);
+
+    // this should now be considered late
+    testHarness.processElement(
+        new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
+
+    assertThat(
+        this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+        emptyIterable());
+
+    testHarness.close();
+  }
+
+  @Test
+  public void testStateGCForStatefulFn() throws Exception {
+
+    WindowingStrategy<Object, IntervalWindow> windowingStrategy =
+        WindowingStrategy.of(FixedWindows.of(new Duration(10)));
+
+    final String timerId = "boo";
+    final String stateId = "dazzle";
+
+    final int offset = 5000;
+    final int timerOutput = 4093;
+
+    DoFn<KV<String, Integer>, KV<String, Integer>> fn =
+        new DoFn<KV<String, Integer>, KV<String, Integer>>() {
+
+          @TimerId(timerId)
+          private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
+
+          @StateId(stateId)
+          private final StateSpec<Object, ValueState<String>> stateSpec =
+              StateSpecs.value(StringUtf8Coder.of());
+
+          @ProcessElement
+          public void processElement(
+              ProcessContext context,
+              @TimerId(timerId) Timer timer,
+              @StateId(stateId) ValueState<String> state,
+              BoundedWindow window) {
+            timer.set(window.maxTimestamp());
+            state.write(context.element().getKey());
+            context.output(
+                KV.of(context.element().getKey(), context.element().getValue() + offset));
+          }
+
+          @OnTimer(timerId)
+          public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
+            context.output(KV.of(state.read(), timerOutput));
+          }
+        };
+
+    WindowedValue.FullWindowedValueCoder<KV<String, Integer>> windowedValueCoder =
+        WindowedValue.getFullCoder(
+            KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()),
+            windowingStrategy.getWindowFn().windowCoder());
+
+    TupleTag<KV<String, Integer>> outputTag = new TupleTag<>("main-output");
+
+    DoFnOperator<
+        KV<String, Integer>, KV<String, Integer>, WindowedValue<KV<String, Integer>>> doFnOperator =
+        new DoFnOperator<>(
+            fn,
+            windowedValueCoder,
+            outputTag,
+            Collections.<TupleTag<?>>emptyList(),
+            new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<String, Integer>>>(),
+            windowingStrategy,
+            new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
+            Collections.<PCollectionView<?>>emptyList(), /* side inputs */
+            PipelineOptionsFactory.as(FlinkPipelineOptions.class),
+            StringUtf8Coder.of() /* key coder */);
+
+    KeyedOneInputStreamOperatorTestHarness<
+        String,
+        WindowedValue<KV<String, Integer>>,
+        WindowedValue<KV<String, Integer>>> testHarness =
+        new KeyedOneInputStreamOperatorTestHarness<>(
+            doFnOperator,
+            new KeySelector<WindowedValue<KV<String, Integer>>, String>() {
+              @Override
+              public String getKey(
+                  WindowedValue<KV<String, Integer>> kvWindowedValue) throws Exception {
+                return kvWindowedValue.getValue().getKey();
+              }
+            },
+            new CoderTypeInformation<>(StringUtf8Coder.of()));
+
+    testHarness.open();
+
+    testHarness.processWatermark(0);
+
+    assertEquals(0, testHarness.numKeyedStateEntries());
+
+    IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
+
+    testHarness.processElement(
+        new StreamRecord<>(
+            WindowedValue.of(KV.of("key1", 5), new Instant(1), window1, PaneInfo.NO_FIRING)));
+
+    testHarness.processElement(
+        new StreamRecord<>(
+            WindowedValue.of(KV.of("key2", 7), new Instant(3), window1, PaneInfo.NO_FIRING)));
+
+    assertThat(
+        this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+        contains(
+            WindowedValue.of(
+                KV.of("key1", 5 + offset), new Instant(1), window1, PaneInfo.NO_FIRING),
+            WindowedValue.of(
+                KV.of("key2", 7 + offset), new Instant(3), window1, PaneInfo.NO_FIRING)));
+
+    assertEquals(2, testHarness.numKeyedStateEntries());
+
+    testHarness.getOutput().clear();
+
+    // this should trigger both the window.maxTimestamp() timer and the GC timer
+    // this tests that the GC timer fires after the user timer
+    testHarness.processWatermark(15);
+
+    assertThat(
+        this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+        contains(
+            WindowedValue.of(
+                KV.of("key1", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING),
+            WindowedValue.of(
+                KV.of("key2", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING)));
+
+    // ensure the state was garbage collected
+    assertEquals(0, testHarness.numKeyedStateEntries());
+
+    testHarness.close();
+  }
+
   public void testSideInputs(boolean keyed) throws Exception {
 
     WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =


[24/50] [abbrv] beam git commit: Remove Pipeline.getRunner

Posted by ke...@apache.org.
Remove Pipeline.getRunner

Runners need not be instantiated until after pipeline construction, so
they should not be exposed by the Pipeline class.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d41fe1df
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d41fe1df
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d41fe1df

Branch: refs/heads/gearpump-runner
Commit: d41fe1df26329479b82cc59d260998f2b88b4799
Parents: 2c2424c
Author: Thomas Groh <tg...@google.com>
Authored: Thu Mar 2 10:54:29 2017 -0800
Committer: Thomas Groh <tg...@google.com>
Committed: Fri Mar 10 09:40:50 2017 -0800

----------------------------------------------------------------------
 .../beam/runners/direct/DirectRunner.java       | 95 ++++++++++----------
 .../direct/TestStreamEvaluatorFactory.java      | 22 +++--
 .../direct/TestStreamEvaluatorFactoryTest.java  |  6 +-
 .../BatchStatefulParDoOverridesTest.java        |  4 +-
 .../DataflowPipelineTranslatorTest.java         | 39 ++++----
 .../runners/dataflow/DataflowRunnerTest.java    |  4 +-
 .../testing/TestDataflowRunnerTest.java         | 50 +++++------
 .../main/java/org/apache/beam/sdk/Pipeline.java |  7 --
 8 files changed, 110 insertions(+), 117 deletions(-)
----------------------------------------------------------------------
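
The mechanical migration is the same at every call site below: rather than casting the result of Pipeline.getRunner(), tests build the runner from the options they already hold. A before/after sketch (variable names as used in the tests):

    // before (removed): the runner was pulled back out of the pipeline
    DataflowRunner runner = (DataflowRunner) pipeline.getRunner();

    // after: instantiate the runner directly from the pipeline options
    DataflowRunner runner = DataflowRunner.fromOptions(options);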


http://git-wip-us.apache.org/repos/asf/beam/blob/d41fe1df/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
index f56d225..ce8dbc0 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
@@ -70,53 +70,6 @@ import org.joda.time.Instant;
  */
 public class DirectRunner extends PipelineRunner<DirectPipelineResult> {
   /**
-   * The default set of transform overrides to use in the {@link DirectRunner}.
-   *
-   * <p>The order in which overrides are applied is important, as some overrides are expanded into a
-   * composite. If the composite contains {@link PTransform PTransforms} which are also overridden,
-   * these PTransforms must occur later in the iteration order. {@link ImmutableMap} has an
-   * iteration order based on the order in which elements are added to it.
-   */
-  @SuppressWarnings("rawtypes")
-  private static Map<PTransformMatcher, PTransformOverrideFactory> defaultTransformOverrides =
-      ImmutableMap.<PTransformMatcher, PTransformOverrideFactory>builder()
-          .put(
-              PTransformMatchers.writeWithRunnerDeterminedSharding(),
-              new WriteWithShardingFactory()) /* Uses a view internally. */
-          .put(
-              PTransformMatchers.classEqualTo(CreatePCollectionView.class),
-              new ViewOverrideFactory()) /* Uses pardos and GBKs */
-          .put(
-              PTransformMatchers.classEqualTo(TestStream.class),
-              new DirectTestStreamFactory()) /* primitive */
-          /* Single-output ParDos are implemented in terms of Multi-output ParDos. Any override
-          that is applied to a multi-output ParDo must first have all matching Single-output ParDos
-          converted to match.
-           */
-          .put(PTransformMatchers.splittableParDoSingle(), new ParDoSingleViaMultiOverrideFactory())
-          .put(
-              PTransformMatchers.stateOrTimerParDoSingle(),
-              new ParDoSingleViaMultiOverrideFactory())
-          // SplittableParMultiDo is implemented in terms of nonsplittable single ParDos
-          .put(PTransformMatchers.splittableParDoMulti(), new ParDoMultiOverrideFactory())
-          // state and timer pardos are implemented in terms of nonsplittable single ParDos
-          .put(PTransformMatchers.stateOrTimerParDoMulti(), new ParDoMultiOverrideFactory())
-          .put(
-              PTransformMatchers.classEqualTo(ParDo.Bound.class),
-              new ParDoSingleViaMultiOverrideFactory()) /* returns a BoundMulti */
-          .put(
-              PTransformMatchers.classEqualTo(BoundMulti.class),
-              /* returns one of two primitives; SplittableParDos are replaced above. */
-              new ParDoMultiOverrideFactory())
-          .put(
-              PTransformMatchers.classEqualTo(GBKIntoKeyedWorkItems.class),
-              new DirectGBKIntoKeyedWorkItemsOverrideFactory()) /* Returns a GBKO */
-          .put(
-              PTransformMatchers.classEqualTo(GroupByKey.class),
-              new DirectGroupByKeyOverrideFactory()) /* returns two chained primitives. */
-          .build();
-
-  /**
    * Part of a {@link PCollection}. Elements are output to a bundle, which will cause them to be
    * executed by {@link PTransform PTransforms} that consume the {@link PCollection} this bundle is
    * a part of at a later point. This is an uncommitted bundle and can have elements added to it.
@@ -309,7 +262,7 @@ public class DirectRunner extends PipelineRunner<DirectPipelineResult> {
   @Override
   public DirectPipelineResult run(Pipeline pipeline) {
     for (Map.Entry<PTransformMatcher, PTransformOverrideFactory> override :
-        defaultTransformOverrides.entrySet()) {
+        defaultTransformOverrides().entrySet()) {
       pipeline.replace(override.getKey(), override.getValue());
     }
     MetricsEnvironment.setMetricsSupported(true);
@@ -361,6 +314,52 @@ public class DirectRunner extends PipelineRunner<DirectPipelineResult> {
   }
 
   /**
+   * The default set of transform overrides to use in the {@link DirectRunner}.
+   *
+   * <p>The order in which overrides are applied is important, as some overrides are expanded into a
+   * composite. If the composite contains {@link PTransform PTransforms} which are also overridden,
+   * these PTransforms must occur later in the iteration order. {@link ImmutableMap} has an
+   * iteration order based on the order in which elements are added to it.
+   */
+  @SuppressWarnings("rawtypes")
+  private Map<PTransformMatcher, PTransformOverrideFactory> defaultTransformOverrides() {
+    return ImmutableMap.<PTransformMatcher, PTransformOverrideFactory>builder()
+        .put(
+            PTransformMatchers.writeWithRunnerDeterminedSharding(),
+            new WriteWithShardingFactory()) /* Uses a view internally. */
+        .put(
+            PTransformMatchers.classEqualTo(CreatePCollectionView.class),
+            new ViewOverrideFactory()) /* Uses pardos and GBKs */
+        .put(
+            PTransformMatchers.classEqualTo(TestStream.class),
+            new DirectTestStreamFactory(this)) /* primitive */
+        /* Single-output ParDos are implemented in terms of Multi-output ParDos. Any override
+        that is applied to a multi-output ParDo must first have all matching Single-output ParDos
+        converted to match.
+         */
+        .put(PTransformMatchers.splittableParDoSingle(), new ParDoSingleViaMultiOverrideFactory())
+        .put(PTransformMatchers.stateOrTimerParDoSingle(), new ParDoSingleViaMultiOverrideFactory())
+        // SplittableParMultiDo is implemented in terms of nonsplittable single ParDos
+        .put(PTransformMatchers.splittableParDoMulti(), new ParDoMultiOverrideFactory())
+        // state and timer pardos are implemented in terms of nonsplittable single ParDos
+        .put(PTransformMatchers.stateOrTimerParDoMulti(), new ParDoMultiOverrideFactory())
+        .put(
+            PTransformMatchers.classEqualTo(ParDo.Bound.class),
+            new ParDoSingleViaMultiOverrideFactory()) /* returns a BoundMulti */
+        .put(
+            PTransformMatchers.classEqualTo(BoundMulti.class),
+            /* returns one of two primitives; SplittableParDos are replaced above. */
+            new ParDoMultiOverrideFactory())
+        .put(
+            PTransformMatchers.classEqualTo(GBKIntoKeyedWorkItems.class),
+            new DirectGBKIntoKeyedWorkItemsOverrideFactory()) /* Returns a GBKO */
+        .put(
+            PTransformMatchers.classEqualTo(GroupByKey.class),
+            new DirectGroupByKeyOverrideFactory()) /* returns two chained primitives. */
+        .build();
+  }
+
+  /**
    * The result of running a {@link Pipeline} with the {@link DirectRunner}.
    *
    * <p>Throws {@link UnsupportedOperationException} for all methods.
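
The ordering guarantee the javadoc relies on is Guava's: ImmutableMap iterates entries in the order they were added to the builder. A minimal standalone illustration of that property (not Beam code):

    ImmutableMap<String, Integer> overrides =
        ImmutableMap.<String, Integer>builder()
            .put("expandedComposite", 1)
            .put("replacedPrimitive", 2)
            .build();
    // overrides.keySet() iterates "expandedComposite", then "replacedPrimitive"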

http://git-wip-us.apache.org/repos/asf/beam/blob/d41fe1df/runners/direct-java/src/main/java/org/apache/beam/runners/direct/TestStreamEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/TestStreamEvaluatorFactory.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/TestStreamEvaluatorFactory.java
index 628aa23..0dd8919 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/TestStreamEvaluatorFactory.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/TestStreamEvaluatorFactory.java
@@ -18,8 +18,6 @@
 
 package org.apache.beam.runners.direct;
 
-import static com.google.common.base.Preconditions.checkState;
-
 import com.google.auto.value.AutoValue;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Supplier;
@@ -35,7 +33,6 @@ import org.apache.beam.runners.direct.DirectRunner.CommittedBundle;
 import org.apache.beam.runners.direct.DirectRunner.UncommittedBundle;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.runners.PTransformOverrideFactory;
-import org.apache.beam.sdk.runners.PipelineRunner;
 import org.apache.beam.sdk.testing.TestStream;
 import org.apache.beam.sdk.testing.TestStream.ElementEvent;
 import org.apache.beam.sdk.testing.TestStream.Event;
@@ -166,11 +163,16 @@ class TestStreamEvaluatorFactory implements TransformEvaluatorFactory {
 
   static class DirectTestStreamFactory<T>
       implements PTransformOverrideFactory<PBegin, PCollection<T>, TestStream<T>> {
+    private final DirectRunner runner;
+
+    DirectTestStreamFactory(DirectRunner runner) {
+      this.runner = runner;
+    }
 
     @Override
     public PTransform<PBegin, PCollection<T>> getReplacementTransform(
         TestStream<T> transform) {
-      return new DirectTestStream<>(transform);
+      return new DirectTestStream<>(runner, transform);
     }
 
     @Override
@@ -185,22 +187,18 @@ class TestStreamEvaluatorFactory implements TransformEvaluatorFactory {
     }
 
     static class DirectTestStream<T> extends PTransform<PBegin, PCollection<T>> {
+      private final transient DirectRunner runner;
       private final TestStream<T> original;
 
       @VisibleForTesting
-      DirectTestStream(TestStream<T> transform) {
+      DirectTestStream(DirectRunner runner, TestStream<T> transform) {
+        this.runner = runner;
         this.original = transform;
       }
 
       @Override
       public PCollection<T> expand(PBegin input) {
-        PipelineRunner<?> runner = input.getPipeline().getRunner();
-        checkState(
-            runner instanceof DirectRunner,
-            "%s can only be used when running with the %s",
-            getClass().getSimpleName(),
-            DirectRunner.class.getSimpleName());
-        ((DirectRunner) runner).setClockSupplier(new TestClockSupplier());
+        runner.setClockSupplier(new TestClockSupplier());
         return PCollection.<T>createPrimitiveOutputInternal(
                 input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.UNBOUNDED)
             .setCoder(original.getValueCoder());

http://git-wip-us.apache.org/repos/asf/beam/blob/d41fe1df/runners/direct-java/src/test/java/org/apache/beam/runners/direct/TestStreamEvaluatorFactoryTest.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/TestStreamEvaluatorFactoryTest.java b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/TestStreamEvaluatorFactoryTest.java
index 9ed72d5..fc689fe 100644
--- a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/TestStreamEvaluatorFactoryTest.java
+++ b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/TestStreamEvaluatorFactoryTest.java
@@ -60,10 +60,12 @@ public class TestStreamEvaluatorFactoryTest {
 
   @Rule
   public TestPipeline p = TestPipeline.create().enableAbandonedNodeEnforcement(false);
+  private DirectRunner runner;
 
   @Before
   public void setup() {
     context = mock(EvaluationContext.class);
+    runner = DirectRunner.fromOptions(TestPipeline.testingPipelineOptions());
     factory = new TestStreamEvaluatorFactory(context);
     bundleFactory = ImmutableListBundleFactory.create();
   }
@@ -80,7 +82,7 @@ public class TestStreamEvaluatorFactoryTest {
         .advanceProcessingTime(Duration.standardMinutes(10))
         .advanceWatermarkToInfinity();
     PCollection<Integer> streamVals =
-        p.apply(new DirectTestStream<Integer>(testStream));
+        p.apply(new DirectTestStream<Integer>(runner, testStream));
 
     TestClock clock = new TestClock();
     when(context.getClock()).thenReturn(clock);
@@ -180,7 +182,7 @@ public class TestStreamEvaluatorFactoryTest {
 
   @Test
   public void overrideFactoryGetInputSucceeds() {
-    DirectTestStreamFactory<?> factory = new DirectTestStreamFactory<>();
+    DirectTestStreamFactory<?> factory = new DirectTestStreamFactory<>(runner);
     PBegin begin = factory.getInput(Collections.<TaggedPValue>emptyList(), p);
     assertThat(begin.getPipeline(), Matchers.<Pipeline>equalTo(p));
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/d41fe1df/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/BatchStatefulParDoOverridesTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/BatchStatefulParDoOverridesTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/BatchStatefulParDoOverridesTest.java
index ef3e414..899902a 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/BatchStatefulParDoOverridesTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/BatchStatefulParDoOverridesTest.java
@@ -71,7 +71,7 @@ public class BatchStatefulParDoOverridesTest implements Serializable {
     DummyStatefulDoFn fn = new DummyStatefulDoFn();
     pipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(fn));
 
-    DataflowRunner runner = (DataflowRunner) pipeline.getRunner();
+    DataflowRunner runner = DataflowRunner.fromOptions(options);
     runner.replaceTransforms(pipeline);
     assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
   }
@@ -89,7 +89,7 @@ public class BatchStatefulParDoOverridesTest implements Serializable {
         .apply(Create.of(KV.of(1, 2)))
         .apply(ParDo.withOutputTags(mainOutputTag, TupleTagList.empty()).of(fn));
 
-    DataflowRunner runner = (DataflowRunner) pipeline.getRunner();
+    DataflowRunner runner = DataflowRunner.fromOptions(options);
     runner.replaceTransforms(pipeline);
     assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/d41fe1df/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
index 660e92e..813e76d 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
@@ -190,7 +190,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Job job =
         DataflowPipelineTranslator.fromOptions(options)
             .translate(
-                p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList())
+                p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
             .getJob();
 
     Map<String, Object> sdkPipelineOptions = job.getEnvironment().getSdkPipelineOptions();
@@ -223,7 +223,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Job job =
         DataflowPipelineTranslator.fromOptions(options)
             .translate(
-                p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList())
+                p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
             .getJob();
 
     assertEquals(1, job.getEnvironment().getWorkerPools().size());
@@ -240,7 +240,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Job job =
         DataflowPipelineTranslator.fromOptions(options)
             .translate(
-                p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList())
+                p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
             .getJob();
 
     assertEquals(1, job.getEnvironment().getWorkerPools().size());
@@ -259,7 +259,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Job job =
         DataflowPipelineTranslator.fromOptions(options)
             .translate(
-                p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList())
+                p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
             .getJob();
 
     assertEquals(1, job.getEnvironment().getWorkerPools().size());
@@ -276,7 +276,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Job job =
         DataflowPipelineTranslator.fromOptions(options)
             .translate(
-                p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList())
+                p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
             .getJob();
 
     assertEquals(1, job.getEnvironment().getWorkerPools().size());
@@ -292,7 +292,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Job job =
         DataflowPipelineTranslator.fromOptions(options)
             .translate(
-                p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList())
+                p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
             .getJob();
 
     assertEquals(1, job.getEnvironment().getWorkerPools().size());
@@ -328,7 +328,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Job job =
         DataflowPipelineTranslator.fromOptions(options)
             .translate(
-                p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList())
+                p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
             .getJob();
 
     assertEquals(1, job.getEnvironment().getWorkerPools().size());
@@ -363,7 +363,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Job job =
         DataflowPipelineTranslator.fromOptions(options)
             .translate(
-                p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList())
+                p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
             .getJob();
 
     assertEquals(1, job.getEnvironment().getWorkerPools().size());
@@ -397,7 +397,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Job job =
         DataflowPipelineTranslator.fromOptions(options)
             .translate(
-                p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList())
+                p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
             .getJob();
 
     assertEquals(1, job.getEnvironment().getWorkerPools().size());
@@ -417,7 +417,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Job job =
         DataflowPipelineTranslator.fromOptions(options)
             .translate(
-                p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList())
+                p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
             .getJob();
 
     assertEquals(1, job.getEnvironment().getWorkerPools().size());
@@ -438,7 +438,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     Job job =
         DataflowPipelineTranslator.fromOptions(options)
             .translate(
-                p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList())
+                p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
             .getJob();
 
     assertEquals(1, job.getEnvironment().getWorkerPools().size());
@@ -653,7 +653,8 @@ public class DataflowPipelineTranslatorTest implements Serializable {
 
   @Test
   public void testMultiGraphPipelineSerialization() throws Exception {
-    Pipeline p = Pipeline.create(buildPipelineOptions());
+    DataflowPipelineOptions options = buildPipelineOptions();
+    Pipeline p = Pipeline.create(options);
 
     PCollection<Integer> input = p.begin()
         .apply(Create.of(1, 2, 3));
@@ -666,7 +667,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
 
     // Check that translation doesn't fail.
     JobSpecification jobSpecification = t.translate(
-        p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList());
+        p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList());
     assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
   }
 
@@ -710,7 +711,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     // Check that translation doesn't fail.
     JobSpecification jobSpecification = t.translate(
         pipeline,
-        (DataflowRunner) pipeline.getRunner(),
+        DataflowRunner.fromOptions(options),
         Collections.<DataflowPackage>emptyList());
     assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
   }
@@ -737,7 +738,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
         ThrowableMessageMatcher.hasMessage(containsString("Unsupported wildcard usage"))));
     t.translate(
         pipeline,
-        (DataflowRunner) pipeline.getRunner(),
+        DataflowRunner.fromOptions(options),
         Collections.<DataflowPackage>emptyList());
   }
 
@@ -764,7 +765,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     // Check that translation does not fail.
     t.translate(
         pipeline,
-        (DataflowRunner) pipeline.getRunner(),
+        DataflowRunner.fromOptions(options),
         Collections.<DataflowPackage>emptyList());
   }
 
@@ -785,7 +786,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
         translator
             .translate(
                 pipeline,
-                (DataflowRunner) pipeline.getRunner(),
+                DataflowRunner.fromOptions(options),
                 Collections.<DataflowPackage>emptyList())
             .getJob();
     assertAllStepOutputsHaveUniqueIds(job);
@@ -817,7 +818,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
         translator
             .translate(
                 pipeline,
-                (DataflowRunner) pipeline.getRunner(),
+                DataflowRunner.fromOptions(options),
                 Collections.<DataflowPackage>emptyList())
             .getJob();
     assertAllStepOutputsHaveUniqueIds(job);
@@ -1011,7 +1012,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
         translator
             .translate(
                 pipeline,
-                (DataflowRunner) pipeline.getRunner(),
+                DataflowRunner.fromOptions(options),
                 Collections.<DataflowPackage>emptyList())
             .getJob();
     assertAllStepOutputsHaveUniqueIds(job);

http://git-wip-us.apache.org/repos/asf/beam/blob/d41fe1df/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
index a788077..a4031d1 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
@@ -951,7 +951,7 @@ public class DataflowRunnerTest {
     thrown.expectMessage(Matchers.containsString("no translator registered"));
     DataflowPipelineTranslator.fromOptions(options)
         .translate(
-            p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList());
+            p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList());
 
     ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
     Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
@@ -989,7 +989,7 @@ public class DataflowRunnerTest {
         });
 
     translator.translate(
-        p, (DataflowRunner) p.getRunner(), Collections.<DataflowPackage>emptyList());
+        p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList());
     assertTrue(transform.translated);
   }
 

http://git-wip-us.apache.org/repos/asf/beam/blob/d41fe1df/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/testing/TestDataflowRunnerTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/testing/TestDataflowRunnerTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/testing/TestDataflowRunnerTest.java
index 1e906d2..d3eccbb 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/testing/TestDataflowRunnerTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/testing/TestDataflowRunnerTest.java
@@ -140,7 +140,7 @@ public class TestDataflowRunnerTest {
     DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
     when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     when(request.execute()).thenReturn(generateMockMetricResponse(true /* success */,
         true /* tentative */, null /* additionalMetrics */));
     assertEquals(mockJob, runner.run(p, mockRunner));
@@ -160,7 +160,7 @@ public class TestDataflowRunnerTest {
     DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
     when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     when(request.execute()).thenReturn(generateMockMetricResponse(false /* success */,
         false /* tentative */, null /* additionalMetrics */));
     try {
@@ -202,7 +202,7 @@ public class TestDataflowRunnerTest {
 
     when(request.execute()).thenReturn(generateMockMetricResponse(false /* success */,
         true /* tentative */, null /* additionalMetrics */));
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     try {
       runner.run(p, mockRunner);
     } catch (AssertionError expected) {
@@ -233,7 +233,7 @@ public class TestDataflowRunnerTest {
     when(request.execute())
         .thenReturn(generateMockMetricResponse(true /* success */, true /* tentative */,
             ImmutableMap.of(WATERMARK_METRIC_SUFFIX, DEFAULT_MAX_WATERMARK)));
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     runner.run(p, mockRunner);
   }
 
@@ -254,7 +254,7 @@ public class TestDataflowRunnerTest {
     when(request.execute())
         .thenReturn(generateMockStreamingMetricResponse(
             ImmutableMap.of(WATERMARK_METRIC_SUFFIX, DEFAULT_MAX_WATERMARK)));
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     runner.run(p, mockRunner);
   }
 
@@ -275,7 +275,7 @@ public class TestDataflowRunnerTest {
 
     when(request.execute()).thenReturn(generateMockMetricResponse(false /* success */,
         true /* tentative */, null /* additionalMetrics */));
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     try {
       runner.run(p, mockRunner);
     } catch (AssertionError expected) {
@@ -350,7 +350,7 @@ public class TestDataflowRunnerTest {
     PCollection<Integer> pc = p.apply(Create.of(1, 2, 3));
     PAssert.that(pc).containsInAnyOrder(1, 2, 3);
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     doReturn(State.DONE).when(job).getState();
     JobMetrics metrics = buildJobMetrics(
         generateMockMetrics(true /* success */, true /* tentative */));
@@ -364,7 +364,7 @@ public class TestDataflowRunnerTest {
     PCollection<Integer> pc = p.apply(Create.of(1, 2, 3));
     PAssert.that(pc).containsInAnyOrder(1, 2, 3);
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     doReturn(State.DONE).when(job).getState();
     JobMetrics metrics = buildJobMetrics(
         generateMockMetrics(false /* success */, true /* tentative */));
@@ -392,7 +392,7 @@ public class TestDataflowRunnerTest {
     Pipeline p = TestPipeline.create(options);
     p.apply(Create.of(1, 2, 3));
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     JobMetrics metrics = buildJobMetrics(generateMockStreamingMetrics(
         ImmutableMap.of("no-watermark", new BigDecimal(100))));
     doReturn(State.RUNNING).when(job).getState();
@@ -405,7 +405,7 @@ public class TestDataflowRunnerTest {
     Pipeline p = TestPipeline.create(options);
     p.apply(Create.of(1, 2, 3));
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     JobMetrics metrics = buildJobMetrics(generateMockStreamingMetrics(
         ImmutableMap.of(WATERMARK_METRIC_SUFFIX, DEFAULT_MAX_WATERMARK)));
     doReturn(State.RUNNING).when(job).getState();
@@ -418,7 +418,7 @@ public class TestDataflowRunnerTest {
     Pipeline p = TestPipeline.create(options);
     p.apply(Create.of(1, 2, 3));
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     JobMetrics metrics = buildJobMetrics(generateMockStreamingMetrics(
         ImmutableMap.of(LEGACY_WATERMARK_METRIC_SUFFIX, DEFAULT_MAX_WATERMARK)));
     doReturn(State.RUNNING).when(job).getState();
@@ -431,7 +431,7 @@ public class TestDataflowRunnerTest {
     Pipeline p = TestPipeline.create(options);
     p.apply(Create.of(1, 2, 3));
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     JobMetrics metrics = buildJobMetrics(generateMockStreamingMetrics
         (ImmutableMap.of(WATERMARK_METRIC_SUFFIX, new BigDecimal(100))));
     doReturn(State.RUNNING).when(job).getState();
@@ -444,7 +444,7 @@ public class TestDataflowRunnerTest {
     Pipeline p = TestPipeline.create(options);
     p.apply(Create.of(1, 2, 3));
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     JobMetrics metrics = buildJobMetrics(generateMockStreamingMetrics(
         ImmutableMap.of("one" + WATERMARK_METRIC_SUFFIX, DEFAULT_MAX_WATERMARK,
             "two" + WATERMARK_METRIC_SUFFIX, DEFAULT_MAX_WATERMARK)));
@@ -458,7 +458,7 @@ public class TestDataflowRunnerTest {
     Pipeline p = TestPipeline.create(options);
     p.apply(Create.of(1, 2, 3));
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     JobMetrics metrics = buildJobMetrics(generateMockStreamingMetrics(
         ImmutableMap.of("one" + WATERMARK_METRIC_SUFFIX, DEFAULT_MAX_WATERMARK,
             "two" + WATERMARK_METRIC_SUFFIX, new BigDecimal(100))));
@@ -472,7 +472,7 @@ public class TestDataflowRunnerTest {
     Pipeline p = TestPipeline.create(options);
     p.apply(Create.of(1, 2, 3));
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     JobMetrics metrics = buildJobMetrics(generateMockStreamingMetrics(
         ImmutableMap.of("one" + WATERMARK_METRIC_SUFFIX, DEFAULT_MAX_WATERMARK,
             "no-watermark", new BigDecimal(100))));
@@ -487,7 +487,7 @@ public class TestDataflowRunnerTest {
     PCollection<Integer> pc = p.apply(Create.of(1, 2, 3));
     PAssert.that(pc).containsInAnyOrder(1, 2, 3);
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     doReturn(State.FAILED).when(job).getState();
     assertEquals(Optional.of(false), runner.checkForPAssertSuccess(job, null /* metrics */));
   }
@@ -522,7 +522,7 @@ public class TestDataflowRunnerTest {
 
     when(request.execute()).thenReturn(generateMockMetricResponse(false /* success */,
         true /* tentative */, null /* additionalMetrics */));
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     try {
       runner.run(p, mockRunner);
     } catch (AssertionError expected) {
@@ -543,7 +543,7 @@ public class TestDataflowRunnerTest {
 
     when(request.execute()).thenReturn(generateMockMetricResponse(true /* success */,
         true /* tentative */, null /* additionalMetrics */));
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     JobMetrics metrics = runner.getJobMetrics(job);
 
     assertEquals(1, metrics.getMetrics().size());
@@ -558,7 +558,7 @@ public class TestDataflowRunnerTest {
     p.apply(Create.of(1, 2, 3));
 
     when(request.execute()).thenThrow(new IOException());
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     assertNull(runner.getJobMetrics(job));
   }
 
@@ -576,7 +576,7 @@ public class TestDataflowRunnerTest {
     DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
     when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     p.getOptions().as(TestPipelineOptions.class)
         .setOnCreateMatcher(new TestSuccessMatcher(mockJob, 0));
 
@@ -600,7 +600,7 @@ public class TestDataflowRunnerTest {
     DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
     when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     p.getOptions().as(TestPipelineOptions.class)
         .setOnCreateMatcher(new TestSuccessMatcher(mockJob, 0));
 
@@ -627,7 +627,7 @@ public class TestDataflowRunnerTest {
     DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
     when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     p.getOptions().as(TestPipelineOptions.class)
         .setOnSuccessMatcher(new TestSuccessMatcher(mockJob, 1));
 
@@ -651,7 +651,7 @@ public class TestDataflowRunnerTest {
     DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
     when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     p.getOptions().as(TestPipelineOptions.class)
         .setOnSuccessMatcher(new TestSuccessMatcher(mockJob, 1));
 
@@ -678,7 +678,7 @@ public class TestDataflowRunnerTest {
     DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
     when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     p.getOptions().as(TestPipelineOptions.class)
         .setOnSuccessMatcher(new TestFailureMatcher());
 
@@ -709,7 +709,7 @@ public class TestDataflowRunnerTest {
     DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
     when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);
 
-    TestDataflowRunner runner = (TestDataflowRunner) p.getRunner();
+    TestDataflowRunner runner = TestDataflowRunner.fromOptions(options);
     p.getOptions().as(TestPipelineOptions.class)
         .setOnSuccessMatcher(new TestFailureMatcher());
 

http://git-wip-us.apache.org/repos/asf/beam/blob/d41fe1df/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java
index f09f2b4..2f368b1 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java
@@ -456,13 +456,6 @@ public class Pipeline {
   }
 
   /**
-   * Returns the configured {@link PipelineRunner}.
-   */
-  public PipelineRunner<?> getRunner() {
-    return runner;
-  }
-
-  /**
    * Returns the configured {@link PipelineOptions}.
    *
    * @deprecated see BEAM-818 Remove Pipeline.getPipelineOptions. Configuration should be explicitly
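
With getRunner gone, callers can no longer recover a runner instance from a
Pipeline; the replacement pattern, visible throughout the test diffs above, is
to build the runner from the same options used to create the pipeline. A
minimal sketch of that migration (variable names are illustrative, and
fromOptions validates the usual required Dataflow options such as project and
temp location, so a runnable version needs those set):

    import org.apache.beam.runners.dataflow.DataflowRunner;
    import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;

    public class RunnerFromOptions {
      public static void main(String[] args) {
        DataflowPipelineOptions options =
            PipelineOptionsFactory.as(DataflowPipelineOptions.class);
        Pipeline pipeline = Pipeline.create(options);
        // Before: DataflowRunner runner = (DataflowRunner) pipeline.getRunner();
        // After: the runner is constructed from the options, independently of
        // the Pipeline object.
        DataflowRunner runner = DataflowRunner.fromOptions(options);
      }
    }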


[12/50] [abbrv] beam git commit: Runner API encoding of WindowFns.

Posted by ke...@apache.org.
Runner API encoding of WindowFns.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/aad32b7a
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/aad32b7a
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/aad32b7a

Branch: refs/heads/gearpump-runner
Commit: aad32b7a00d1aea1e7e51b68ff609d2fb3b82a8f
Parents: bc76a18
Author: Robert Bradshaw <ro...@gmail.com>
Authored: Tue Mar 7 12:21:02 2017 -0800
Committer: Robert Bradshaw <ro...@gmail.com>
Committed: Thu Mar 9 20:29:01 2017 -0800

----------------------------------------------------------------------
 sdks/python/apache_beam/transforms/window.py    | 117 +++++++++++++++++++
 .../apache_beam/transforms/window_test.py       |  11 ++
 sdks/python/apache_beam/utils/proto_utils.py    |  37 ++++++
 sdks/python/apache_beam/utils/urns.py           |   7 ++
 4 files changed, 172 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/aad32b7a/sdks/python/apache_beam/transforms/window.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/window.py b/sdks/python/apache_beam/transforms/window.py
index 14cf2f6..a562bcf 100644
--- a/sdks/python/apache_beam/transforms/window.py
+++ b/sdks/python/apache_beam/transforms/window.py
@@ -49,13 +49,20 @@ WindowFn.
 
 from __future__ import absolute_import
 
+from google.protobuf import struct_pb2
+from google.protobuf import wrappers_pb2
+
 from apache_beam import coders
+from apache_beam.internal import pickler
+from apache_beam.runners.api import beam_runner_api_pb2
 from apache_beam.transforms import timeutil
 from apache_beam.transforms.timeutil import Duration
 from apache_beam.transforms.timeutil import MAX_TIMESTAMP
 from apache_beam.transforms.timeutil import MIN_TIMESTAMP
 from apache_beam.transforms.timeutil import Timestamp
 from apache_beam.utils.windowed_value import WindowedValue
+from apache_beam.utils import proto_utils
+from apache_beam.utils import urns
 
 
 # TODO(ccy): revisit naming and semantics once Java Apache Beam finalizes their
@@ -131,6 +138,41 @@ class WindowFn(object):
     # By default, just return the input timestamp.
     return input_timestamp
 
+  _known_urns = {}
+
+  @classmethod
+  def register_urn(cls, urn, parameter_type, constructor):
+    cls._known_urns[urn] = parameter_type, constructor
+
+  @classmethod
+  def from_runner_api(cls, fn_proto, context):
+    parameter_type, constructor = cls._known_urns[fn_proto.spec.urn]
+    return constructor(
+        proto_utils.unpack_Any(fn_proto.spec.parameter, parameter_type),
+        context)
+
+  def to_runner_api(self, context):
+    urn, typed_param = self.to_runner_api_parameter(context)
+    return beam_runner_api_pb2.FunctionSpec(
+        spec=beam_runner_api_pb2.UrnWithParameter(
+            urn=urn,
+            parameter=proto_utils.pack_Any(typed_param)))
+
+  @staticmethod
+  def from_runner_api_parameter(fn_parameter, unused_context):
+    return pickler.loads(fn_parameter.value)
+
+  def to_runner_api_parameter(self, context):
+    raise TypeError(self)
+    return (urns.PICKLED_WINDOW_FN,
+            wrappers_pb2.BytesValue(value=pickler.dumps(self)))
+
+
+WindowFn.register_urn(
+    urns.PICKLED_WINDOW_FN,
+    wrappers_pb2.BytesValue,
+    WindowFn.from_runner_api_parameter)
+
 
 class BoundedWindow(object):
   """A window for timestamps in range (-infinity, end).
@@ -251,6 +293,16 @@ class GlobalWindows(WindowFn):
   def __ne__(self, other):
     return not self == other
 
+  @staticmethod
+  def from_runner_api_parameter(unused_fn_parameter, unused_context):
+    return GlobalWindows()
+
+  def to_runner_api_parameter(self, context):
+    return urns.GLOBAL_WINDOWS_FN, None
+
+WindowFn.register_urn(
+    urns.GLOBAL_WINDOWS_FN, None, GlobalWindows.from_runner_api_parameter)
+
 
 class FixedWindows(WindowFn):
   """A windowing function that assigns each element to one time interval.
@@ -280,6 +332,29 @@ class FixedWindows(WindowFn):
   def merge(self, merge_context):
     pass  # No merging.
 
+  def __eq__(self, other):
+    if type(self) == type(other) == FixedWindows:
+      return self.size == other.size and self.offset == other.offset
+
+  def __ne__(self, other):
+    return not self == other
+
+  @staticmethod
+  def from_runner_api_parameter(fn_parameter, unused_context):
+    return FixedWindows(
+        size=Duration(micros=fn_parameter['size']),
+        offset=Timestamp(micros=fn_parameter['offset']))
+
+  def to_runner_api_parameter(self, context):
+    return (urns.FIXED_WINDOWS_FN,
+            proto_utils.pack_Struct(size=self.size.micros,
+                                    offset=self.offset.micros))
+
+WindowFn.register_urn(
+    urns.FIXED_WINDOWS_FN,
+    struct_pb2.Struct,
+    FixedWindows.from_runner_api_parameter)
+
 
 class SlidingWindows(WindowFn):
   """A windowing function that assigns each element to a set of sliding windows.
@@ -312,6 +387,31 @@ class SlidingWindows(WindowFn):
   def merge(self, merge_context):
     pass  # No merging.
 
+  def __eq__(self, other):
+    if type(self) == type(other) == SlidingWindows:
+      return (self.size == other.size
+              and self.offset == other.offset
+              and self.period == other.period)
+
+  @staticmethod
+  def from_runner_api_parameter(fn_parameter, unused_context):
+    return SlidingWindows(
+        size=Duration(micros=fn_parameter['size']),
+        offset=Timestamp(micros=fn_parameter['offset']),
+        period=Duration(micros=fn_parameter['period']))
+
+  def to_runner_api_parameter(self, context):
+    return (urns.SLIDING_WINDOWS_FN,
+            proto_utils.pack_Struct(
+                size=self.size.micros,
+                offset=self.offset.micros,
+                period=self.period.micros))
+
+WindowFn.register_urn(
+    urns.SLIDING_WINDOWS_FN,
+    struct_pb2.Struct,
+    SlidingWindows.from_runner_api_parameter)
+
 
 class Sessions(WindowFn):
   """A windowing function that groups elements into sessions.
@@ -352,3 +452,20 @@ class Sessions(WindowFn):
         end = w.end
     if len(to_merge) > 1:
       merge_context.merge(to_merge, IntervalWindow(to_merge[0].start, end))
+
+  def __eq__(self, other):
+    if type(self) == type(other) == Sessions:
+      return self.gap_size == other.gap_size
+
+  @staticmethod
+  def from_runner_api_parameter(fn_parameter, unused_context):
+    return Sessions(gap_size=Duration(micros=fn_parameter['gap_size']))
+
+  def to_runner_api_parameter(self, context):
+    return (urns.SESSION_WINDOWS_FN,
+            proto_utils.pack_Struct(gap_size=self.gap_size.micros))
+
+WindowFn.register_urn(
+    urns.SESSION_WINDOWS_FN,
+    struct_pb2.Struct,
+    Sessions.from_runner_api_parameter)

http://git-wip-us.apache.org/repos/asf/beam/blob/aad32b7a/sdks/python/apache_beam/transforms/window_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/window_test.py b/sdks/python/apache_beam/transforms/window_test.py
index c4072ac..821b143 100644
--- a/sdks/python/apache_beam/transforms/window_test.py
+++ b/sdks/python/apache_beam/transforms/window_test.py
@@ -19,6 +19,7 @@
 
 import unittest
 
+from apache_beam import pipeline
 from apache_beam.test_pipeline import TestPipeline
 from apache_beam.transforms import CombinePerKey
 from apache_beam.transforms import combiners
@@ -32,6 +33,7 @@ from apache_beam.transforms.timeutil import MIN_TIMESTAMP
 from apache_beam.transforms.util import assert_that, equal_to
 from apache_beam.transforms.window import FixedWindows
 from apache_beam.transforms.window import GlobalWindow
+from apache_beam.transforms.window import GlobalWindows
 from apache_beam.transforms.window import IntervalWindow
 from apache_beam.transforms.window import Sessions
 from apache_beam.transforms.window import SlidingWindows
@@ -224,6 +226,15 @@ class WindowTest(unittest.TestCase):
                 label='assert:mean')
     p.run()
 
+  def test_runner_api(self):
+    for window_fn in (GlobalWindows(),
+                      FixedWindows(37),
+                      SlidingWindows(2, 389),
+                      Sessions(5077)):
+      context = pipeline.PipelineContext()
+      self.assertEqual(
+          window_fn,
+          WindowFn.from_runner_api(window_fn.to_runner_api(context), context))
 
 if __name__ == '__main__':
   unittest.main()

http://git-wip-us.apache.org/repos/asf/beam/blob/aad32b7a/sdks/python/apache_beam/utils/proto_utils.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/utils/proto_utils.py b/sdks/python/apache_beam/utils/proto_utils.py
new file mode 100644
index 0000000..0ece8f5
--- /dev/null
+++ b/sdks/python/apache_beam/utils/proto_utils.py
@@ -0,0 +1,37 @@
+from google.protobuf import any_pb2
+from google.protobuf import struct_pb2
+
+
+def pack_Any(msg):
+  """Creates a protobuf Any with msg as its content.
+
+  Returns None if msg is None.
+  """
+  if msg is None:
+    return None
+  else:
+    result = any_pb2.Any()
+    result.Pack(msg)
+    return result
+
+
+def unpack_Any(any_msg, msg_class):
+  """Unpacks any_msg into msg_class.
+
+  Returns None if msg_class is None.
+  """
+  if msg_class is None:
+    return None
+  else:
+    msg = msg_class()
+    any_msg.Unpack(msg)
+    return msg
+
+
+def pack_Struct(**kwargs):
+  """Returns a struct containing the values indicated by kwargs.
+  """
+  msg = struct_pb2.Struct()
+  for key, value in kwargs.items():
+    msg[key] = value  # pylint: disable=unsubscriptable-object
+  return msg
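
These helpers mirror functionality that the Java protobuf library exposes
directly on com.google.protobuf.Any, for readers comparing the two SDKs. A
short Java equivalent of the pack/unpack round trip (the BytesValue payload is
just an example message type, not taken from the diff):

    import com.google.protobuf.Any;
    import com.google.protobuf.ByteString;
    import com.google.protobuf.BytesValue;
    import com.google.protobuf.InvalidProtocolBufferException;

    public class AnyRoundTrip {
      public static void main(String[] args) throws InvalidProtocolBufferException {
        BytesValue payload = BytesValue.newBuilder()
            .setValue(ByteString.copyFromUtf8("pickled-window-fn")).build();
        // Equivalent of pack_Any: wrap an arbitrary message in an Any.
        Any packed = Any.pack(payload);
        // Equivalent of unpack_Any: recover the typed message.
        BytesValue unpacked = packed.unpack(BytesValue.class);
        System.out.println(unpacked.getValue().toStringUtf8());
      }
    }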

http://git-wip-us.apache.org/repos/asf/beam/blob/aad32b7a/sdks/python/apache_beam/utils/urns.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/utils/urns.py b/sdks/python/apache_beam/utils/urns.py
new file mode 100644
index 0000000..4d1c2f7
--- /dev/null
+++ b/sdks/python/apache_beam/utils/urns.py
@@ -0,0 +1,7 @@
+PICKLED_WINDOW_FN = "beam:window_fn:pickled_python:v0.1"
+GLOBAL_WINDOWS_FN = "beam:window_fn:global_windows:v0.1"
+FIXED_WINDOWS_FN = "beam:window_fn:fixed_windows:v0.1"
+SLIDING_WINDOWS_FN = "beam:window_fn:sliding_windows:v0.1"
+SESSION_WINDOWS_FN = "beam:window_fn:session_windows:v0.1"
+
+PICKLED_CODER = "dataflow:coder:pickled_python:v0.1"
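
The mechanism these URNs feed, WindowFn.register_urn mapping each URN to a
parameter type and constructor, is a plain registry-dispatch pattern rather
than anything Python-specific. A minimal Java sketch of the same idea using
only JDK types (all names below are illustrative, not Beam APIs):

    import java.util.HashMap;
    import java.util.Map;
    import java.util.function.Function;

    public class UrnRegistry {
      // URN -> constructor taking the (already unpacked) parameter payload.
      private static final Map<String, Function<Object, Object>> KNOWN_URNS =
          new HashMap<>();

      public static void registerUrn(String urn, Function<Object, Object> constructor) {
        KNOWN_URNS.put(urn, constructor);
      }

      public static Object fromRunnerApi(String urn, Object parameter) {
        Function<Object, Object> constructor = KNOWN_URNS.get(urn);
        if (constructor == null) {
          throw new IllegalArgumentException("Unknown URN: " + urn);
        }
        return constructor.apply(parameter);
      }

      public static void main(String[] args) {
        // Register a trivial "global windows" analog that ignores its parameter.
        registerUrn("beam:window_fn:global_windows:v0.1", param -> "GlobalWindows");
        System.out.println(fromRunnerApi("beam:window_fn:global_windows:v0.1", null));
      }
    }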


[32/50] [abbrv] beam git commit: [BEAM-1551] Allow `PAssert`s to take a message

Posted by ke...@apache.org.
[BEAM-1551] Allow `PAssert`s to take a message


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e3cafb42
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e3cafb42
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e3cafb42

Branch: refs/heads/gearpump-runner
Commit: e3cafb42cdea1a35e067704e3a8f1277549aff67
Parents: 818fc94
Author: Aviem Zur <av...@gmail.com>
Authored: Sun Feb 26 19:42:13 2017 +0200
Committer: Aviem Zur <av...@gmail.com>
Committed: Fri Mar 10 23:13:38 2017 +0200

----------------------------------------------------------------------
 .../org/apache/beam/sdk/testing/PAssert.java    | 168 ++++++++++++++-----
 .../apache/beam/sdk/testing/PAssertTest.java    |   3 +-
 2 files changed, 130 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/e3cafb42/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
index a6fb232e..1faa024 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
@@ -279,7 +279,14 @@ public class PAssert {
    * Constructs an {@link IterableAssert} for the elements of the provided {@link PCollection}.
    */
   public static <T> IterableAssert<T> that(PCollection<T> actual) {
-    return new PCollectionContentsAssert<>(actual);
+    return that(actual.getName(), actual);
+  }
+
+  /**
+   * Constructs an {@link IterableAssert} for the elements of the provided {@link PCollection}.
+   */
+  public static <T> IterableAssert<T> that(String reason, PCollection<T> actual) {
+    return new PCollectionContentsAssert<>(reason, actual);
   }
 
   /**
@@ -288,6 +295,15 @@ public class PAssert {
    */
   public static <T> IterableAssert<T> thatSingletonIterable(
       PCollection<? extends Iterable<T>> actual) {
+    return thatSingletonIterable(actual.getName(), actual);
+  }
+
+  /**
+   * Constructs an {@link IterableAssert} for the value of the provided {@link PCollection} which
+   * must contain a single {@code Iterable<T>} value.
+   */
+  public static <T> IterableAssert<T> thatSingletonIterable(
+      String reason, PCollection<? extends Iterable<T>> actual) {
 
     try {
     } catch (NoSuchElementException | IllegalArgumentException exc) {
@@ -300,7 +316,7 @@ public class PAssert {
     @SuppressWarnings("unchecked") // Safe covariant cast
     PCollection<Iterable<T>> actualIterables = (PCollection<Iterable<T>>) actual;
 
-    return new PCollectionSingletonIterableAssert<>(actualIterables);
+    return new PCollectionSingletonIterableAssert<>(reason, actualIterables);
   }
 
   /**
@@ -308,7 +324,15 @@ public class PAssert {
    * {@code PCollection PCollection<T>}, which must be a singleton.
    */
   public static <T> SingletonAssert<T> thatSingleton(PCollection<T> actual) {
-    return new PCollectionViewAssert<>(actual, View.<T>asSingleton(), actual.getCoder());
+    return thatSingleton(actual.getName(), actual);
+  }
+
+  /**
+   * Constructs a {@link SingletonAssert} for the value of the provided
+   * {@code PCollection PCollection<T>}, which must be a singleton.
+   */
+  public static <T> SingletonAssert<T> thatSingleton(String reason, PCollection<T> actual) {
+    return new PCollectionViewAssert<>(actual, View.<T>asSingleton(), actual.getCoder(), reason);
   }
 
   /**
@@ -319,12 +343,24 @@ public class PAssert {
    */
   public static <K, V> SingletonAssert<Map<K, Iterable<V>>> thatMultimap(
       PCollection<KV<K, V>> actual) {
+    return thatMultimap(actual.getName(), actual);
+  }
+
+  /**
+   * Constructs a {@link SingletonAssert} for the value of the provided {@link PCollection}.
+   *
+   * <p>Note that the actual value must be coded by a {@link KvCoder}, not just any
+   * {@code Coder<K, V>}.
+   */
+  public static <K, V> SingletonAssert<Map<K, Iterable<V>>> thatMultimap(
+      String reason, PCollection<KV<K, V>> actual) {
     @SuppressWarnings("unchecked")
     KvCoder<K, V> kvCoder = (KvCoder<K, V>) actual.getCoder();
     return new PCollectionViewAssert<>(
         actual,
         View.<K, V>asMultimap(),
-        MapCoder.of(kvCoder.getKeyCoder(), IterableCoder.of(kvCoder.getValueCoder())));
+        MapCoder.of(kvCoder.getKeyCoder(), IterableCoder.of(kvCoder.getValueCoder())),
+        reason);
   }
 
   /**
@@ -335,10 +371,23 @@ public class PAssert {
    * {@code Coder<K, V>}.
    */
   public static <K, V> SingletonAssert<Map<K, V>> thatMap(PCollection<KV<K, V>> actual) {
+    return thatMap(actual.getName(), actual);
+  }
+
+  /**
+   * Constructs a {@link SingletonAssert} for the value of the provided {@link PCollection}, which
+   * must have at most one value per key.
+   *
+   * <p>Note that the actual value must be coded by a {@link KvCoder}, not just any
+   * {@code Coder<K, V>}.
+   */
+  public static <K, V> SingletonAssert<Map<K, V>> thatMap(
+      String reason, PCollection<KV<K, V>> actual) {
     @SuppressWarnings("unchecked")
     KvCoder<K, V> kvCoder = (KvCoder<K, V>) actual.getCoder();
     return new PCollectionViewAssert<>(
-        actual, View.<K, V>asMap(), MapCoder.of(kvCoder.getKeyCoder(), kvCoder.getValueCoder()));
+        actual, View.<K, V>asMap(), MapCoder.of(kvCoder.getKeyCoder(), kvCoder.getValueCoder()),
+        reason);
   }
 
   ////////////////////////////////////////////////////////////
@@ -351,18 +400,21 @@ public class PAssert {
     private final PCollection<T> actual;
     private final AssertionWindows rewindowingStrategy;
     private final SimpleFunction<Iterable<ValueInSingleWindow<T>>, Iterable<T>> paneExtractor;
+    private final String reason;
 
-    public PCollectionContentsAssert(PCollection<T> actual) {
-      this(actual, IntoGlobalWindow.<T>of(), PaneExtractors.<T>allPanes());
+    public PCollectionContentsAssert(String reason, PCollection<T> actual) {
+      this(actual, IntoGlobalWindow.<T>of(), PaneExtractors.<T>allPanes(), reason);
     }
 
     public PCollectionContentsAssert(
         PCollection<T> actual,
         AssertionWindows rewindowingStrategy,
-        SimpleFunction<Iterable<ValueInSingleWindow<T>>, Iterable<T>> paneExtractor) {
+        SimpleFunction<Iterable<ValueInSingleWindow<T>>, Iterable<T>> paneExtractor,
+        String reason) {
       this.actual = actual;
       this.rewindowingStrategy = rewindowingStrategy;
       this.paneExtractor = paneExtractor;
+      this.reason = reason;
     }
 
     @Override
@@ -397,7 +449,7 @@ public class PAssert {
       Coder<BoundedWindow> windowCoder =
           (Coder) actual.getWindowingStrategy().getWindowFn().windowCoder();
       return new PCollectionContentsAssert<>(
-          actual, IntoStaticWindows.<T>of(windowCoder, window), paneExtractor);
+          actual, IntoStaticWindows.<T>of(windowCoder, window), paneExtractor, reason);
     }
 
     /**
@@ -418,7 +470,7 @@ public class PAssert {
      */
     @Override
     public PCollectionContentsAssert<T> containsInAnyOrder(Iterable<T> expectedElements) {
-      return satisfies(new AssertContainsInAnyOrderRelation<T>(), expectedElements);
+      return satisfies(new AssertContainsInAnyOrderRelation<T>(reason), expectedElements);
     }
 
     @Override
@@ -471,7 +523,7 @@ public class PAssert {
       // more flexible bounds.
       @SuppressWarnings({"rawtypes", "unchecked"})
       SerializableFunction<Iterable<T>, Void> checkerFn =
-          (SerializableFunction) new MatcherCheckerFn<>(matcher);
+          (SerializableFunction) new MatcherCheckerFn<>(reason, matcher);
       actual.apply(
           "PAssert$" + (assertCount++),
           new GroupThenAssert<>(checkerFn, rewindowingStrategy, paneExtractor));
@@ -479,15 +531,17 @@ public class PAssert {
     }
 
     private static class MatcherCheckerFn<T> implements SerializableFunction<T, Void> {
-      private SerializableMatcher<T> matcher;
+      private final String reason;
+      private final SerializableMatcher<T> matcher;
 
-      public MatcherCheckerFn(SerializableMatcher<T> matcher) {
+      public MatcherCheckerFn(String reason, SerializableMatcher<T> matcher) {
+        this.reason = reason;
         this.matcher = matcher;
       }
 
       @Override
       public Void apply(T actual) {
-        assertThat(actual, matcher);
+        assertThat(reason, actual, matcher);
         return null;
       }
     }
@@ -526,16 +580,19 @@ public class PAssert {
     private final AssertionWindows rewindowingStrategy;
     private final SimpleFunction<Iterable<ValueInSingleWindow<Iterable<T>>>, Iterable<Iterable<T>>>
         paneExtractor;
+    private final String reason;
 
-    public PCollectionSingletonIterableAssert(PCollection<Iterable<T>> actual) {
-      this(actual, IntoGlobalWindow.<Iterable<T>>of(), PaneExtractors.<Iterable<T>>onlyPane());
+    public PCollectionSingletonIterableAssert(String reason, PCollection<Iterable<T>> actual) {
+      this(actual, IntoGlobalWindow.<Iterable<T>>of(), PaneExtractors.<Iterable<T>>onlyPane(),
+          reason);
     }
 
     public PCollectionSingletonIterableAssert(
         PCollection<Iterable<T>> actual,
         AssertionWindows rewindowingStrategy,
         SimpleFunction<Iterable<ValueInSingleWindow<Iterable<T>>>, Iterable<Iterable<T>>>
-            paneExtractor) {
+            paneExtractor,
+        String reason) {
       this.actual = actual;
 
       @SuppressWarnings("unchecked")
@@ -544,6 +601,7 @@ public class PAssert {
 
       this.rewindowingStrategy = rewindowingStrategy;
       this.paneExtractor = paneExtractor;
+      this.reason = reason;
     }
 
     @Override
@@ -579,7 +637,7 @@ public class PAssert {
       Coder<BoundedWindow> windowCoder =
           (Coder) actual.getWindowingStrategy().getWindowFn().windowCoder();
       return new PCollectionSingletonIterableAssert<>(
-          actual, IntoStaticWindows.<Iterable<T>>of(windowCoder, window), paneExtractor);
+          actual, IntoStaticWindows.<Iterable<T>>of(windowCoder, window), paneExtractor, reason);
     }
 
     @Override
@@ -595,7 +653,7 @@ public class PAssert {
 
     @Override
     public PCollectionSingletonIterableAssert<T> containsInAnyOrder(Iterable<T> expectedElements) {
-      return satisfies(new AssertContainsInAnyOrderRelation<T>(), expectedElements);
+      return satisfies(new AssertContainsInAnyOrderRelation<T>(reason), expectedElements);
     }
 
     @Override
@@ -626,12 +684,15 @@ public class PAssert {
     private final SimpleFunction<Iterable<ValueInSingleWindow<ElemT>>, Iterable<ElemT>>
         paneExtractor;
     private final Coder<ViewT> coder;
+    private final String reason;
 
     protected PCollectionViewAssert(
         PCollection<ElemT> actual,
         PTransform<PCollection<ElemT>, PCollectionView<ViewT>> view,
-        Coder<ViewT> coder) {
-      this(actual, view, IntoGlobalWindow.<ElemT>of(), PaneExtractors.<ElemT>onlyPane(), coder);
+        Coder<ViewT> coder,
+        String reason) {
+      this(actual, view, IntoGlobalWindow.<ElemT>of(), PaneExtractors.<ElemT>onlyPane(), coder,
+          reason);
     }
 
     private PCollectionViewAssert(
@@ -639,12 +700,14 @@ public class PAssert {
         PTransform<PCollection<ElemT>, PCollectionView<ViewT>> view,
         AssertionWindows rewindowActuals,
         SimpleFunction<Iterable<ValueInSingleWindow<ElemT>>, Iterable<ElemT>> paneExtractor,
-        Coder<ViewT> coder) {
+        Coder<ViewT> coder,
+        String reason) {
       this.actual = actual;
       this.view = view;
       this.rewindowActuals = rewindowActuals;
       this.paneExtractor = paneExtractor;
       this.coder = coder;
+      this.reason = reason;
     }
 
     @Override
@@ -671,17 +734,18 @@ public class PAssert {
           IntoStaticWindows.of(
               (Coder) actual.getWindowingStrategy().getWindowFn().windowCoder(), window),
           paneExtractor,
-          coder);
+          coder,
+          reason);
     }
 
     @Override
     public PCollectionViewAssert<ElemT, ViewT> isEqualTo(ViewT expectedValue) {
-      return satisfies(new AssertIsEqualToRelation<ViewT>(), expectedValue);
+      return satisfies(new AssertIsEqualToRelation<ViewT>(reason), expectedValue);
     }
 
     @Override
     public PCollectionViewAssert<ElemT, ViewT> notEqualTo(ViewT expectedValue) {
-      return satisfies(new AssertNotEqualToRelation<ViewT>(), expectedValue);
+      return satisfies(new AssertNotEqualToRelation<ViewT>(reason), expectedValue);
     }
 
     @Override
@@ -1119,15 +1183,17 @@ public class PAssert {
    * value.
    */
   private static class AssertIsEqualTo<T> implements SerializableFunction<T, Void> {
-    private T expected;
+    private final String reason;
+    private final T expected;
 
-    public AssertIsEqualTo(T expected) {
+    public AssertIsEqualTo(String reason, T expected) {
+      this.reason = reason;
       this.expected = expected;
     }
 
     @Override
     public Void apply(T actual) {
-      assertThat(actual, equalTo(expected));
+      assertThat(reason, actual, equalTo(expected));
       return null;
     }
   }
@@ -1137,15 +1203,17 @@ public class PAssert {
    * value.
    */
   private static class AssertNotEqualTo<T> implements SerializableFunction<T, Void> {
+    private String reason;
     private T expected;
 
-    public AssertNotEqualTo(T expected) {
+    public AssertNotEqualTo(String reason, T expected) {
+      this.reason = reason;
       this.expected = expected;
     }
 
     @Override
     public Void apply(T actual) {
-      assertThat(actual, not(equalTo(expected)));
+      assertThat(reason, actual, not(equalTo(expected)));
       return null;
     }
   }
@@ -1156,25 +1224,27 @@ public class PAssert {
    */
   private static class AssertContainsInAnyOrder<T>
       implements SerializableFunction<Iterable<T>, Void> {
-    private T[] expected;
+    private final String reason;
+    private final T[] expected;
 
     @SafeVarargs
-    public AssertContainsInAnyOrder(T... expected) {
+    public AssertContainsInAnyOrder(String reason, T... expected) {
+      this.reason = reason;
       this.expected = expected;
     }
 
     @SuppressWarnings("unchecked")
-    public AssertContainsInAnyOrder(Collection<T> expected) {
-      this((T[]) expected.toArray());
+    public AssertContainsInAnyOrder(String reason, Collection<T> expected) {
+      this(reason, (T[]) expected.toArray());
     }
 
-    public AssertContainsInAnyOrder(Iterable<T> expected) {
-      this(Lists.<T>newArrayList(expected));
+    public AssertContainsInAnyOrder(String reason, Iterable<T> expected) {
+      this(reason, Lists.<T>newArrayList(expected));
     }
 
     @Override
     public Void apply(Iterable<T> actual) {
-      assertThat(actual, containsInAnyOrder(expected));
+      assertThat(reason, actual, containsInAnyOrder(expected));
       return null;
     }
   }
@@ -1194,9 +1264,15 @@ public class PAssert {
    * An {@link AssertRelation} implementing the binary predicate that two objects are equal.
    */
   private static class AssertIsEqualToRelation<T> implements AssertRelation<T, T> {
+    private final String reason;
+
+    public AssertIsEqualToRelation(String reason) {
+      this.reason = reason;
+    }
+
     @Override
     public SerializableFunction<T, Void> assertFor(T expected) {
-      return new AssertIsEqualTo<T>(expected);
+      return new AssertIsEqualTo<T>(reason, expected);
     }
   }
 
@@ -1204,9 +1280,15 @@ public class PAssert {
    * An {@link AssertRelation} implementing the binary predicate that two objects are not equal.
    */
   private static class AssertNotEqualToRelation<T> implements AssertRelation<T, T> {
+    private final String reason;
+
+    public AssertNotEqualToRelation(String reason) {
+      this.reason = reason;
+    }
+
     @Override
     public SerializableFunction<T, Void> assertFor(T expected) {
-      return new AssertNotEqualTo<T>(expected);
+      return new AssertNotEqualTo<T>(reason, expected);
     }
   }
 
@@ -1216,9 +1298,15 @@ public class PAssert {
    */
   private static class AssertContainsInAnyOrderRelation<T>
       implements AssertRelation<Iterable<T>, Iterable<T>> {
+    private final String reason;
+
+    public AssertContainsInAnyOrderRelation(String reason) {
+      this.reason = reason;
+    }
+
     @Override
     public SerializableFunction<Iterable<T>, Void> assertFor(Iterable<T> expectedElements) {
-      return new AssertContainsInAnyOrder<T>(expectedElements);
+      return new AssertContainsInAnyOrder<T>(reason, expectedElements);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/beam/blob/e3cafb42/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
index 777e1af..f50adf4 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
@@ -389,10 +389,11 @@ public class PAssertTest implements Serializable {
   @Category(RunnableOnService.class)
   public void testEmptyFalse() throws Exception {
     PCollection<Long> vals = pipeline.apply(CountingInput.upTo(5L));
-    PAssert.that(vals).empty();
+    PAssert.that("Vals should have been empty", vals).empty();
 
     Throwable thrown = runExpectingAssertionFailure(pipeline);
 
+    assertThat(thrown.getMessage(), containsString("Vals should have been empty"));
     assertThat(thrown.getMessage(), containsString("Expected: iterable over [] in any order"));
   }
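
The test above exercises the empty() case; the same leading reason string is
accepted by the other assertion entry points added in this change. A short
sketch in the same JUnit style (element values and class name are
illustrative):

    import org.apache.beam.sdk.testing.PAssert;
    import org.apache.beam.sdk.testing.TestPipeline;
    import org.apache.beam.sdk.transforms.Create;
    import org.apache.beam.sdk.transforms.Sum;
    import org.apache.beam.sdk.values.PCollection;
    import org.junit.Rule;
    import org.junit.Test;

    public class PAssertReasonExample {
      @Rule public final transient TestPipeline pipeline = TestPipeline.create();

      @Test
      public void reasonsOnOtherAsserts() throws Exception {
        PCollection<Integer> nums = pipeline.apply(Create.of(1, 2, 3));
        // The reason string is prepended to the failure message on mismatch.
        PAssert.that("nums should contain exactly 1, 2 and 3", nums)
            .containsInAnyOrder(1, 2, 3);
        PAssert.thatSingleton("the global sum should be 6",
            nums.apply(Sum.integersGlobally())).isEqualTo(6);
        pipeline.run();
      }
    }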
 


[20/50] [abbrv] beam git commit: Properly deal with late processing-time timers

Posted by ke...@apache.org.
Properly deal with late processing-time timers


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/dbfcf4b4
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/dbfcf4b4
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/dbfcf4b4

Branch: refs/heads/gearpump-runner
Commit: dbfcf4b4a63b38653adc21d1cf37d6c4cfd955ad
Parents: 1a8e1f7
Author: Aljoscha Krettek <al...@gmail.com>
Authored: Fri Mar 10 15:25:26 2017 +0100
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri Mar 10 15:25:26 2017 +0100

----------------------------------------------------------------------
 .../beam/runners/core/StatefulDoFnRunner.java   | 40 ++++++++++++--------
 1 file changed, 24 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/dbfcf4b4/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
index c672902..d27193c 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
@@ -76,33 +76,31 @@ public class StatefulDoFnRunner<InputT, OutputT, W extends BoundedWindow>
   }
 
   @Override
-  public void processElement(WindowedValue<InputT> compressedElem) {
+  public void processElement(WindowedValue<InputT> input) {
 
     // StatefulDoFnRunner always observes windows, so we need to explode
-    for (WindowedValue<InputT> value : compressedElem.explodeWindows()) {
+    for (WindowedValue<InputT> value : input.explodeWindows()) {
 
       BoundedWindow window = value.getWindows().iterator().next();
 
-      if (!dropLateData(window)) {
+      if (isLate(window)) {
+        // The element is too late for this window.
+        droppedDueToLateness.addValue(1L);
+        WindowTracing.debug(
+            "StatefulDoFnRunner.processElement: Dropping element at {}; window:{} "
+                + "since too far behind inputWatermark:{}",
+            input.getTimestamp(), window, cleanupTimer.currentInputWatermarkTime());
+      } else {
         cleanupTimer.setForWindow(window);
         doFnRunner.processElement(value);
       }
     }
   }
 
-  private boolean dropLateData(BoundedWindow window) {
+  private boolean isLate(BoundedWindow window) {
     Instant gcTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness());
     Instant inputWM = cleanupTimer.currentInputWatermarkTime();
-    if (gcTime.isBefore(inputWM)) {
-      // The element is too late for this window.
-      droppedDueToLateness.addValue(1L);
-      WindowTracing.debug(
-          "StatefulDoFnRunner.processElement/onTimer: Dropping element for window:{} "
-              + "since too far behind inputWatermark:{}", window, inputWM);
-      return true;
-    } else {
-      return false;
-    }
+    return gcTime.isBefore(inputWM);
   }
 
   @Override
@@ -112,8 +110,18 @@ public class StatefulDoFnRunner<InputT, OutputT, W extends BoundedWindow>
       stateCleaner.clearForWindow(window);
      // This is where the DoFn's onWindowExpiration should be invoked
     } else {
-      // a timer can never be late because we don't allow setting timers after GC time
-      doFnRunner.onTimer(timerId, window, timestamp, timeDomain);
+      // An event-time timer can never be late because we don't allow setting timers after GC time.
+      // It can happen that a processing-time timer fires for a late window; we need to
+      // ignore this.
+      if (!timeDomain.equals(TimeDomain.EVENT_TIME) && isLate(window)) {
+        // don't increment the dropped counter, only do that for elements
+        WindowTracing.debug(
+            "StatefulDoFnRunner.onTimer: Ignoring processing-time timer at {}; window:{} "
+                + "since window is too far behind inputWatermark:{}",
+            timestamp, window, cleanupTimer.currentInputWatermarkTime());
+      } else {
+        doFnRunner.onTimer(timerId, window, timestamp, timeDomain);
+      }
     }
   }
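
The guard both branches now share reduces to a single comparison: a window is
considered late once the input watermark passes its garbage-collection time
(window max timestamp plus allowed lateness). A standalone sketch of that
predicate using Joda time, with illustrative values:

    import org.joda.time.Duration;
    import org.joda.time.Instant;

    public class LatenessCheck {
      // Mirrors StatefulDoFnRunner.isLate: GC time = window max timestamp + allowed lateness.
      static boolean isLate(
          Instant windowMaxTimestamp, Duration allowedLateness, Instant inputWatermark) {
        Instant gcTime = windowMaxTimestamp.plus(allowedLateness);
        return gcTime.isBefore(inputWatermark);
      }

      public static void main(String[] args) {
        Instant windowEnd = new Instant(10_000L);
        Duration allowedLateness = Duration.standardSeconds(5);
        // true: watermark (20s) is past the GC time (15s).
        System.out.println(isLate(windowEnd, allowedLateness, new Instant(20_000L)));
        // false: watermark (12s) is still within the allowed lateness.
        System.out.println(isLate(windowEnd, allowedLateness, new Instant(12_000L)));
      }
    }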
 


[50/50] [abbrv] beam git commit: This closes #2241: merge master to gearpump-master and fixup

Posted by ke...@apache.org.
This closes #2241: merge master to gearpump-master and fixup

  [BEAM-79] Fix gearpump-runner merge conflicts and test failure
  Revert BigQueryIO bit of 'Make all uses of CountingOutputStream close their resources'
  [BEAM-1629] Init metrics/aggregators accumulators before traversing pipeline
  Generate zip distribution for python
  Ignore results from the tox clean up phase
  Add README to python tarball.
  Remove exception suppression from PAssert.SideInputCheckerDoFn
  Remove duplicated dependency from Dataflow runner pom.xml
  Added assertion failure tests for `PAssert#thatSingleton`
  Added a test of default PAssert failure reason
  Javadoc changes
  [BEAM-1551] Allow `PAssert`s to take a message
  add unicode type to the typeDict attribute in Python SDK
  Remove Pipeline.getRunner
  [BEAM-1686] Use random MQTT clientID when not defined to avoid NPE
  Properly deal with late processing-time timers
  [BEAM-1661] Shade guava in the JdbcIO
  [BEAM-797] A PipelineVisitor that creates a Spark-native pipeline.
  Introduce Flink-specific state GC implementations
  Move GC timer checking to StatefulDoFnRunner.CleanupTimer
  Move pipeline context and add more tests.
  Add license to new files.
  Runner API translation of triggers and windowing strategies.
  Runner API encoding of WindowFns.
  Runner API context helper classes.
  Auto-generated runner api proto bindings.
  HadoopInputFormatIO with junits
  Test runner to stop on EOT watermark, or timeout.
  [BEAM-1184] Add integration tests to ElasticsearchIO
  Jdbc k8 script: postgres data store only accessible inside test project
  Jdbc k8 & data loading: add teardown and update names/docs
  Bump Dataflow ROS timeout to 120 minutes
  Fixup typo in WindowingStrategies
  Update archetypes
  [BEAM-1649] Fix unresolved references in Python SDK
  Upgrade Dataflow container version to beam-master-20170307
  Explicitly GBK before stateful ParDo in Dataflow batch
  Add ServicesResourceTransformer to all shading configuration
  Fix typo in proto: widow -> window.
  [BEAM-1646] Remove duplicated bigquery dependency
  Update python SDK version to the next version.
  Change Json parsing from gson to jackson for ElasticsearchIO
  [maven-release-plugin] prepare for next development iteration
  [maven-release-plugin] prepare branch release-0.6.0
  [BEAM-1633] Move .tox/ directory under target/ in Python SDK
  Make all uses of CountingOutputStream close their resources
  Flink: register known IOChannelFactories
  [BEAM-1546] Specify exact version for Python in the SDK
  [BEAM-1635] TypeError in AfterWatermark class's __repr__ method
  [BEAM-1636] UnboundedDataset action() does not materialize RDD
  [BEAM-1556] Make PipelineOptions a lazy-singleton and init IOs as part of it.
  Add tests for serialization of BigQueryIO.TableRowInfoCoder
  Fix tox warning for non-whitelisted find command
  Revert "Implement Single-Output ParDo as a composite"
  [BEAM-1623] Transform Reshuffle directly in Spark runner
  [BEAM-1626] Remove cache of MapWithStateDStream on read.
  Revert "DataflowRunner: experimental support for issuing FnAPI based jobs"
  Java examples: move shade plugin into default lifecycle
  Java 8 examples: add shade configuration for producing bundled/fat jar
  Java 8 examples: add SNAPSHOT repository to the pom.xml file
  Update Guava version from 19.0 to 20.0 in example projects
  [BEAM-1625] BoundedDataset action() does not materialize RDD
  Revert "[maven-release-plugin] prepare branch release-0.6.0"
  Updating Dataflow API client protobufs
  Implement Single-Output ParDo as a composite
  Add a Test for windowed CombineGloballyAsSingletonView
  Remove SingletonCombine
  Only Override CreatePCollectionView in Streaming
  [BEAM-1310] Add running integration tests in JdbcIO on Spark and Dataflow runners
  DataflowRunner: experimental support for issuing FnAPI based jobs
  Updates Python SDK source API so that sources can report limited parallelism signals.
  [BEAM-1188] Python Bigquery Verifier For E2E Test
  Deprecate Pipeline.getOptions
  BEAM-1567 hashStream should be closed in PackageUtil#createPackageAttributes()
  [BEAM-1565] Update Spark runner PostCommit Jenkins job.
  [maven-release-plugin] prepare branch release-0.6.0
  Do not Reassign Windows when WindowFn is null
  Fix DataflowRunner message about uploaded vs cached files
  Update javadoc ant to include runners/ and exclude modules with a wildcard
  Adding per-stage matching to metrics filters
  Upgrade dill to 0.2.6 and pin it
  Remove PipelineRunner#apply
  [BEAM-111] Move WritableCoder to hadoop-common
  [BEAM-1297] Update maven shade plugin, fix typo and remove unneeded version
  [BEAM-351] Add DisplayData to KafkaIO
  Inline rather than reference FunctionSpecs.
  [BEAM-1517] Garbage collect user state in Flink Runner
  BEAM-1417 Count should comply with PTransform style guide
  BEAM-1419 Flatten should comply with PTransform style guide
  BEAM-1416 Write transform should comply with PTransform style guide
  BEAM-1426 SortValues should comply with PTransform style guide
  BEAM-1424 ToString should comply with PTransform style guide
  BEAM-1423 Sample should comply with PTransform style guide
  BEAM-1421 Latest should comply with PTransform style guide
  BEAM-1420 GroupByKey should comply with PTransform style guide
  Use UnsupportedSideInputReader in GroupAlsoByWindowEvaluatorFactory
  Include cython tests in presubmits for linux platform
  Update output stream cython declaration
  Make side inputs a map, rather than embedding the name in the message.
  Streaming tests, especially the ones using checkpoints, need a time buffer to finish.
  ...


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/555842a1
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/555842a1
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/555842a1

Branch: refs/heads/gearpump-runner
Commit: 555842a1a0c478b3935b3987683d2645eba770c3
Parents: 15a8ad6 3eab6a6
Author: Kenneth Knowles <kl...@google.com>
Authored: Fri Mar 17 11:15:52 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Fri Mar 17 11:15:52 2017 -0700

----------------------------------------------------------------------
 .gitignore                                      |   10 +
 .jenkins/common_job_properties.groovy           |   98 +-
 ...job_beam_PostCommit_Java_MavenInstall.groovy |    4 +-
 ...ostCommit_Java_RunnableOnService_Apex.groovy |    8 +-
 ...ommit_Java_RunnableOnService_Dataflow.groovy |    8 +-
 ...stCommit_Java_RunnableOnService_Flink.groovy |    8 +-
 ...ommit_Java_RunnableOnService_Gearpump.groovy |   12 +-
 ...stCommit_Java_RunnableOnService_Spark.groovy |   10 +-
 .../job_beam_PostCommit_Python_Verify.groovy    |    8 +-
 .../job_beam_PreCommit_Java_MavenInstall.groovy |    6 +-
 .../job_beam_PreCommit_Website_Stage.groovy     |   80 +
 .jenkins/job_beam_PreCommit_Website_Test.groovy |   65 +
 .../job_beam_Release_NightlySnapshot.groovy     |    9 +-
 .jenkins/job_seed.groovy                        |   24 +-
 .travis.yml                                     |   24 +-
 .travis/README.md                               |    2 +-
 DISCLAIMER                                      |   10 -
 NOTICE                                          |    4 +-
 README.md                                       |   57 +-
 examples/java/README.md                         |   16 +-
 examples/java/pom.xml                           |   24 +-
 .../beam/examples/DebuggingWordCount.java       |    4 +-
 .../org/apache/beam/examples/WordCount.java     |    6 +-
 .../beam/examples/complete/AutoComplete.java    |    4 +-
 .../org/apache/beam/examples/complete/README.md |   14 +-
 .../apache/beam/examples/complete/TfIdf.java    |    2 +-
 .../examples/complete/TopWikipediaSessions.java |   27 +-
 .../examples/complete/TrafficMaxLaneFlow.java   |    2 +-
 .../beam/examples/complete/TrafficRoutes.java   |    2 +-
 .../examples/cookbook/BigQueryTornadoes.java    |    2 +-
 .../cookbook/CombinePerKeyExamples.java         |    2 +-
 .../org/apache/beam/examples/cookbook/README.md |   14 +-
 .../beam/examples/cookbook/TriggerExample.java  |    4 +-
 .../beam/examples/WindowedWordCountIT.java      |   16 +-
 .../org/apache/beam/examples/WordCountTest.java |    7 +-
 .../examples/complete/AutoCompleteTest.java     |   11 +-
 .../beam/examples/complete/TfIdfTest.java       |    6 +-
 .../complete/TopWikipediaSessionsTest.java      |    7 +-
 .../examples/cookbook/DistinctExampleTest.java  |    9 +-
 .../examples/cookbook/JoinExamplesTest.java     |    6 +-
 .../examples/cookbook/TriggerExampleTest.java   |    6 +-
 examples/java8/pom.xml                          |    3 +-
 .../beam/examples/complete/game/GameStats.java  |    7 +-
 .../examples/complete/game/LeaderBoard.java     |    5 +-
 .../beam/examples/complete/game/UserScore.java  |    2 +-
 .../examples/MinimalWordCountJava8Test.java     |    6 +-
 .../examples/complete/game/GameStatsTest.java   |    7 +-
 .../complete/game/HourlyTeamScoreTest.java      |    5 +-
 .../examples/complete/game/LeaderBoardTest.java |   11 +-
 .../examples/complete/game/UserScoreTest.java   |   10 +-
 examples/pom.xml                                |   16 +-
 pom.xml                                         |  319 +-
 runners/apex/README.md                          |    4 +-
 runners/apex/pom.xml                            |   32 +-
 .../beam/runners/apex/ApexPipelineOptions.java  |    7 +-
 .../apache/beam/runners/apex/ApexRunner.java    |  144 +-
 .../beam/runners/apex/ApexYarnLauncher.java     |   29 +-
 .../beam/runners/apex/TestApexRunner.java       |   10 -
 .../translation/ApexPipelineTranslator.java     |   17 +-
 .../translation/CreateValuesTranslator.java     |   25 +-
 .../FlattenPCollectionTranslator.java           |   37 +-
 .../apex/translation/GroupByKeyTranslator.java  |    2 +-
 .../translation/ParDoBoundMultiTranslator.java  |   32 +-
 .../apex/translation/ParDoBoundTranslator.java  |    9 +-
 .../apex/translation/TranslationContext.java    |   43 +-
 .../translation/WindowAssignTranslator.java     |   78 +
 .../apex/translation/WindowBoundTranslator.java |   78 -
 .../operators/ApexFlattenOperator.java          |    4 +-
 .../operators/ApexGroupByKeyOperator.java       |   22 +-
 .../operators/ApexParDoOperator.java            |   12 +-
 .../ApexReadUnboundedInputOperator.java         |   17 +-
 .../translation/utils/ApexStateInternals.java   |   30 +-
 .../apex/translation/utils/NoOpStepContext.java |    7 +-
 .../beam/runners/apex/ApexRunnerTest.java       |   76 +
 .../beam/runners/apex/ApexYarnLauncherTest.java |    9 +-
 .../runners/apex/examples/WordCountTest.java    |    2 +-
 .../FlattenPCollectionTranslatorTest.java       |   24 +-
 .../translation/ParDoBoundTranslatorTest.java   |   38 +-
 .../translation/ReadUnboundTranslatorTest.java  |    8 +-
 .../utils/ApexStateInternalsTest.java           |   14 +-
 .../test/resources/beam-runners-apex.properties |   20 +
 runners/core-construction-java/pom.xml          |  138 +
 .../EmptyFlattenAsCreateFactory.java            |   71 +
 .../core/construction/PTransformMatchers.java   |  192 +
 .../core/construction/PrimitiveCreate.java      |   77 +
 .../core/construction/ReplacementOutputs.java   |  105 +
 .../SingleInputOutputOverrideFactory.java       |   50 +
 .../UnsupportedOverrideFactory.java             |   71 +
 .../runners/core/construction/package-info.java |   22 +
 .../construction/PTransformMatchersTest.java    |  425 ++
 .../construction/ReplacementOutputsTest.java    |  254 +
 .../SingleInputOutputOverrideFactoryTest.java   |  114 +
 .../UnsupportedOverrideFactoryTest.java         |   65 +
 runners/core-java/pom.xml                       |   24 +-
 .../beam/runners/core/AggregatorFactory.java    |    1 -
 .../beam/runners/core/AssignWindowsDoFn.java    |    3 +-
 .../beam/runners/core/BaseExecutionContext.java |  174 +
 .../apache/beam/runners/core/DoFnAdapters.java  |  323 ++
 .../apache/beam/runners/core/DoFnRunner.java    |   21 -
 .../apache/beam/runners/core/DoFnRunners.java   |  158 +-
 .../beam/runners/core/ExecutionContext.java     |  100 +
 .../GroupAlsoByWindowViaOutputBufferDoFn.java   |  114 +
 .../core/GroupAlsoByWindowViaWindowSetDoFn.java |   17 +-
 .../GroupAlsoByWindowViaWindowSetNewDoFn.java   |  154 +
 .../runners/core/GroupAlsoByWindowsDoFn.java    |    5 +-
 .../GroupAlsoByWindowsViaOutputBufferDoFn.java  |  134 -
 .../runners/core/InMemoryStateInternals.java    |  643 +++
 .../runners/core/InMemoryTimerInternals.java    |  300 ++
 .../apache/beam/runners/core/KeyedWorkItem.java |    2 +-
 .../beam/runners/core/KeyedWorkItemCoder.java   |    8 +-
 .../beam/runners/core/KeyedWorkItems.java       |    2 +-
 .../core/LateDataDroppingDoFnRunner.java        |    2 -
 .../apache/beam/runners/core/LateDataUtils.java |   88 +
 .../runners/core/MergingActiveWindowSet.java    |    4 -
 .../beam/runners/core/MergingStateAccessor.java |   41 +
 .../apache/beam/runners/core/NonEmptyPanes.java |    7 +-
 .../org/apache/beam/runners/core/OldDoFn.java   |  471 ++
 ...eBoundedSplittableProcessElementInvoker.java |  285 ++
 .../beam/runners/core/PaneInfoTracker.java      |    4 -
 .../runners/core/PerKeyCombineFnRunner.java     |   70 -
 .../runners/core/PerKeyCombineFnRunners.java    |  101 -
 .../org/apache/beam/runners/core/ReduceFn.java  |    2 -
 .../runners/core/ReduceFnContextFactory.java    |   11 +-
 .../beam/runners/core/ReduceFnRunner.java       |    6 +-
 .../beam/runners/core/SideInputHandler.java     |    4 -
 .../beam/runners/core/SimpleDoFnRunner.java     |  190 +-
 .../beam/runners/core/SimpleOldDoFnRunner.java  |   12 +-
 .../beam/runners/core/SplittableParDo.java      |  233 +-
 .../core/SplittableProcessElementInvoker.java   |   65 +
 .../apache/beam/runners/core/StateAccessor.java |   38 +
 .../beam/runners/core/StateInternals.java       |   59 +
 .../runners/core/StateInternalsFactory.java     |   35 +
 .../apache/beam/runners/core/StateMerging.java  |  309 ++
 .../beam/runners/core/StateNamespace.java       |   56 +
 .../runners/core/StateNamespaceForTest.java     |   65 +
 .../beam/runners/core/StateNamespaces.java      |  278 ++
 .../apache/beam/runners/core/StateTable.java    |   84 +
 .../org/apache/beam/runners/core/StateTag.java  |  125 +
 .../org/apache/beam/runners/core/StateTags.java |  382 ++
 .../beam/runners/core/StatefulDoFnRunner.java   |  171 +
 .../beam/runners/core/SystemReduceFn.java       |    5 -
 .../core/TestInMemoryStateInternals.java        |   63 +
 .../beam/runners/core/TimerInternals.java       |  286 ++
 .../runners/core/TimerInternalsFactory.java     |   35 +
 .../core/UnboundedReadFromBoundedSource.java    |   14 +-
 .../core/UnsupportedSideInputReader.java        |   52 +
 .../apache/beam/runners/core/WatermarkHold.java |    9 +-
 .../beam/runners/core/WindowingInternals.java   |   81 +
 .../core/WindowingInternalsAdapters.java        |    1 -
 .../core/triggers/AfterAllStateMachine.java     |    2 +-
 .../AfterDelayFromFirstElementStateMachine.java |   16 +-
 .../core/triggers/AfterFirstStateMachine.java   |    2 +-
 .../core/triggers/AfterPaneStateMachine.java    |   12 +-
 .../AfterProcessingTimeStateMachine.java        |    2 +
 ...rSynchronizedProcessingTimeStateMachine.java |    7 +-
 .../triggers/AfterWatermarkStateMachine.java    |   15 +-
 .../core/triggers/OrFinallyStateMachine.java    |    2 +-
 .../triggers/ReshuffleTriggerStateMachine.java  |    6 +-
 .../core/triggers/TriggerStateMachine.java      |    6 +-
 .../TriggerStateMachineContextFactory.java      |   12 +-
 .../triggers/TriggerStateMachineRunner.java     |    8 +-
 .../core/triggers/TriggerStateMachines.java     |  250 +-
 .../core/DoFnDelegatingAggregatorTest.java      |  144 +
 ...roupAlsoByWindowViaOutputBufferDoFnTest.java |  109 +
 .../core/GroupAlsoByWindowsProperties.java      |    7 +-
 ...oupAlsoByWindowsViaOutputBufferDoFnTest.java |  110 -
 .../core/InMemoryStateInternalsTest.java        |  574 +++
 .../core/InMemoryTimerInternalsTest.java        |  195 +
 .../runners/core/KeyedWorkItemCoderTest.java    |    9 +-
 .../core/LateDataDroppingDoFnRunnerTest.java    |    3 +-
 .../core/MergingActiveWindowSetTest.java        |    2 -
 .../apache/beam/runners/core/NoOpOldDoFn.java   |   72 +
 .../beam/runners/core/OldDoFnContextTest.java   |   72 +
 .../apache/beam/runners/core/OldDoFnTest.java   |  192 +
 ...ndedSplittableProcessElementInvokerTest.java |  146 +
 .../core/PushbackSideInputDoFnRunnerTest.java   |    8 +-
 .../beam/runners/core/ReduceFnRunnerTest.java   |   12 +-
 .../beam/runners/core/ReduceFnTester.java       |   19 +-
 .../beam/runners/core/SideInputHandlerTest.java |    1 -
 .../beam/runners/core/SimpleDoFnRunnerTest.java |    6 +-
 .../runners/core/SimpleOldDoFnRunnerTest.java   |    4 +-
 .../beam/runners/core/SplittableParDoTest.java  |  254 +-
 .../beam/runners/core/StateNamespacesTest.java  |  130 +
 .../apache/beam/runners/core/StateTagTest.java  |  205 +
 .../runners/core/StatefulDoFnRunnerTest.java    |  347 ++
 .../beam/runners/core/TimerInternalsTest.java   |  105 +
 .../UnboundedReadFromBoundedSourceTest.java     |   18 +-
 .../beam/runners/core/WindowMatchers.java       |    3 +-
 ...chronizedProcessingTimeStateMachineTest.java |    3 +-
 .../AfterWatermarkStateMachineTest.java         |   19 +
 .../core/triggers/NeverStateMachineTest.java    |    1 -
 .../ReshuffleTriggerStateMachineTest.java       |    8 +-
 .../triggers/TriggerStateMachineTester.java     |   16 +-
 .../core/triggers/TriggerStateMachinesTest.java |  167 +-
 runners/direct-java/pom.xml                     |   27 +-
 .../runners/direct/AggregatorContainer.java     |    2 +-
 .../direct/BoundedReadEvaluatorFactory.java     |   10 +-
 .../CopyOnAccessInMemoryStateInternals.java     |   68 +-
 .../runners/direct/DirectExecutionContext.java  |    8 +-
 ...ectGBKIntoKeyedWorkItemsOverrideFactory.java |    4 +-
 .../beam/runners/direct/DirectGraphVisitor.java |   33 +-
 .../direct/DirectGroupByKeyOverrideFactory.java |    3 +-
 .../beam/runners/direct/DirectMetrics.java      |   31 +-
 .../beam/runners/direct/DirectRunner.java       |  114 +-
 .../runners/direct/DirectTimerInternals.java    |   14 +-
 ...ecycleManagerRemovingTransformEvaluator.java |   19 +-
 .../beam/runners/direct/EmptyInputProvider.java |    4 +-
 .../beam/runners/direct/EvaluationContext.java  |   40 +-
 .../direct/ExecutorServiceParallelExecutor.java |   14 +-
 .../runners/direct/FlattenEvaluatorFactory.java |    8 +-
 .../GroupAlsoByWindowEvaluatorFactory.java      |   38 +-
 .../direct/GroupByKeyOnlyEvaluatorFactory.java  |   10 +-
 .../direct/ImmutableListBundleFactory.java      |    6 +
 .../direct/KeyedPValueTrackingVisitor.java      |   52 +-
 .../beam/runners/direct/ParDoEvaluator.java     |   29 +-
 .../runners/direct/ParDoEvaluatorFactory.java   |   25 +-
 .../direct/ParDoMultiOverrideFactory.java       |  154 +-
 .../ParDoSingleViaMultiOverrideFactory.java     |    5 +-
 .../runners/direct/RootProviderRegistry.java    |    4 +-
 ...littableProcessElementsEvaluatorFactory.java |   84 +-
 .../direct/StatefulParDoEvaluatorFactory.java   |   72 +-
 .../direct/TestStreamEvaluatorFactory.java      |   44 +-
 .../direct/TransformEvaluatorRegistry.java      |    6 +-
 .../direct/UnboundedReadEvaluatorFactory.java   |   22 +-
 .../runners/direct/ViewEvaluatorFactory.java    |   16 +-
 .../beam/runners/direct/WatermarkManager.java   |  107 +-
 .../runners/direct/WindowEvaluatorFactory.java  |   17 +-
 .../direct/WriteWithShardingFactory.java        |  148 +-
 .../runners/direct/AggregatorContainerTest.java |   18 +-
 .../direct/BoundedReadEvaluatorFactoryTest.java |   18 +-
 .../direct/CloningBundleFactoryTest.java        |   16 +-
 .../runners/direct/CommittedResultTest.java     |   23 +-
 .../CopyOnAccessInMemoryStateInternalsTest.java |   79 +-
 .../runners/direct/DirectGraphVisitorTest.java  |   55 +-
 .../DirectGroupByKeyOverrideFactoryTest.java    |   51 +
 .../beam/runners/direct/DirectMetricsTest.java  |  128 +-
 .../beam/runners/direct/DirectRunnerTest.java   |    8 +-
 .../direct/DirectTimerInternalsTest.java        |    4 +-
 ...leManagerRemovingTransformEvaluatorTest.java |  103 +-
 .../runners/direct/EvaluationContextTest.java   |   30 +-
 .../direct/FlattenEvaluatorFactoryTest.java     |    8 +-
 .../direct/GroupByKeyEvaluatorFactoryTest.java  |    5 +-
 .../GroupByKeyOnlyEvaluatorFactoryTest.java     |    5 +-
 .../ImmutabilityCheckingBundleFactoryTest.java  |    6 +-
 .../ImmutabilityEnforcementFactoryTest.java     |    3 +-
 .../direct/ImmutableListBundleFactoryTest.java  |   37 +-
 .../direct/KeyedPValueTrackingVisitorTest.java  |  167 +-
 .../beam/runners/direct/ParDoEvaluatorTest.java |    9 +-
 .../direct/ParDoMultiOverrideFactoryTest.java   |   45 +
 .../ParDoSingleViaMultiOverrideFactoryTest.java |   46 +
 .../runners/direct/SideInputContainerTest.java  |    5 +-
 .../StatefulParDoEvaluatorFactoryTest.java      |  129 +-
 .../runners/direct/StepTransformResultTest.java |    5 +-
 .../direct/TestStreamEvaluatorFactoryTest.java  |   38 +-
 .../runners/direct/TransformExecutorTest.java   |    4 +-
 .../UnboundedReadEvaluatorFactoryTest.java      |   22 +-
 .../direct/ViewEvaluatorFactoryTest.java        |   18 +-
 .../direct/WatermarkCallbackExecutorTest.java   |    5 +-
 .../runners/direct/WatermarkManagerTest.java    |   37 +-
 .../direct/WindowEvaluatorFactoryTest.java      |   87 +-
 .../direct/WriteWithShardingFactoryTest.java    |  154 +-
 runners/flink/README.md                         |  250 -
 runners/flink/examples/pom.xml                  |    2 +-
 .../beam/runners/flink/examples/WordCount.java  |    2 +-
 .../flink/examples/streaming/AutoComplete.java  |    4 +-
 .../examples/streaming/KafkaIOExamples.java     |    4 +-
 .../KafkaWindowedWordCountExample.java          |    2 +-
 .../examples/streaming/WindowedWordCount.java   |    2 +-
 runners/flink/pom.xml                           |    4 +-
 runners/flink/runner/pom.xml                    |   56 +-
 .../flink/DefaultParallelismFactory.java        |    3 +-
 .../flink/FlinkBatchPipelineTranslator.java     |  139 +
 .../flink/FlinkBatchTransformTranslators.java   |  797 ++++
 .../flink/FlinkBatchTranslationContext.java     |  154 +
 .../FlinkPipelineExecutionEnvironment.java      |   11 +-
 .../runners/flink/FlinkPipelineOptions.java     |    6 +-
 .../runners/flink/FlinkPipelineTranslator.java  |   53 +
 .../apache/beam/runners/flink/FlinkRunner.java  |  395 +-
 .../flink/FlinkStreamingPipelineTranslator.java |  230 +
 .../FlinkStreamingTransformTranslators.java     | 1043 +++++
 .../flink/FlinkStreamingTranslationContext.java |  130 +
 .../flink/FlinkStreamingViewOverrides.java      |  372 ++
 .../flink/PipelineTranslationOptimizer.java     |   72 +
 .../beam/runners/flink/TestFlinkRunner.java     |   24 +-
 .../beam/runners/flink/TranslationMode.java     |   31 +
 .../FlinkBatchPipelineTranslator.java           |  143 -
 .../FlinkBatchTransformTranslators.java         |  745 ---
 .../FlinkBatchTranslationContext.java           |  145 -
 .../translation/FlinkPipelineTranslator.java    |   53 -
 .../FlinkStreamingPipelineTranslator.java       |  154 -
 .../FlinkStreamingTransformTranslators.java     |  978 ----
 .../FlinkStreamingTranslationContext.java       |  110 -
 .../PipelineTranslationOptimizer.java           |   73 -
 .../flink/translation/TranslationMode.java      |   31 -
 .../functions/FlinkAggregatorFactory.java       |   53 +
 .../functions/FlinkAssignWindows.java           |    2 +-
 .../functions/FlinkDoFnFunction.java            |  136 +-
 .../FlinkMergingNonShuffleReduceFunction.java   |   57 +-
 .../FlinkMergingPartialReduceFunction.java      |   37 +-
 .../functions/FlinkMergingReduceFunction.java   |   31 +-
 .../functions/FlinkMultiOutputDoFnFunction.java |  126 -
 .../FlinkMultiOutputProcessContext.java         |  118 -
 .../FlinkMultiOutputPruningFunction.java        |    2 +-
 .../functions/FlinkNoElementAssignContext.java  |   68 -
 .../functions/FlinkNoOpStepContext.java         |   73 +
 .../functions/FlinkPartialReduceFunction.java   |   45 +-
 .../functions/FlinkProcessContextBase.java      |  267 --
 .../functions/FlinkReduceFunction.java          |   41 +-
 .../functions/FlinkSideInputReader.java         |   80 +
 .../FlinkSingleOutputProcessContext.java        |   69 -
 .../functions/FlinkStatefulDoFnFunction.java    |  198 +
 .../utils/SerializedPipelineOptions.java        |    4 +-
 .../wrappers/streaming/DoFnOperator.java        |  671 ++-
 .../wrappers/streaming/FlinkStateInternals.java | 1037 -----
 .../streaming/KvToByteBufferKeySelector.java    |   56 +
 .../streaming/SingletonKeyedWorkItem.java       |    2 +-
 .../streaming/SingletonKeyedWorkItemCoder.java  |   10 +-
 .../wrappers/streaming/WindowDoFnOperator.java  |  457 +-
 .../streaming/io/BoundedSourceWrapper.java      |    7 +-
 .../streaming/io/UnboundedFlinkSink.java        |    6 +
 .../streaming/io/UnboundedSourceWrapper.java    |  159 +-
 .../state/FlinkBroadcastStateInternals.java     |  865 ++++
 .../state/FlinkKeyGroupStateInternals.java      |  487 ++
 .../state/FlinkSplitStateInternals.java         |  260 ++
 .../streaming/state/FlinkStateInternals.java    | 1053 +++++
 .../state/KeyGroupCheckpointedOperator.java     |   35 +
 .../state/KeyGroupRestoringOperator.java        |   32 +
 .../wrappers/streaming/state/package-info.java  |   22 +
 .../beam/runners/flink/PipelineOptionsTest.java |   42 +-
 .../beam/runners/flink/WriteSinkITCase.java     |    5 +-
 .../flink/streaming/DoFnOperatorTest.java       |  337 +-
 .../FlinkBroadcastStateInternalsTest.java       |  245 +
 .../FlinkKeyGroupStateInternalsTest.java        |  262 ++
 .../streaming/FlinkSplitStateInternalsTest.java |  101 +
 .../streaming/FlinkStateInternalsTest.java      |   39 +-
 .../streaming/UnboundedSourceWrapperTest.java   |  570 ++-
 runners/gearpump/pom.xml                        |   51 +-
 .../gearpump/GearpumpPipelineResult.java        |   21 +-
 .../gearpump/GearpumpPipelineTranslator.java    |  388 +-
 .../beam/runners/gearpump/GearpumpRunner.java   |  376 +-
 .../runners/gearpump/TestGearpumpRunner.java    |   38 +-
 .../gearpump/examples/StreamingWordCount.java   |   98 -
 .../gearpump/examples/UnboundedTextSource.java  |  139 -
 .../runners/gearpump/examples/package-info.java |   22 -
 ...CreateGearpumpPCollectionViewTranslator.java |   14 +-
 .../CreatePCollectionViewTranslator.java        |    6 +-
 .../translators/CreateValuesTranslator.java     |   51 -
 .../FlattenPCollectionTranslator.java           |   84 -
 .../FlattenPCollectionsTranslator.java          |   83 +
 .../translators/GroupByKeyTranslator.java       |    4 +-
 .../translators/ParDoBoundMultiTranslator.java  |   32 +-
 .../translators/ParDoBoundTranslator.java       |    7 +-
 .../translators/ReadBoundedTranslator.java      |    4 +-
 .../translators/ReadUnboundedTranslator.java    |    4 +-
 .../translators/TransformTranslator.java        |    2 +-
 .../translators/TranslationContext.java         |   29 +-
 .../translators/WindowAssignTranslator.java     |  100 +
 .../translators/WindowBoundTranslator.java      |  100 -
 .../translators/functions/DoFnFunction.java     |   12 +-
 .../translators/io/UnboundedSourceWrapper.java  |    1 +
 .../translators/utils/DoFnRunnerFactory.java    |    4 +-
 .../utils/NoOpAggregatorFactory.java            |    2 +-
 .../translators/utils/NoOpStepContext.java      |    6 +-
 .../translators/utils/TranslatorUtils.java      |    2 -
 .../translators/utils/TranslatorUtilsTest.java  |    1 -
 runners/google-cloud-dataflow-java/pom.xml      |   40 +-
 .../beam/runners/dataflow/AssignWindows.java    |   89 +
 .../dataflow/BatchStatefulParDoOverrides.java   |  283 ++
 .../runners/dataflow/BatchViewOverrides.java    | 1391 ++++++
 .../dataflow/DataflowAggregatorTransforms.java  |   79 +
 .../beam/runners/dataflow/DataflowClient.java   |   44 +-
 .../dataflow/DataflowMetricUpdateExtractor.java |  109 +
 .../runners/dataflow/DataflowPipelineJob.java   |    4 +-
 .../dataflow/DataflowPipelineTranslator.java    |  606 +--
 .../beam/runners/dataflow/DataflowRunner.java   | 2466 ++--------
 .../DataflowUnboundedReadFromBoundedSource.java |  547 +++
 .../beam/runners/dataflow/ReadTranslator.java   |  102 +
 .../dataflow/StreamingViewOverrides.java        |  110 +
 .../runners/dataflow/TransformTranslator.java   |  131 +
 .../dataflow/internal/AssignWindows.java        |   89 -
 .../dataflow/internal/CustomSources.java        |   12 +-
 .../internal/DataflowAggregatorTransforms.java  |   79 -
 .../internal/DataflowMetricUpdateExtractor.java |  109 -
 .../DataflowUnboundedReadFromBoundedSource.java |  556 ---
 .../runners/dataflow/internal/IsmFormat.java    |   20 +-
 .../dataflow/internal/ReadTranslator.java       |  107 -
 .../options/DataflowPipelineOptions.java        |   36 +-
 .../DataflowPipelineWorkerPoolOptions.java      |   16 +-
 .../dataflow/testing/TestDataflowRunner.java    |   17 +-
 .../beam/runners/dataflow/util/DoFnInfo.java    |   75 +-
 .../beam/runners/dataflow/util/GcsStager.java   |   18 +-
 .../beam/runners/dataflow/util/PackageUtil.java |  363 +-
 .../beam/runners/dataflow/dataflow.properties   |    6 +-
 .../BatchStatefulParDoOverridesTest.java        |  169 +
 .../dataflow/BatchViewOverridesTest.java        |  633 +++
 .../dataflow/DataflowPipelineJobTest.java       |  184 +-
 .../DataflowPipelineTranslatorTest.java         |  145 +-
 .../runners/dataflow/DataflowRunnerTest.java    |  614 +--
 ...aflowUnboundedReadFromBoundedSourceTest.java |   79 +
 ...aflowUnboundedReadFromBoundedSourceTest.java |   83 -
 .../DataflowPipelineDebugOptionsTest.java       |    2 +-
 .../options/DataflowPipelineOptionsTest.java    |   20 +-
 .../options/DataflowProfilingOptionsTest.java   |    4 +-
 .../testing/TestDataflowRunnerTest.java         |   53 +-
 .../runners/dataflow/util/PackageUtilTest.java  |   99 +-
 runners/pom.xml                                 |   17 +-
 runners/spark/README.md                         |    8 +-
 runners/spark/pom.xml                           |   70 +-
 .../spark/SparkNativePipelineVisitor.java       |  198 +
 .../runners/spark/SparkPipelineOptions.java     |    6 +-
 .../beam/runners/spark/SparkPipelineResult.java |   67 +-
 .../apache/beam/runners/spark/SparkRunner.java  |  124 +-
 .../beam/runners/spark/SparkRunnerDebugger.java |  137 +
 .../runners/spark/TestSparkPipelineOptions.java |   61 +
 .../beam/runners/spark/TestSparkRunner.java     |  204 +-
 .../spark/aggregators/AccumulatorSingleton.java |   53 -
 .../aggregators/AggregatorsAccumulator.java     |  131 +
 .../spark/aggregators/NamedAggregators.java     |   12 +-
 .../spark/aggregators/SparkAggregators.java     |   26 +-
 .../aggregators/metrics/AggregatorMetric.java   |   44 -
 .../metrics/AggregatorMetricSource.java         |   50 -
 .../metrics/WithNamedAggregatorsSupport.java    |  174 -
 .../spark/aggregators/metrics/sink/CsvSink.java |   39 -
 .../aggregators/metrics/sink/GraphiteSink.java  |   39 -
 .../aggregators/metrics/sink/package-info.java  |   23 -
 .../coders/BeamSparkRunnerRegistrator.java      |   48 +-
 .../beam/runners/spark/coders/CoderHelpers.java |   23 +
 .../runners/spark/coders/NullWritableCoder.java |   76 -
 .../spark/coders/StatelessJavaSerializer.java   |   97 +
 .../runners/spark/coders/WritableCoder.java     |  122 -
 .../beam/runners/spark/examples/WordCount.java  |    2 +-
 .../apache/beam/runners/spark/io/ConsoleIO.java |    4 +-
 .../beam/runners/spark/io/CreateStream.java     |  198 +-
 .../beam/runners/spark/io/MicrobatchSource.java |    9 +-
 .../runners/spark/io/SparkUnboundedSource.java  |  166 +-
 .../runners/spark/metrics/AggregatorMetric.java |   43 +
 .../spark/metrics/AggregatorMetricSource.java   |   51 +
 .../runners/spark/metrics/CompositeSource.java  |   49 +
 .../spark/metrics/MetricsAccumulator.java       |  132 +
 .../spark/metrics/MetricsAccumulatorParam.java  |   42 +
 .../runners/spark/metrics/SparkBeamMetric.java  |   69 +
 .../spark/metrics/SparkBeamMetricSource.java    |   51 +
 .../spark/metrics/SparkMetricResults.java       |  181 +
 .../spark/metrics/SparkMetricsContainer.java    |  147 +
 .../spark/metrics/WithMetricsSupport.java       |  209 +
 .../runners/spark/metrics/package-info.java     |   20 +
 .../runners/spark/metrics/sink/CsvSink.java     |   38 +
 .../spark/metrics/sink/GraphiteSink.java        |   38 +
 .../spark/metrics/sink/package-info.java        |   22 +
 .../SparkGroupAlsoByWindowViaWindowSet.java     |  431 ++
 .../spark/stateful/SparkStateInternals.java     |  418 ++
 .../spark/stateful/SparkTimerInternals.java     |  193 +
 .../spark/stateful/StateSpecFunctions.java      |   60 +-
 .../spark/translation/BoundedDataset.java       |    3 +-
 .../runners/spark/translation/DoFnFunction.java |   41 +-
 .../translation/DoFnRunnerWithMetrics.java      |   91 +
 .../spark/translation/EvaluationContext.java    |   89 +-
 .../translation/GroupCombineFunctions.java      |  259 +-
 .../spark/translation/MultiDoFnFunction.java    |   39 +-
 .../translation/SparkAbstractCombineFn.java     |   12 +-
 .../spark/translation/SparkContextFactory.java  |   13 +-
 .../spark/translation/SparkGlobalCombineFn.java |   13 +-
 .../translation/SparkGroupAlsoByWindowFn.java   |  214 -
 ...SparkGroupAlsoByWindowViaOutputBufferFn.java |  179 +
 .../spark/translation/SparkKeyedCombineFn.java  |   13 +-
 .../spark/translation/SparkPCollectionView.java |   99 +
 .../spark/translation/SparkProcessContext.java  |    8 +-
 .../spark/translation/SparkRuntimeContext.java  |   84 +-
 .../spark/translation/TransformEvaluator.java   |    1 +
 .../spark/translation/TransformTranslator.java  |  467 +-
 .../spark/translation/TranslationUtils.java     |   93 +-
 .../spark/translation/streaming/Checkpoint.java |  137 +
 .../SparkRunnerStreamingContextFactory.java     |   57 +-
 .../streaming/StreamingTransformTranslator.java |  492 +-
 .../translation/streaming/UnboundedDataset.java |   63 +-
 .../runners/spark/util/BroadcastHelper.java     |  127 -
 .../spark/util/GlobalWatermarkHolder.java       |  200 +
 .../runners/spark/util/SideInputBroadcast.java  |   77 +
 .../spark/util/SparkSideInputReader.java        |    8 +-
 .../beam/runners/spark/ClearWatermarksRule.java |   37 +
 .../beam/runners/spark/ForceStreamingTest.java  |   60 +-
 .../spark/GlobalWatermarkHolderTest.java        |  151 +
 .../apache/beam/runners/spark/PipelineRule.java |  109 +
 .../runners/spark/ProvidedSparkContextTest.java |   72 +-
 .../runners/spark/ReuseSparkContextRule.java    |   46 +
 .../runners/spark/SparkPipelineStateTest.java   |   27 +-
 .../runners/spark/SparkRunnerDebuggerTest.java  |  180 +
 .../spark/aggregators/ClearAggregatorsRule.java |    5 +-
 .../metrics/sink/InMemoryMetrics.java           |   10 +-
 .../metrics/sink/NamedAggregatorsTest.java      |   25 +-
 .../coders/BeamSparkRunnerRegistratorTest.java  |   57 -
 .../runners/spark/coders/WritableCoderTest.java |   45 -
 .../beam/runners/spark/io/AvroPipelineTest.java |    6 +-
 .../beam/runners/spark/io/NumShardsTest.java    |    6 +-
 .../io/hadoop/HadoopFileFormatPipelineTest.java |    8 +-
 .../spark/metrics/SparkBeamMetricTest.java      |   60 +
 .../spark/translation/StorageLevelTest.java     |    8 +-
 .../translation/streaming/CreateStreamTest.java |  376 ++
 .../streaming/EmptyStreamAssertionTest.java     |   87 -
 .../streaming/FlattenStreamingTest.java         |  103 -
 .../streaming/KafkaStreamingTest.java           |  215 -
 .../ResumeFromCheckpointStreamingTest.java      |  308 +-
 .../streaming/SimpleStreamingWordCountTest.java |   84 -
 .../streaming/TrackStreamingSourcesTest.java    |  171 +
 .../utils/KafkaWriteOnBatchCompleted.java       |  105 -
 .../streaming/utils/PAssertStreaming.java       |  121 -
 .../utils/SparkTestPipelineOptions.java         |   42 -
 .../SparkTestPipelineOptionsForStreaming.java   |   37 -
 .../spark/src/test/resources/log4j.properties   |   30 +
 .../spark/src/test/resources/metrics.properties |   10 +-
 sdks/common/fn-api/pom.xml                      |  109 +
 .../fn-api/src/main/proto/beam_fn_api.proto     |  771 +++
 .../org/apache/beam/fn/v1/standard_coders.yaml  |  195 +
 sdks/common/pom.xml                             |   39 +
 sdks/common/runner-api/pom.xml                  |   89 +
 .../src/main/proto/beam_runner_api.proto        |  711 +++
 sdks/java/build-tools/pom.xml                   |    2 +-
 .../src/main/resources/beam/findbugs-filter.xml |   95 +-
 sdks/java/core/pom.xml                          |   28 +-
 .../main/java/org/apache/beam/sdk/Pipeline.java |   75 +-
 .../beam/sdk/annotations/Experimental.java      |    5 +-
 .../org/apache/beam/sdk/coders/AtomicCoder.java |    2 +-
 .../org/apache/beam/sdk/coders/AvroCoder.java   |   30 +-
 .../apache/beam/sdk/coders/BigDecimalCoder.java |    6 +-
 .../beam/sdk/coders/BigEndianIntegerCoder.java  |    7 +
 .../beam/sdk/coders/BigEndianLongCoder.java     |    7 +
 .../apache/beam/sdk/coders/ByteArrayCoder.java  |    7 +
 .../org/apache/beam/sdk/coders/ByteCoder.java   |    7 +
 .../apache/beam/sdk/coders/ByteStringCoder.java |    8 +
 .../java/org/apache/beam/sdk/coders/Coder.java  |    7 +
 .../apache/beam/sdk/coders/CollectionCoder.java |   12 +-
 .../org/apache/beam/sdk/coders/CustomCoder.java |   18 +-
 .../apache/beam/sdk/coders/DelegateCoder.java   |   29 +-
 .../org/apache/beam/sdk/coders/DoubleCoder.java |    7 +
 .../apache/beam/sdk/coders/DurationCoder.java   |    8 +
 .../apache/beam/sdk/coders/InstantCoder.java    |    7 +
 .../apache/beam/sdk/coders/IterableCoder.java   |   12 +-
 .../beam/sdk/coders/IterableLikeCoder.java      |    6 +-
 .../org/apache/beam/sdk/coders/JAXBCoder.java   |   48 +-
 .../org/apache/beam/sdk/coders/KvCoder.java     |   35 +-
 .../beam/sdk/coders/LengthPrefixCoder.java      |  145 +
 .../org/apache/beam/sdk/coders/ListCoder.java   |    7 +
 .../org/apache/beam/sdk/coders/MapCoder.java    |   62 +-
 .../apache/beam/sdk/coders/NullableCoder.java   |    6 +
 .../beam/sdk/coders/SerializableCoder.java      |   17 +-
 .../org/apache/beam/sdk/coders/SetCoder.java    |   12 +-
 .../apache/beam/sdk/coders/StandardCoder.java   |   42 +-
 .../beam/sdk/coders/StringDelegateCoder.java    |   16 +-
 .../apache/beam/sdk/coders/StringUtf8Coder.java |   18 +-
 .../beam/sdk/coders/TableRowJsonCoder.java      |    7 +
 .../beam/sdk/coders/TextualIntegerCoder.java    |    8 +
 .../org/apache/beam/sdk/coders/VarIntCoder.java |   10 +-
 .../apache/beam/sdk/coders/VarLongCoder.java    |    7 +
 .../org/apache/beam/sdk/coders/VoidCoder.java   |    7 +
 .../beam/sdk/coders/protobuf/ProtoCoder.java    |    8 +-
 .../java/org/apache/beam/sdk/io/AvroIO.java     |    4 +-
 .../java/org/apache/beam/sdk/io/AvroSource.java |    5 -
 .../sdk/io/BoundedReadFromUnboundedSource.java  |   79 +-
 .../org/apache/beam/sdk/io/BoundedSource.java   |    8 -
 .../apache/beam/sdk/io/CompressedSource.java    |   40 +-
 .../org/apache/beam/sdk/io/CountingSource.java  |    5 -
 .../org/apache/beam/sdk/io/FileBasedSink.java   |   22 +
 .../java/org/apache/beam/sdk/io/FileSystem.java |  115 +-
 .../org/apache/beam/sdk/io/FileSystems.java     |   88 +-
 .../org/apache/beam/sdk/io/LocalFileSystem.java |  195 +-
 .../org/apache/beam/sdk/io/LocalResourceId.java |  136 +
 .../java/org/apache/beam/sdk/io/PubsubIO.java   | 1142 ++---
 .../apache/beam/sdk/io/PubsubUnboundedSink.java |   88 +-
 .../beam/sdk/io/PubsubUnboundedSource.java      |  104 +-
 .../main/java/org/apache/beam/sdk/io/Read.java  |    7 +-
 .../java/org/apache/beam/sdk/io/TextIO.java     |  401 +-
 .../main/java/org/apache/beam/sdk/io/Write.java |  706 +--
 .../java/org/apache/beam/sdk/io/XmlSource.java  |    5 -
 .../apache/beam/sdk/io/fs/CreateOptions.java    |   60 +
 .../org/apache/beam/sdk/io/fs/MatchResult.java  |  125 +
 .../apache/beam/sdk/io/fs/ResolveOptions.java   |   41 +
 .../org/apache/beam/sdk/io/fs/ResourceId.java   |   85 +
 .../org/apache/beam/sdk/io/fs/package-info.java |   22 +
 .../beam/sdk/metrics/DistributionData.java      |    3 +-
 .../org/apache/beam/sdk/metrics/MetricKey.java  |    3 +-
 .../apache/beam/sdk/metrics/MetricUpdates.java  |    3 +-
 .../org/apache/beam/sdk/options/GcpOptions.java |   25 +-
 .../org/apache/beam/sdk/options/GcsOptions.java |    4 +-
 .../beam/sdk/options/PipelineOptions.java       |    2 +-
 .../sdk/options/PipelineOptionsFactory.java     |   10 +-
 .../apache/beam/sdk/options/ValueProvider.java  |    6 +-
 .../beam/sdk/runners/PTransformMatcher.java     |   32 +
 .../sdk/runners/PTransformOverrideFactory.java  |   31 +
 .../apache/beam/sdk/runners/PipelineRunner.java |   14 -
 .../beam/sdk/runners/TransformHierarchy.java    |  285 +-
 .../apache/beam/sdk/testing/Annotations.java    |   72 +
 .../beam/sdk/testing/CoderProperties.java       |   85 +-
 .../testing/FlattenWithHeterogeneousCoders.java |   29 +
 .../org/apache/beam/sdk/testing/PAssert.java    |  259 +-
 .../apache/beam/sdk/testing/RegexMatcher.java   |   49 +
 .../beam/sdk/testing/RunnableOnService.java     |   14 +-
 .../beam/sdk/testing/SourceTestUtils.java       |    5 -
 .../apache/beam/sdk/testing/TestPipeline.java   |  111 +-
 .../org/apache/beam/sdk/testing/TestStream.java |   18 +-
 .../beam/sdk/testing/UsesAttemptedMetrics.java  |   28 +
 .../beam/sdk/testing/UsesCommittedMetrics.java  |   28 +
 .../apache/beam/sdk/testing/UsesMapState.java   |   25 +
 .../apache/beam/sdk/testing/UsesMetrics.java    |   24 -
 .../apache/beam/sdk/testing/UsesSetState.java   |   25 +
 .../apache/beam/sdk/testing/UsesTestStream.java |   24 +
 .../sdk/testing/UsesUnboundedPCollections.java  |   23 +
 .../beam/sdk/testing/ValueInSingleWindow.java   |    6 +-
 .../apache/beam/sdk/transforms/Aggregator.java  |   19 -
 .../sdk/transforms/AggregatorRetriever.java     |   13 +-
 .../beam/sdk/transforms/AppliedPTransform.java  |   31 +-
 .../org/apache/beam/sdk/transforms/Combine.java |  204 +-
 .../apache/beam/sdk/transforms/CombineFns.java  |   14 +-
 .../org/apache/beam/sdk/transforms/Count.java   |   28 +-
 .../org/apache/beam/sdk/transforms/Create.java  |   88 +-
 .../sdk/transforms/DelegatingAggregator.java    |    2 +-
 .../org/apache/beam/sdk/transforms/DoFn.java    |   77 +-
 .../beam/sdk/transforms/DoFnAdapters.java       |  504 --
 .../apache/beam/sdk/transforms/DoFnTester.java  |   77 +-
 .../org/apache/beam/sdk/transforms/Flatten.java |   15 +-
 .../apache/beam/sdk/transforms/GroupByKey.java  |   10 +-
 .../org/apache/beam/sdk/transforms/Latest.java  |   80 +-
 .../org/apache/beam/sdk/transforms/Max.java     |  124 +-
 .../org/apache/beam/sdk/transforms/Mean.java    |   27 +-
 .../org/apache/beam/sdk/transforms/Min.java     |  122 +-
 .../org/apache/beam/sdk/transforms/OldDoFn.java |  758 ---
 .../apache/beam/sdk/transforms/PTransform.java  |    9 +-
 .../org/apache/beam/sdk/transforms/ParDo.java   |   55 +-
 .../org/apache/beam/sdk/transforms/Regex.java   |  589 ++-
 .../org/apache/beam/sdk/transforms/Sample.java  |  121 +-
 .../beam/sdk/transforms/SimpleFunction.java     |   44 +-
 .../org/apache/beam/sdk/transforms/Sum.java     |   57 +-
 .../apache/beam/sdk/transforms/ToString.java    |  181 +
 .../org/apache/beam/sdk/transforms/Top.java     |   27 +-
 .../org/apache/beam/sdk/transforms/View.java    |   10 +
 .../sdk/transforms/display/DisplayData.java     |    6 +-
 .../beam/sdk/transforms/join/CoGbkResult.java   |   35 +-
 .../transforms/join/KeyedPCollectionTuple.java  |   41 +-
 .../reflect/ByteBuddyDoFnInvokerFactory.java    |   16 +-
 .../reflect/ByteBuddyOnTimerInvokerFactory.java |    4 +-
 .../sdk/transforms/reflect/DoFnInvoker.java     |   42 +-
 .../sdk/transforms/reflect/DoFnInvokers.java    |  142 +-
 .../sdk/transforms/reflect/DoFnSignature.java   |   71 +-
 .../sdk/transforms/reflect/DoFnSignatures.java  |   82 +-
 .../transforms/splittabledofn/OffsetRange.java  |   71 +
 .../splittabledofn/OffsetRangeTracker.java      |   75 +
 .../splittabledofn/RestrictionTracker.java      |    2 +-
 .../beam/sdk/transforms/windowing/AfterAll.java |    7 +
 .../windowing/AfterDelayFromFirstElement.java   |  251 -
 .../sdk/transforms/windowing/AfterEach.java     |    7 +
 .../sdk/transforms/windowing/AfterFirst.java    |    7 +
 .../sdk/transforms/windowing/AfterPane.java     |   10 -
 .../windowing/AfterProcessingTime.java          |  105 +-
 .../AfterSynchronizedProcessingTime.java        |   31 +-
 .../sdk/transforms/windowing/BoundedWindow.java |   31 +
 .../sdk/transforms/windowing/GlobalWindow.java  |    6 +
 .../sdk/transforms/windowing/GlobalWindows.java |   10 +
 .../transforms/windowing/IntervalWindow.java    |   19 +-
 .../sdk/transforms/windowing/OutputTimeFns.java |   45 +
 .../beam/sdk/transforms/windowing/PaneInfo.java |    2 -
 .../windowing/TimestampTransform.java           |   65 +
 .../beam/sdk/transforms/windowing/Triggers.java |  320 ++
 .../beam/sdk/transforms/windowing/Window.java   |   96 +-
 .../org/apache/beam/sdk/util/ApiSurface.java    |  446 +-
 .../beam/sdk/util/BaseExecutionContext.java     |  174 -
 .../org/apache/beam/sdk/util/CoderUtils.java    |   30 +-
 .../beam/sdk/util/CombineContextFactory.java    |   18 -
 .../org/apache/beam/sdk/util/DefaultBucket.java |  105 +
 .../util/EmptyOnDeserializationThreadLocal.java |   39 +
 .../apache/beam/sdk/util/ExecutionContext.java  |  100 -
 .../apache/beam/sdk/util/GcpProjectUtil.java    |    2 +-
 .../apache/beam/sdk/util/GcsPathValidator.java  |    3 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |  334 +-
 .../org/apache/beam/sdk/util/NameUtils.java     |  167 +
 .../org/apache/beam/sdk/util/PropertyNames.java |    1 +
 .../org/apache/beam/sdk/util/PubsubClient.java  |   28 +-
 .../apache/beam/sdk/util/PubsubGrpcClient.java  |    6 +-
 .../apache/beam/sdk/util/PubsubJsonClient.java  |    4 +-
 .../apache/beam/sdk/util/PubsubTestClient.java  |    6 +-
 .../org/apache/beam/sdk/util/StringUtils.java   |  100 -
 .../java/org/apache/beam/sdk/util/Timer.java    |   11 +
 .../apache/beam/sdk/util/TimerInternals.java    |  273 --
 .../org/apache/beam/sdk/util/WindowedValue.java |   23 +-
 .../beam/sdk/util/WindowingInternals.java       |   82 -
 .../beam/sdk/util/WindowingStrategies.java      |  266 ++
 .../apache/beam/sdk/util/WindowingStrategy.java |   53 +-
 .../beam/sdk/util/common/ReflectHelpers.java    |   16 +-
 .../sdk/util/state/InMemoryStateInternals.java  |  430 --
 .../sdk/util/state/InMemoryTimerInternals.java  |  275 --
 .../apache/beam/sdk/util/state/MapState.java    |   93 +
 .../sdk/util/state/MergingStateAccessor.java    |   40 -
 .../beam/sdk/util/state/ReadableState.java      |    4 +-
 .../apache/beam/sdk/util/state/SetState.java    |   71 +
 .../beam/sdk/util/state/StateAccessor.java      |   37 -
 .../apache/beam/sdk/util/state/StateBinder.java |    6 +
 .../beam/sdk/util/state/StateContexts.java      |   63 -
 .../beam/sdk/util/state/StateInternals.java     |   57 -
 .../sdk/util/state/StateInternalsFactory.java   |   35 -
 .../beam/sdk/util/state/StateMerging.java       |  259 --
 .../beam/sdk/util/state/StateNamespace.java     |   56 -
 .../sdk/util/state/StateNamespaceForTest.java   |   65 -
 .../beam/sdk/util/state/StateNamespaces.java    |  278 --
 .../apache/beam/sdk/util/state/StateSpecs.java  |  155 +-
 .../apache/beam/sdk/util/state/StateTable.java  |   82 -
 .../apache/beam/sdk/util/state/StateTag.java    |  111 -
 .../apache/beam/sdk/util/state/StateTags.java   |  290 --
 .../util/state/TestInMemoryStateInternals.java  |   61 -
 .../sdk/util/state/TimerInternalsFactory.java   |   36 -
 .../java/org/apache/beam/sdk/values/PBegin.java |    9 +-
 .../apache/beam/sdk/values/PCollectionList.java |   62 +-
 .../beam/sdk/values/PCollectionTuple.java       |   33 +-
 .../java/org/apache/beam/sdk/values/PDone.java  |    4 +-
 .../java/org/apache/beam/sdk/values/PInput.java |   13 +-
 .../org/apache/beam/sdk/values/POutput.java     |   24 +-
 .../beam/sdk/values/POutputValueBase.java       |    4 +-
 .../java/org/apache/beam/sdk/values/PValue.java |   24 +-
 .../org/apache/beam/sdk/values/PValueBase.java  |   18 +-
 .../apache/beam/sdk/values/TaggedPValue.java    |   42 +
 .../beam/sdk/values/TimestampedValue.java       |   10 +-
 .../apache/beam/sdk/values/TupleTagList.java    |    6 +
 .../org/apache/beam/sdk/values/TypedPValue.java |   78 +-
 .../org/apache/beam/SdkCoreApiSurfaceTest.java  |   62 +
 .../sdk/AggregatorPipelineExtractorTest.java    |   16 +-
 .../java/org/apache/beam/sdk/PipelineTest.java  |   37 +-
 .../apache/beam/sdk/coders/AvroCoderTest.java   |   18 +-
 .../beam/sdk/coders/BigDecimalCoderTest.java    |   46 +-
 .../sdk/coders/BigEndianIntegerCoderTest.java   |    9 +
 .../beam/sdk/coders/BigEndianLongCoderTest.java |    9 +
 .../beam/sdk/coders/ByteArrayCoderTest.java     |    6 +
 .../apache/beam/sdk/coders/ByteCoderTest.java   |    9 +
 .../beam/sdk/coders/ByteStringCoderTest.java    |    8 +
 .../beam/sdk/coders/CoderRegistryTest.java      |   12 +-
 .../org/apache/beam/sdk/coders/CoderTest.java   |    8 +
 .../beam/sdk/coders/CollectionCoderTest.java    |   16 +
 .../apache/beam/sdk/coders/CommonCoderTest.java |  351 ++
 .../beam/sdk/coders/DefaultCoderTest.java       |    4 +-
 .../beam/sdk/coders/DelegateCoderTest.java      |   35 +-
 .../apache/beam/sdk/coders/DoubleCoderTest.java |    9 +
 .../beam/sdk/coders/DurationCoderTest.java      |   10 +
 .../beam/sdk/coders/InstantCoderTest.java       |    9 +
 .../beam/sdk/coders/IterableCoderTest.java      |   27 +-
 .../apache/beam/sdk/coders/JAXBCoderTest.java   |   26 +-
 .../org/apache/beam/sdk/coders/KvCoderTest.java |   29 +
 .../beam/sdk/coders/LengthPrefixCoderTest.java  |  129 +
 .../apache/beam/sdk/coders/ListCoderTest.java   |   16 +-
 .../apache/beam/sdk/coders/MapCoderTest.java    |   21 +-
 .../beam/sdk/coders/NullableCoderTest.java      |   12 +
 .../beam/sdk/coders/SerializableCoderTest.java  |   16 +-
 .../apache/beam/sdk/coders/SetCoderTest.java    |   16 +
 .../beam/sdk/coders/StandardCoderTest.java      |   40 +
 .../sdk/coders/StringDelegateCoderTest.java     |   11 +
 .../beam/sdk/coders/StringUtf8CoderTest.java    |    9 +
 .../beam/sdk/coders/TableRowJsonCoderTest.java  |    9 +
 .../sdk/coders/TextualIntegerCoderTest.java     |    9 +
 .../apache/beam/sdk/coders/VarIntCoderTest.java |    9 +
 .../beam/sdk/coders/VarLongCoderTest.java       |    9 +
 .../apache/beam/sdk/coders/VoidCoderTest.java   |   40 +
 .../beam/sdk/io/AvroIOGeneratedClassTest.java   |  285 --
 .../java/org/apache/beam/sdk/io/AvroIOTest.java |   18 +-
 .../apache/beam/sdk/io/AvroIOTransformTest.java |  324 ++
 .../io/BoundedReadFromUnboundedSourceTest.java  |    6 +-
 .../beam/sdk/io/CompressedSourceTest.java       |   89 +-
 .../apache/beam/sdk/io/CountingInputTest.java   |   12 +-
 .../apache/beam/sdk/io/CountingSourceTest.java  |   13 +-
 .../apache/beam/sdk/io/FileBasedSinkTest.java   |   17 +-
 .../apache/beam/sdk/io/FileBasedSourceTest.java |    9 +-
 .../org/apache/beam/sdk/io/FileSystemsTest.java |   61 +-
 .../apache/beam/sdk/io/LocalFileSystemTest.java |  318 ++
 .../apache/beam/sdk/io/LocalResourceIdTest.java |  226 +
 .../beam/sdk/io/OffsetBasedSourceTest.java      |    5 -
 .../org/apache/beam/sdk/io/PubsubIOTest.java    |   86 +-
 .../beam/sdk/io/PubsubUnboundedSinkTest.java    |   43 +-
 .../beam/sdk/io/PubsubUnboundedSourceTest.java  |   22 +-
 .../java/org/apache/beam/sdk/io/ReadTest.java   |    5 -
 .../java/org/apache/beam/sdk/io/TextIOTest.java |  222 +-
 .../java/org/apache/beam/sdk/io/WriteTest.java  |  158 +-
 .../org/apache/beam/sdk/io/XmlSourceTest.java   |   10 +-
 .../apache/beam/sdk/metrics/MetricMatchers.java |  192 +-
 .../apache/beam/sdk/metrics/MetricsTest.java    |  124 +-
 .../apache/beam/sdk/options/GcpOptionsTest.java |   32 +-
 .../sdk/options/PipelineOptionsFactoryTest.java |    6 +-
 .../beam/sdk/options/PipelineOptionsTest.java   |    3 +-
 .../sdk/options/ProxyInvocationHandlerTest.java |    5 +-
 .../beam/sdk/options/ValueProviderTest.java     |   36 +-
 .../sdk/options/ValueProviderUtilsTest.java     |    2 +-
 .../sdk/runners/TransformHierarchyTest.java     |  320 +-
 .../beam/sdk/runners/TransformTreeTest.java     |   34 +-
 .../beam/sdk/testing/GatherAllPanesTest.java    |    7 +-
 .../apache/beam/sdk/testing/PAssertTest.java    |  118 +-
 .../beam/sdk/testing/TestPipelineTest.java      |  504 +-
 .../apache/beam/sdk/testing/TestStreamTest.java |   45 +-
 .../testing/ValueInSingleWindowCoderTest.java   |    7 +
 .../transforms/ApproximateQuantilesTest.java    |   12 +-
 .../sdk/transforms/ApproximateUniqueTest.java   |  487 +-
 .../beam/sdk/transforms/CombineFnsTest.java     |   25 +-
 .../apache/beam/sdk/transforms/CombineTest.java |  174 +-
 .../apache/beam/sdk/transforms/CountTest.java   |   15 +-
 .../apache/beam/sdk/transforms/CreateTest.java  |  119 +-
 .../beam/sdk/transforms/DistinctTest.java       |   12 +-
 .../DoFnDelegatingAggregatorTest.java           |  142 -
 .../apache/beam/sdk/transforms/DoFnTest.java    |   19 +-
 .../beam/sdk/transforms/DoFnTesterTest.java     |   12 +-
 .../apache/beam/sdk/transforms/FilterTest.java  |   18 +-
 .../sdk/transforms/FlatMapElementsTest.java     |   10 +-
 .../apache/beam/sdk/transforms/FlattenTest.java |   74 +-
 .../beam/sdk/transforms/GroupByKeyTest.java     |   32 +-
 .../apache/beam/sdk/transforms/KeysTest.java    |    9 +-
 .../apache/beam/sdk/transforms/KvSwapTest.java  |   22 +-
 .../apache/beam/sdk/transforms/LatestTest.java  |   23 +-
 .../beam/sdk/transforms/MapElementsTest.java    |   14 +-
 .../org/apache/beam/sdk/transforms/MaxTest.java |   20 +-
 .../apache/beam/sdk/transforms/MeanTest.java    |    7 +-
 .../org/apache/beam/sdk/transforms/MinTest.java |   21 +-
 .../apache/beam/sdk/transforms/NoOpOldDoFn.java |   71 -
 .../beam/sdk/transforms/OldDoFnContextTest.java |   69 -
 .../apache/beam/sdk/transforms/OldDoFnTest.java |  188 -
 .../beam/sdk/transforms/ParDoLifecycleTest.java |   17 +-
 .../apache/beam/sdk/transforms/ParDoTest.java   |  704 ++-
 .../beam/sdk/transforms/PartitionTest.java      |    8 +-
 .../apache/beam/sdk/transforms/RegexTest.java   |  148 +-
 .../apache/beam/sdk/transforms/SampleTest.java  |  419 +-
 .../beam/sdk/transforms/SimpleFunctionTest.java |   43 +
 .../beam/sdk/transforms/SimpleStatsFnsTest.java |   36 +-
 .../beam/sdk/transforms/SplittableDoFnTest.java |   83 +-
 .../org/apache/beam/sdk/transforms/SumTest.java |   24 +-
 .../beam/sdk/transforms/ToStringTest.java       |  125 +
 .../org/apache/beam/sdk/transforms/TopTest.java |   33 +-
 .../apache/beam/sdk/transforms/ValuesTest.java  |    7 +-
 .../apache/beam/sdk/transforms/ViewTest.java    |  108 +-
 .../beam/sdk/transforms/WithKeysTest.java       |    8 +-
 .../beam/sdk/transforms/WithTimestampsTest.java |    9 +-
 .../display/DisplayDataEvaluator.java           |    8 +-
 .../sdk/transforms/display/DisplayDataTest.java |   15 +
 .../transforms/join/CoGbkResultCoderTest.java   |   10 +-
 .../sdk/transforms/join/CoGroupByKeyTest.java   |   11 +-
 .../sdk/transforms/join/UnionCoderTest.java     |   17 +-
 .../transforms/reflect/DoFnInvokersTest.java    |   70 +-
 .../DoFnSignaturesProcessElementTest.java       |   40 +-
 .../DoFnSignaturesSplittableDoFnTest.java       |    3 +-
 .../transforms/reflect/DoFnSignaturesTest.java  |    6 +-
 .../splittabledofn/OffsetRangeTrackerTest.java  |  111 +
 .../windowing/AfterProcessingTimeTest.java      |    2 +-
 .../AfterSynchronizedProcessingTimeTest.java    |    2 +-
 .../transforms/windowing/GlobalWindowTest.java  |   64 +
 .../transforms/windowing/OutputTimeFnsTest.java |   51 +
 .../sdk/transforms/windowing/TriggersTest.java  |  100 +
 .../sdk/transforms/windowing/WindowTest.java    |  204 +-
 .../sdk/transforms/windowing/WindowingTest.java |   15 +-
 .../apache/beam/sdk/util/ApiSurfaceTest.java    |  152 +-
 .../apache/beam/sdk/util/CombineFnUtilTest.java |    8 +-
 .../apache/beam/sdk/util/DefaultBucketTest.java |  112 +
 .../beam/sdk/util/FileIOChannelFactoryTest.java |   13 +-
 .../beam/sdk/util/GcsPathValidatorTest.java     |   17 +-
 .../org/apache/beam/sdk/util/GcsUtilTest.java   |   86 +-
 .../org/apache/beam/sdk/util/NameUtilsTest.java |  177 +
 .../beam/sdk/util/PubsubGrpcClientTest.java     |    8 +-
 .../beam/sdk/util/PubsubJsonClientTest.java     |    3 +-
 .../beam/sdk/util/PubsubTestClientTest.java     |    4 +-
 .../org/apache/beam/sdk/util/ReshuffleTest.java |   11 +-
 .../beam/sdk/util/SerializableUtilsTest.java    |    4 +-
 .../apache/beam/sdk/util/StringUtilsTest.java   |  100 -
 .../beam/sdk/util/TimerInternalsTest.java       |  101 -
 .../beam/sdk/util/ValueWithRecordIdTest.java    |   34 +
 .../apache/beam/sdk/util/WindowedValueTest.java |   23 +
 .../beam/sdk/util/WindowingStrategiesTest.java  |   91 +
 .../util/state/InMemoryStateInternalsTest.java  |  348 --
 .../util/state/InMemoryTimerInternalsTest.java  |  153 -
 .../sdk/util/state/StateNamespacesTest.java     |  130 -
 .../beam/sdk/util/state/StateTagTest.java       |  173 -
 .../beam/sdk/values/PCollectionListTest.java    |  117 +
 .../beam/sdk/values/PCollectionTupleTest.java   |   80 +-
 .../org/apache/beam/sdk/values/PDoneTest.java   |    9 +-
 .../beam/sdk/values/TimestampedValueTest.java   |   19 +-
 .../apache/beam/sdk/values/TypedPValueTest.java |   17 +-
 sdks/java/extensions/jackson/pom.xml            |  125 +
 .../beam/sdk/extensions/jackson/AsJsons.java    |   76 +
 .../beam/sdk/extensions/jackson/ParseJsons.java |   75 +
 .../sdk/extensions/jackson/package-info.java    |   22 +
 .../jackson/JacksonTransformsTest.java          |  242 +
 sdks/java/extensions/join-library/README.md     |   10 -
 sdks/java/extensions/join-library/pom.xml       |    3 +-
 .../extensions/joinlibrary/InnerJoinTest.java   |   23 +-
 .../joinlibrary/OuterLeftJoinTest.java          |   31 +-
 .../joinlibrary/OuterRightJoinTest.java         |   31 +-
 sdks/java/extensions/pom.xml                    |    3 +-
 sdks/java/extensions/sorter/pom.xml             |    5 +-
 .../sorter/BufferedExternalSorter.java          |   23 +-
 .../sorter/BufferedExternalSorterTest.java      |   46 +-
 .../sdk/extensions/sorter/SortValuesTest.java   |    9 +-
 sdks/java/harness/pom.xml                       |  177 +
 .../org/apache/beam/fn/harness/FnHarness.java   |  131 +
 .../harness/channel/ManagedChannelFactory.java  |   80 +
 .../harness/channel/SocketAddressFactory.java   |   64 +
 .../beam/fn/harness/channel/package-info.java   |   22 +
 .../fn/harness/control/BeamFnControlClient.java |  166 +
 .../harness/control/ProcessBundleHandler.java   |  334 ++
 .../fn/harness/control/RegisterHandler.java     |   92 +
 .../beam/fn/harness/control/package-info.java   |   22 +
 .../BeamFnDataBufferingOutboundObserver.java    |  135 +
 .../beam/fn/harness/data/BeamFnDataClient.java  |   64 +
 .../fn/harness/data/BeamFnDataGrpcClient.java   |  122 +
 .../harness/data/BeamFnDataGrpcMultiplexer.java |  141 +
 .../harness/data/BeamFnDataInboundObserver.java |   81 +
 .../beam/fn/harness/data/package-info.java      |   22 +
 .../fn/harness/fake/FakeAggregatorFactory.java  |   52 +
 .../beam/fn/harness/fake/FakeStepContext.java   |   70 +
 .../beam/fn/harness/fake/package-info.java      |   22 +
 .../harness/fn/CloseableThrowingConsumer.java   |   23 +
 .../beam/fn/harness/fn/ThrowingBiFunction.java  |   32 +
 .../beam/fn/harness/fn/ThrowingConsumer.java    |   32 +
 .../beam/fn/harness/fn/ThrowingFunction.java    |   32 +
 .../beam/fn/harness/fn/ThrowingRunnable.java    |   30 +
 .../apache/beam/fn/harness/fn/package-info.java |   22 +
 .../fn/harness/logging/BeamFnLoggingClient.java |  310 ++
 .../beam/fn/harness/logging/package-info.java   |   22 +
 .../apache/beam/fn/harness/package-info.java    |   22 +
 .../beam/fn/harness/stream/AdvancingPhaser.java |   36 +
 .../harness/stream/BufferingStreamObserver.java |  166 +
 .../fn/harness/stream/DirectStreamObserver.java |   71 +
 .../ForwardingClientResponseObserver.java       |   63 +
 .../harness/stream/StreamObserverFactory.java   |   91 +
 .../beam/fn/harness/stream/package-info.java    |   22 +
 .../beam/runners/core/BeamFnDataReadRunner.java |  104 +
 .../runners/core/BeamFnDataWriteRunner.java     |   87 +
 .../beam/runners/core/BoundedSourceRunner.java  |  105 +
 .../apache/beam/runners/core/package-info.java  |   22 +
 .../apache/beam/fn/harness/FnHarnessTest.java   |  130 +
 .../channel/ManagedChannelFactoryTest.java      |   74 +
 .../channel/SocketAddressFactoryTest.java       |   56 +
 .../control/BeamFnControlClientTest.java        |  182 +
 .../control/ProcessBundleHandlerTest.java       |  675 +++
 .../fn/harness/control/RegisterHandlerTest.java |   80 +
 ...BeamFnDataBufferingOutboundObserverTest.java |  147 +
 .../harness/data/BeamFnDataGrpcClientTest.java  |  318 ++
 .../data/BeamFnDataGrpcMultiplexerTest.java     |   98 +
 .../data/BeamFnDataInboundObserverTest.java     |  116 +
 .../logging/BeamFnLoggingClientTest.java        |  169 +
 .../fn/harness/stream/AdvancingPhaserTest.java  |   48 +
 .../stream/BufferingStreamObserverTest.java     |  146 +
 .../stream/DirectStreamObserverTest.java        |  139 +
 .../ForwardingClientResponseObserverTest.java   |   60 +
 .../stream/StreamObserverFactoryTest.java       |   84 +
 .../beam/fn/harness/test/TestExecutors.java     |   85 +
 .../beam/fn/harness/test/TestExecutorsTest.java |  160 +
 .../beam/fn/harness/test/TestStreams.java       |  162 +
 .../beam/fn/harness/test/TestStreamsTest.java   |   84 +
 .../runners/core/BeamFnDataReadRunnerTest.java  |  187 +
 .../runners/core/BeamFnDataWriteRunnerTest.java |  155 +
 .../runners/core/BoundedSourceRunnerTest.java   |  113 +
 sdks/java/io/elasticsearch/pom.xml              |  149 +
 .../sdk/io/elasticsearch/ElasticsearchIO.java   |  815 ++++
 .../beam/sdk/io/elasticsearch/package-info.java |   20 +
 .../src/test/contrib/create_elk_container.sh    |   24 +
 .../elasticsearch/ElasticSearchIOTestUtils.java |  129 +
 .../sdk/io/elasticsearch/ElasticsearchIOIT.java |  154 +
 .../io/elasticsearch/ElasticsearchIOTest.java   |  358 ++
 .../elasticsearch/ElasticsearchTestDataSet.java |  109 +
 .../elasticsearch/ElasticsearchTestOptions.java |   46 +
 sdks/java/io/google-cloud-platform/pom.xml      |   16 +-
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    |  450 +-
 .../sdk/io/gcp/bigquery/BigQueryServices.java   |   23 +-
 .../io/gcp/bigquery/BigQueryServicesImpl.java   |  118 +-
 .../gcp/bigquery/BigQueryTableRowIterator.java  |   82 +-
 .../beam/sdk/io/gcp/bigtable/BigtableIO.java    |    8 -
 .../io/gcp/bigtable/BigtableTestOptions.java    |   37 -
 .../beam/sdk/io/gcp/datastore/DatastoreV1.java  |  149 +-
 .../beam/sdk/io/gcp/storage/GcsFileSystem.java  |  210 +-
 .../io/gcp/storage/GcsFileSystemRegistrar.java  |    9 +-
 .../beam/sdk/io/gcp/storage/GcsResourceId.java  |  116 +
 .../apache/beam/sdk/io/gcp/ApiSurfaceTest.java  |  134 -
 .../beam/sdk/io/gcp/GcpApiSurfaceTest.java      |   79 +
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     |  335 +-
 .../gcp/bigquery/BigQueryServicesImplTest.java  |  141 +
 .../bigquery/BigQueryTableRowIteratorTest.java  |   51 +-
 .../sdk/io/gcp/bigquery/BigQueryUtilTest.java   |    3 +-
 .../sdk/io/gcp/bigtable/BigtableIOTest.java     |   24 +-
 .../io/gcp/bigtable/BigtableTestOptions.java    |   37 +
 .../sdk/io/gcp/datastore/DatastoreV1Test.java   |   15 +-
 .../sdk/io/gcp/datastore/SplitQueryFnIT.java    |    2 +-
 .../sdk/io/gcp/storage/GcsFileSystemTest.java   |  274 ++
 .../sdk/io/gcp/storage/GcsResourceIdTest.java   |  147 +
 sdks/java/io/hadoop-common/pom.xml              |   86 +
 .../io/hadoop/SerializableConfiguration.java    |   96 +
 .../beam/sdk/io/hadoop/WritableCoder.java       |  116 +
 .../apache/beam/sdk/io/hadoop/package-info.java |   22 +
 .../hadoop/SerializableConfigurationTest.java   |   75 +
 .../beam/sdk/io/hadoop/WritableCoderTest.java   |   45 +
 sdks/java/io/hadoop-input-format/README.md      |  167 +
 sdks/java/io/hadoop-input-format/pom.xml        |  136 +
 .../hadoop/inputformat/HadoopInputFormatIO.java |  941 ++++
 .../sdk/io/hadoop/inputformat/package-info.java |   23 +
 .../ConfigurableEmployeeInputFormat.java        |  131 +
 .../sdk/io/hadoop/inputformat/Employee.java     |   85 +
 .../hadoop/inputformat/EmployeeInputFormat.java |  172 +
 .../inputformat/HadoopInputFormatIOTest.java    |  844 ++++
 .../ReuseObjectsEmployeeInputFormat.java        |  176 +
 .../hadoop/inputformat/TestEmployeeDataSet.java |   76 +
 sdks/java/io/hbase/pom.xml                      |  233 +
 .../org/apache/beam/sdk/io/hbase/HBaseIO.java   |  693 +++
 .../beam/sdk/io/hbase/HBaseMutationCoder.java   |   71 +
 .../beam/sdk/io/hbase/HBaseResultCoder.java     |   55 +
 .../beam/sdk/io/hbase/SerializableScan.java     |   55 +
 .../apache/beam/sdk/io/hbase/package-info.java  |   24 +
 .../apache/beam/sdk/io/hbase/HBaseIOTest.java   |  430 ++
 .../sdk/io/hbase/HBaseMutationCoderTest.java    |   52 +
 .../beam/sdk/io/hbase/HBaseResultCoderTest.java |   41 +
 .../beam/sdk/io/hbase/SerializableScanTest.java |   56 +
 sdks/java/io/hdfs/pom.xml                       |   64 +-
 .../beam/sdk/io/hdfs/AvroHDFSFileSource.java    |  142 -
 .../beam/sdk/io/hdfs/AvroWrapperCoder.java      |  114 -
 .../apache/beam/sdk/io/hdfs/HDFSFileSink.java   |  301 +-
 .../apache/beam/sdk/io/hdfs/HDFSFileSource.java |  528 ++-
 .../beam/sdk/io/hdfs/HadoopFileSystem.java      |   44 +-
 .../beam/sdk/io/hdfs/HadoopResourceId.java      |   42 +
 .../org/apache/beam/sdk/io/hdfs/UGIHelper.java  |   38 +
 .../apache/beam/sdk/io/hdfs/WritableCoder.java  |  116 -
 .../SimpleAuthAvroHDFSFileSource.java           |   82 -
 .../hdfs/simpleauth/SimpleAuthHDFSFileSink.java |  131 -
 .../simpleauth/SimpleAuthHDFSFileSource.java    |  117 -
 .../sdk/io/hdfs/simpleauth/package-info.java    |   22 -
 .../beam/sdk/io/hdfs/AvroWrapperCoderTest.java  |   51 -
 .../beam/sdk/io/hdfs/HDFSFileSinkTest.java      |  173 +
 .../beam/sdk/io/hdfs/HDFSFileSourceTest.java    |   60 +-
 .../beam/sdk/io/hdfs/WritableCoderTest.java     |   45 -
 sdks/java/io/jdbc/pom.xml                       |  125 +-
 sdks/java/io/jdbc/src/test/README.md            |   32 +
 .../org/apache/beam/sdk/io/jdbc/JdbcIOIT.java   |  178 +
 .../org/apache/beam/sdk/io/jdbc/JdbcIOTest.java |  120 +-
 .../beam/sdk/io/jdbc/JdbcTestDataSet.java       |  128 +
 .../beam/sdk/io/jdbc/PostgresTestOptions.java   |   60 +
 .../kubernetes/postgres-pod-no-vol.yml          |   32 +
 .../kubernetes/postgres-service-public.yml      |   28 +
 .../jdbc/src/test/resources/kubernetes/setup.sh |   20 +
 .../src/test/resources/kubernetes/teardown.sh   |   20 +
 sdks/java/io/jms/pom.xml                        |   22 +-
 .../java/org/apache/beam/sdk/io/jms/JmsIO.java  |  103 +-
 .../org/apache/beam/sdk/io/jms/JmsIOTest.java   |  137 +-
 sdks/java/io/kafka/pom.xml                      |   65 +-
 .../apache/beam/sdk/io/kafka/ConsumerSpEL.java  |   60 +
 .../beam/sdk/io/kafka/KafkaCheckpointMark.java  |   16 +
 .../org/apache/beam/sdk/io/kafka/KafkaIO.java   |  683 ++-
 .../beam/sdk/io/kafka/KafkaRecordCoder.java     |    4 +-
 .../apache/beam/sdk/io/kafka/KafkaIOTest.java   |  109 +-
 .../beam/sdk/io/kafka/KafkaRecordCoderTest.java |   34 +
 sdks/java/io/kinesis/pom.xml                    |   12 +-
 .../beam/sdk/io/kinesis/KinesisRecordCoder.java |    4 +-
 .../beam/sdk/io/kinesis/package-info.java       |    2 +-
 .../sdk/io/kinesis/KinesisMockReadTest.java     |    7 +-
 .../beam/sdk/io/kinesis/KinesisReaderIT.java    |    6 +-
 sdks/java/io/mongodb/pom.xml                    |   20 +-
 .../beam/sdk/io/mongodb/MongoDbGridFSIO.java    |    5 -
 .../apache/beam/sdk/io/mongodb/MongoDbIO.java   |    5 -
 .../sdk/io/mongodb/MongoDBGridFSIOTest.java     |    9 +-
 .../beam/sdk/io/mongodb/MongoDbIOTest.java      |    7 +-
 sdks/java/io/mqtt/pom.xml                       |  122 +
 .../org/apache/beam/sdk/io/mqtt/MqttIO.java     |  592 +++
 .../apache/beam/sdk/io/mqtt/package-info.java   |   22 +
 .../org/apache/beam/sdk/io/mqtt/MqttIOTest.java |  257 +
 sdks/java/io/pom.xml                            |   76 +-
 sdks/java/java8tests/pom.xml                    |    3 +-
 .../beam/sdk/transforms/CombineJava8Test.java   |    8 +-
 .../beam/sdk/transforms/DistinctJava8Test.java  |    5 +-
 .../beam/sdk/transforms/FilterJava8Test.java    |    9 +-
 .../transforms/FlatMapElementsJava8Test.java    |    7 +-
 .../sdk/transforms/MapElementsJava8Test.java    |   33 +-
 .../beam/sdk/transforms/PartitionJava8Test.java |    7 +-
 .../sdk/transforms/SimpleFunctionJava8Test.java |   69 +
 .../beam/sdk/transforms/WithKeysJava8Test.java  |    6 +-
 .../sdk/transforms/WithTimestampsJava8Test.java |   14 +-
 sdks/java/javadoc/ant.xml                       |   96 +
 sdks/java/javadoc/pom.xml                       |  295 ++
 .../maven-archetypes/examples-java8/pom.xml     |    2 +-
 .../main/resources/archetype-resources/pom.xml  |   78 +-
 sdks/java/maven-archetypes/examples/pom.xml     |    2 +-
 .../main/resources/archetype-resources/pom.xml  |   96 +-
 sdks/java/maven-archetypes/pom.xml              |    2 +-
 sdks/java/maven-archetypes/starter/pom.xml      |    2 +-
 .../main/resources/archetype-resources/pom.xml  |    4 +-
 .../resources/projects/basic/reference/pom.xml  |    4 +-
 sdks/java/pom.xml                               |    6 +-
 sdks/pom.xml                                    |   17 +-
 sdks/python/.pylintrc                           |  164 +
 sdks/python/MANIFEST.in                         |   21 +
 sdks/python/README.md                           |  298 ++
 sdks/python/apache_beam/__init__.py             |   82 +
 sdks/python/apache_beam/coders/__init__.py      |   19 +
 sdks/python/apache_beam/coders/coder_impl.pxd   |  143 +
 sdks/python/apache_beam/coders/coder_impl.py    |  734 +++
 sdks/python/apache_beam/coders/coders.py        |  835 ++++
 sdks/python/apache_beam/coders/coders_test.py   |  115 +
 .../apache_beam/coders/coders_test_common.py    |  389 ++
 .../apache_beam/coders/fast_coders_test.py      |   37 +
 sdks/python/apache_beam/coders/observable.py    |   38 +
 .../apache_beam/coders/observable_test.py       |   57 +
 .../coders/proto2_coder_test_messages_pb2.py    |  318 ++
 .../apache_beam/coders/slow_coders_test.py      |   45 +
 sdks/python/apache_beam/coders/slow_stream.py   |  163 +
 .../apache_beam/coders/standard_coders_test.py  |  156 +
 sdks/python/apache_beam/coders/stream.pxd       |   66 +
 sdks/python/apache_beam/coders/stream.pyx       |  226 +
 sdks/python/apache_beam/coders/stream_test.py   |  180 +
 sdks/python/apache_beam/coders/typecoders.py    |  182 +
 .../apache_beam/coders/typecoders_test.py       |  124 +
 sdks/python/apache_beam/error.py                |   42 +
 sdks/python/apache_beam/examples/__init__.py    |   16 +
 .../apache_beam/examples/complete/__init__.py   |   16 +
 .../examples/complete/autocomplete.py           |   90 +
 .../examples/complete/autocomplete_test.py      |   52 +
 .../examples/complete/estimate_pi.py            |  128 +
 .../examples/complete/estimate_pi_test.py       |   52 +
 .../examples/complete/juliaset/__init__.py      |   16 +
 .../complete/juliaset/juliaset/__init__.py      |   16 +
 .../complete/juliaset/juliaset/juliaset.py      |  124 +
 .../complete/juliaset/juliaset/juliaset_test.py |   86 +
 .../examples/complete/juliaset/juliaset_main.py |   58 +
 .../examples/complete/juliaset/setup.py         |  116 +
 .../apache_beam/examples/complete/tfidf.py      |  208 +
 .../apache_beam/examples/complete/tfidf_test.py |   91 +
 .../examples/complete/top_wikipedia_sessions.py |  182 +
 .../complete/top_wikipedia_sessions_test.py     |   62 +
 .../apache_beam/examples/cookbook/__init__.py   |   16 +
 .../examples/cookbook/bigquery_schema.py        |  129 +
 .../examples/cookbook/bigquery_side_input.py    |  121 +
 .../cookbook/bigquery_side_input_test.py        |   54 +
 .../examples/cookbook/bigquery_tornadoes.py     |   99 +
 .../cookbook/bigquery_tornadoes_it_test.py      |   62 +
 .../cookbook/bigquery_tornadoes_test.py         |   45 +
 .../apache_beam/examples/cookbook/bigshuffle.py |   94 +
 .../examples/cookbook/bigshuffle_test.py        |   63 +
 .../apache_beam/examples/cookbook/coders.py     |  101 +
 .../examples/cookbook/coders_test.py            |   49 +
 .../examples/cookbook/combiners_test.py         |   74 +
 .../examples/cookbook/custom_ptransform.py      |  134 +
 .../examples/cookbook/custom_ptransform_test.py |   53 +
 .../examples/cookbook/datastore_wordcount.py    |  261 ++
 .../apache_beam/examples/cookbook/filters.py    |  107 +
 .../examples/cookbook/filters_test.py           |   69 +
 .../examples/cookbook/group_with_coder.py       |  122 +
 .../examples/cookbook/group_with_coder_test.py  |   89 +
 .../examples/cookbook/mergecontacts.py          |  133 +
 .../examples/cookbook/mergecontacts_test.py     |  125 +
 .../examples/cookbook/multiple_output_pardo.py  |  184 +
 .../cookbook/multiple_output_pardo_test.py      |   72 +
 .../apache_beam/examples/snippets/__init__.py   |   16 +
 .../apache_beam/examples/snippets/snippets.py   | 1158 +++++
 .../examples/snippets/snippets_test.py          |  904 ++++
 .../apache_beam/examples/streaming_wordcap.py   |   64 +
 .../apache_beam/examples/streaming_wordcount.py |   74 +
 sdks/python/apache_beam/examples/wordcount.py   |  116 +
 .../apache_beam/examples/wordcount_debugging.py |  163 +
 .../examples/wordcount_debugging_test.py        |   59 +
 .../apache_beam/examples/wordcount_it_test.py   |   59 +
 .../apache_beam/examples/wordcount_minimal.py   |  121 +
 .../examples/wordcount_minimal_test.py          |   59 +
 .../apache_beam/examples/wordcount_test.py      |   58 +
 sdks/python/apache_beam/internal/__init__.py    |   16 +
 .../python/apache_beam/internal/gcp/__init__.py |   16 +
 sdks/python/apache_beam/internal/gcp/auth.py    |  185 +
 .../apache_beam/internal/gcp/auth_test.py       |   44 +
 .../apache_beam/internal/gcp/json_value.py      |  147 +
 .../apache_beam/internal/gcp/json_value_test.py |   93 +
 sdks/python/apache_beam/internal/module_test.py |   62 +
 sdks/python/apache_beam/internal/pickler.py     |  230 +
 .../python/apache_beam/internal/pickler_test.py |   84 +
 sdks/python/apache_beam/internal/util.py        |  127 +
 sdks/python/apache_beam/internal/util_test.py   |   61 +
 sdks/python/apache_beam/io/__init__.py          |   38 +
 sdks/python/apache_beam/io/avroio.py            |  372 ++
 sdks/python/apache_beam/io/avroio_test.py       |  381 ++
 sdks/python/apache_beam/io/concat_source.py     |  263 ++
 .../python/apache_beam/io/concat_source_test.py |  231 +
 sdks/python/apache_beam/io/filebasedsource.py   |  329 ++
 .../apache_beam/io/filebasedsource_test.py      |  708 +++
 sdks/python/apache_beam/io/fileio.py            |  746 +++
 sdks/python/apache_beam/io/fileio_test.py       |  352 ++
 sdks/python/apache_beam/io/gcp/__init__.py      |   16 +
 sdks/python/apache_beam/io/gcp/bigquery.py      | 1081 +++++
 sdks/python/apache_beam/io/gcp/bigquery_test.py |  828 ++++
 .../apache_beam/io/gcp/datastore/__init__.py    |   16 +
 .../apache_beam/io/gcp/datastore/v1/__init__.py |   16 +
 .../io/gcp/datastore/v1/datastoreio.py          |  397 ++
 .../io/gcp/datastore/v1/datastoreio_test.py     |  245 +
 .../io/gcp/datastore/v1/fake_datastore.py       |   98 +
 .../apache_beam/io/gcp/datastore/v1/helper.py   |  274 ++
 .../io/gcp/datastore/v1/helper_test.py          |  265 ++
 .../io/gcp/datastore/v1/query_splitter.py       |  275 ++
 .../io/gcp/datastore/v1/query_splitter_test.py  |  208 +
 sdks/python/apache_beam/io/gcp/gcsio.py         |  871 ++++
 sdks/python/apache_beam/io/gcp/gcsio_test.py    |  796 ++++
 .../apache_beam/io/gcp/internal/__init__.py     |   16 +
 .../io/gcp/internal/clients/__init__.py         |   16 +
 .../gcp/internal/clients/bigquery/__init__.py   |   33 +
 .../clients/bigquery/bigquery_v2_client.py      |  660 +++
 .../clients/bigquery/bigquery_v2_messages.py    | 1910 ++++++++
 .../io/gcp/internal/clients/storage/__init__.py |   33 +
 .../clients/storage/storage_v1_client.py        | 1039 +++++
 .../clients/storage/storage_v1_messages.py      | 1920 ++++++++
 sdks/python/apache_beam/io/gcp/pubsub.py        |   91 +
 sdks/python/apache_beam/io/gcp/pubsub_test.py   |   63 +
 .../python/apache_beam/io/gcp/tests/__init__.py |   16 +
 .../io/gcp/tests/bigquery_matcher.py            |  108 +
 .../io/gcp/tests/bigquery_matcher_test.py       |  108 +
 sdks/python/apache_beam/io/iobase.py            |  987 ++++
 sdks/python/apache_beam/io/range_trackers.py    |  532 +++
 .../apache_beam/io/range_trackers_test.py       |  590 +++
 sdks/python/apache_beam/io/source_test_utils.py |  642 +++
 .../apache_beam/io/source_test_utils_test.py    |  122 +
 sdks/python/apache_beam/io/sources_test.py      |  111 +
 sdks/python/apache_beam/io/textio.py            |  448 ++
 sdks/python/apache_beam/io/textio_test.py       |  718 +++
 sdks/python/apache_beam/io/tfrecordio.py        |  271 ++
 sdks/python/apache_beam/io/tfrecordio_test.py   |  389 ++
 sdks/python/apache_beam/metrics/__init__.py     |   17 +
 sdks/python/apache_beam/metrics/cells.py        |  315 ++
 sdks/python/apache_beam/metrics/cells_test.py   |  143 +
 sdks/python/apache_beam/metrics/execution.pxd   |   31 +
 sdks/python/apache_beam/metrics/execution.py    |  229 +
 .../apache_beam/metrics/execution_test.py       |  131 +
 sdks/python/apache_beam/metrics/metric.py       |  202 +
 sdks/python/apache_beam/metrics/metric_test.py  |  128 +
 sdks/python/apache_beam/metrics/metricbase.py   |   82 +
 sdks/python/apache_beam/pipeline.py             |  442 ++
 sdks/python/apache_beam/pipeline_test.py        |  444 ++
 sdks/python/apache_beam/pvalue.py               |  468 ++
 sdks/python/apache_beam/pvalue_test.py          |   68 +
 sdks/python/apache_beam/runners/__init__.py     |   30 +
 sdks/python/apache_beam/runners/api/__init__.py |   16 +
 .../runners/api/beam_runner_api_pb2.py          | 2772 +++++++++++
 sdks/python/apache_beam/runners/common.pxd      |   77 +
 sdks/python/apache_beam/runners/common.py       |  436 ++
 .../apache_beam/runners/dataflow/__init__.py    |   16 +
 .../runners/dataflow/dataflow_metrics.py        |  111 +
 .../runners/dataflow/dataflow_metrics_test.py   |  148 +
 .../runners/dataflow/dataflow_runner.py         |  729 +++
 .../runners/dataflow/dataflow_runner_test.py    |  181 +
 .../runners/dataflow/internal/__init__.py       |   16 +
 .../runners/dataflow/internal/apiclient.py      |  739 +++
 .../runners/dataflow/internal/apiclient_test.py |   96 +
 .../dataflow/internal/clients/__init__.py       |   16 +
 .../internal/clients/dataflow/__init__.py       |   33 +
 .../clients/dataflow/dataflow_v1b3_client.py    |  694 +++
 .../clients/dataflow/dataflow_v1b3_messages.py  | 4392 ++++++++++++++++++
 .../clients/dataflow/message_matchers.py        |  124 +
 .../clients/dataflow/message_matchers_test.py   |   77 +
 .../runners/dataflow/internal/dependency.py     |  522 +++
 .../dataflow/internal/dependency_test.py        |  425 ++
 .../runners/dataflow/internal/names.py          |   82 +
 .../runners/dataflow/native_io/__init__.py      |   16 +
 .../runners/dataflow/native_io/iobase.py        |  318 ++
 .../runners/dataflow/template_runner_test.py    |   97 +
 .../runners/dataflow/test_dataflow_runner.py    |   40 +
 .../apache_beam/runners/direct/__init__.py      |   19 +
 .../runners/direct/bundle_factory.py            |  201 +
 sdks/python/apache_beam/runners/direct/clock.py |   50 +
 .../consumer_tracking_pipeline_visitor.py       |   59 +
 .../consumer_tracking_pipeline_visitor_test.py  |  127 +
 .../runners/direct/direct_metrics.py            |  112 +
 .../runners/direct/direct_metrics_test.py       |  211 +
 .../apache_beam/runners/direct/direct_runner.py |  173 +
 .../runners/direct/evaluation_context.py        |  283 ++
 .../apache_beam/runners/direct/executor.py      |  578 +++
 .../runners/direct/helper_transforms.py         |   99 +
 .../runners/direct/transform_evaluator.py       |  558 +++
 .../runners/direct/transform_result.py          |   64 +
 .../runners/direct/watermark_manager.py         |  224 +
 .../apache_beam/runners/pipeline_context.py     |   88 +
 .../runners/pipeline_context_test.py            |   49 +
 sdks/python/apache_beam/runners/runner.py       |  368 ++
 sdks/python/apache_beam/runners/runner_test.py  |  123 +
 .../python/apache_beam/runners/test/__init__.py |   30 +
 sdks/python/apache_beam/test_pipeline.py        |  163 +
 sdks/python/apache_beam/test_pipeline_test.py   |  112 +
 sdks/python/apache_beam/tests/__init__.py       |   16 +
 sdks/python/apache_beam/tests/data/README.md    |   20 +
 .../apache_beam/tests/data/privatekey.p12       |  Bin 0 -> 2452 bytes
 .../apache_beam/tests/data/standard_coders.yaml |  196 +
 .../apache_beam/tests/pipeline_verifiers.py     |  119 +
 .../tests/pipeline_verifiers_test.py            |  123 +
 sdks/python/apache_beam/tests/test_utils.py     |   69 +
 sdks/python/apache_beam/transforms/__init__.py  |   25 +
 sdks/python/apache_beam/transforms/combiners.py |  595 +++
 .../apache_beam/transforms/combiners_test.py    |  324 ++
 sdks/python/apache_beam/transforms/core.py      | 1389 ++++++
 .../apache_beam/transforms/cy_combiners.pxd     |   92 +
 .../apache_beam/transforms/cy_combiners.py      |  306 ++
 sdks/python/apache_beam/transforms/display.py   |  331 ++
 .../apache_beam/transforms/display_test.py      |  216 +
 .../python/apache_beam/transforms/ptransform.py |  671 +++
 .../apache_beam/transforms/ptransform_test.py   | 1941 ++++++++
 .../python/apache_beam/transforms/sideinputs.py |  214 +
 .../apache_beam/transforms/sideinputs_test.py   |  337 ++
 sdks/python/apache_beam/transforms/timeutil.py  |  133 +
 sdks/python/apache_beam/transforms/trigger.py   | 1109 +++++
 .../apache_beam/transforms/trigger_test.py      |  601 +++
 .../transforms/trigger_transcripts.yaml         |  224 +
 sdks/python/apache_beam/transforms/util.py      |  235 +
 sdks/python/apache_beam/transforms/window.py    |  475 ++
 .../apache_beam/transforms/window_test.py       |  261 ++
 .../transforms/write_ptransform_test.py         |  126 +
 sdks/python/apache_beam/typehints/__init__.py   |   22 +
 sdks/python/apache_beam/typehints/decorators.py |  532 +++
 sdks/python/apache_beam/typehints/opcodes.py    |  334 ++
 .../apache_beam/typehints/trivial_inference.py  |  417 ++
 .../typehints/trivial_inference_test.py         |  151 +
 sdks/python/apache_beam/typehints/typecheck.py  |  178 +
 .../typehints/typed_pipeline_test.py            |  251 +
 sdks/python/apache_beam/typehints/typehints.py  | 1062 +++++
 .../apache_beam/typehints/typehints_test.py     | 1062 +++++
 sdks/python/apache_beam/utils/__init__.py       |   22 +
 sdks/python/apache_beam/utils/annotations.py    |  103 +
 .../apache_beam/utils/annotations_test.py       |  126 +
 sdks/python/apache_beam/utils/counters.pxd      |   30 +
 sdks/python/apache_beam/utils/counters.py       |  183 +
 sdks/python/apache_beam/utils/path.py           |   47 +
 sdks/python/apache_beam/utils/path_test.py      |   70 +
 .../apache_beam/utils/pipeline_options.py       |  557 +++
 .../apache_beam/utils/pipeline_options_test.py  |  192 +
 .../utils/pipeline_options_validator.py         |  199 +
 .../utils/pipeline_options_validator_test.py    |  342 ++
 sdks/python/apache_beam/utils/processes.py      |   52 +
 sdks/python/apache_beam/utils/processes_test.py |  106 +
 sdks/python/apache_beam/utils/profiler.py       |  148 +
 sdks/python/apache_beam/utils/proto_utils.py    |   54 +
 sdks/python/apache_beam/utils/retry.py          |  207 +
 sdks/python/apache_beam/utils/retry_test.py     |  221 +
 sdks/python/apache_beam/utils/timestamp.py      |  213 +
 sdks/python/apache_beam/utils/timestamp_test.py |  168 +
 sdks/python/apache_beam/utils/urns.py           |   24 +
 .../python/apache_beam/utils/windowed_value.pxd |   38 +
 sdks/python/apache_beam/utils/windowed_value.py |  122 +
 .../apache_beam/utils/windowed_value_test.py    |   71 +
 sdks/python/apache_beam/version.py              |   57 +
 sdks/python/generate_pydoc.sh                   |   80 +
 sdks/python/pom.xml                             |  191 +
 sdks/python/run_postcommit.sh                   |  102 +
 sdks/python/run_pylint.sh                       |   52 +
 sdks/python/setup.cfg                           |   27 +
 sdks/python/setup.py                            |  155 +
 sdks/python/test_config.py                      |   44 +
 sdks/python/tox.ini                             |   89 +
 1339 files changed, 138577 insertions(+), 31086 deletions(-)
----------------------------------------------------------------------



[35/50] [abbrv] beam git commit: This closes #2221

Posted by ke...@apache.org.
This closes #2221


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/9299e263
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/9299e263
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/9299e263

Branch: refs/heads/gearpump-runner
Commit: 9299e2632da4c368e29c6a38df5e0b0293faf5e5
Parents: e31cb2b ed5cb8a
Author: Davor Bonaci <da...@google.com>
Authored: Fri Mar 10 13:46:11 2017 -0800
Committer: Davor Bonaci <da...@google.com>
Committed: Fri Mar 10 13:46:11 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml | 4 ----
 1 file changed, 4 deletions(-)
----------------------------------------------------------------------



[05/50] [abbrv] beam git commit: This closes #2197: Bump Dataflow ROS timeout to 120 minutes

Posted by ke...@apache.org.
This closes #2197: Bump Dataflow ROS timeout to 120 minutes


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f13a84d6
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f13a84d6
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f13a84d6

Branch: refs/heads/gearpump-runner
Commit: f13a84d67e30dbf1a74df79ce964c18de850bf65
Parents: b79dd64 f117913
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Mar 9 19:31:58 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Thu Mar 9 19:31:58 2017 -0800

----------------------------------------------------------------------
 .jenkins/common_job_properties.groovy                           | 5 +++--
 .../job_beam_PostCommit_Java_RunnableOnService_Dataflow.groovy  | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------



[09/50] [abbrv] beam git commit: Auto-generated runner api proto bindings.

Posted by ke...@apache.org.
Auto-generated runner api proto bindings.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3bb125e1
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3bb125e1
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3bb125e1

Branch: refs/heads/gearpump-runner
Commit: 3bb125e12d625216c234fe396168843e6669c1e5
Parents: f13a84d
Author: Robert Bradshaw <ro...@gmail.com>
Authored: Tue Mar 7 12:02:08 2017 -0800
Committer: Robert Bradshaw <ro...@gmail.com>
Committed: Thu Mar 9 20:29:00 2017 -0800

----------------------------------------------------------------------
 sdks/python/apache_beam/runners/api/__init__.py |    0
 .../runners/api/beam_runner_api_pb2.py          | 2755 ++++++++++++++++++
 sdks/python/run_pylint.sh                       |    3 +-
 3 files changed, 2757 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/3bb125e1/sdks/python/apache_beam/runners/api/__init__.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/api/__init__.py b/sdks/python/apache_beam/runners/api/__init__.py
new file mode 100644
index 0000000..e69de29
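
A minimal usage sketch of the generated module (hypothetical driver code;
the module path and the Components message are taken from diffs in this
thread, everything else here is illustrative):

    # Import the auto-generated bindings added by this commit.
    from apache_beam.runners.api import beam_runner_api_pb2

    # Components is the top-level container that pipeline translation
    # code populates (coders, windowing strategies, transforms).
    components = beam_runner_api_pb2.Components()

    # Generated protobuf messages serialize to bytes in the usual way.
    serialized = components.SerializeToString()
    assert beam_runner_api_pb2.Components.FromString(serialized) == components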


[45/50] [abbrv] beam git commit: Revert BigQueryIO bit of 'Make all uses of CountingOutputStream close their resources'

Posted by ke...@apache.org.
Revert BigQueryIO bit of 'Make all uses of CountingOutputStream close their resources'

This reverts the portion of commit 3115dbdca1858511e98476b5c79e6cca98782b0b
that touches BigQueryIO; that portion caused a double-close bug by closing
the underlying output stream a second time after the channel had already
been closed.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/839c906a
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/839c906a
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/839c906a

Branch: refs/heads/gearpump-runner
Commit: 839c906a583bef6ef7c0739479231f096df58bef
Parents: b6ca062
Author: Kenneth Knowles <kl...@google.com>
Authored: Fri Mar 10 19:01:23 2017 -0800
Committer: Amit Sela <am...@gmail.com>
Committed: Sun Mar 12 11:58:43 2017 +0200

----------------------------------------------------------------------
 .../java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java     | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/839c906a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index 81aa50b..0e1c6fc 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -2272,9 +2272,7 @@ public class BigQueryIO {
 
       public final KV<String, Long> close() throws IOException {
         channel.close();
-        KV<String, Long> record = KV.of(fileName, out.getCount());
-        out.close();
-        return record;
+        return KV.of(fileName, out.getCount());
       }
     }
 


[25/50] [abbrv] beam git commit: add unicode type to the typeDict attribute in Python SDK

Posted by ke...@apache.org.
add unicode type to the typeDict attribute in Python SDK


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7daf9abd
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7daf9abd
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7daf9abd

Branch: refs/heads/gearpump-runner
Commit: 7daf9abd52d4b89c563d370396506d2db71b7700
Parents: 818fc94
Author: tajima <ta...@tajima-taso.jp>
Authored: Thu Mar 9 18:08:25 2017 +0900
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Mar 10 11:56:06 2017 -0800

----------------------------------------------------------------------
 sdks/python/apache_beam/transforms/display.py      |  1 +
 sdks/python/apache_beam/transforms/display_test.py | 11 +++++++++++
 2 files changed, 12 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/7daf9abd/sdks/python/apache_beam/transforms/display.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/display.py b/sdks/python/apache_beam/transforms/display.py
index 5e25060..2ced1af 100644
--- a/sdks/python/apache_beam/transforms/display.py
+++ b/sdks/python/apache_beam/transforms/display.py
@@ -160,6 +160,7 @@ class DisplayDataItem(object):
   display item belongs to.
   """
   typeDict = {str:'STRING',
+              unicode:'STRING',
               int:'INTEGER',
               float:'FLOAT',
               bool: 'BOOLEAN',

http://git-wip-us.apache.org/repos/asf/beam/blob/7daf9abd/sdks/python/apache_beam/transforms/display_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/display_test.py b/sdks/python/apache_beam/transforms/display_test.py
index 5a95c42..5e106e5 100644
--- a/sdks/python/apache_beam/transforms/display_test.py
+++ b/sdks/python/apache_beam/transforms/display_test.py
@@ -122,6 +122,17 @@ class DisplayDataTest(unittest.TestCase):
         DisplayDataItemMatcher('extra_packages',
                                str(['package1', 'package2']))))
 
+  def test_unicode_type_display_data(self):
+    class MyDoFn(beam.DoFn):
+      def display_data(self):
+        return {'unicode_string': unicode('my string'),
+                'unicode_literal_string': u'my literal string'}
+
+    fn = MyDoFn()
+    dd = DisplayData.create_from(fn)
+    for item in dd.items:
+      self.assertEqual(item.type, 'STRING')
+
   def test_base_cases(self):
     """ Tests basic display data cases (key:value, key:dict)
     It does not test subcomponent inclusion
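
For context, a stand-alone sketch of why the extra entry is needed
(Python 2 semantics, simplified names; the real table lives in
DisplayDataItem.typeDict):

    # Classification is an exact-type lookup with no isinstance()
    # fallback, so u'...' values matched nothing before this commit.
    type_dict = {str: 'STRING', unicode: 'STRING', int: 'INTEGER'}

    def classify(value):
        return type_dict.get(type(value))

    assert classify('plain string') == 'STRING'
    assert classify(u'literal string') == 'STRING'  # None without the fix
    assert classify(42) == 'INTEGER'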


[42/50] [abbrv] beam git commit: This closes #2109: Allow `PAssert`s to take a message

Posted by ke...@apache.org.
This closes #2109: Allow `PAssert`s to take a message

  Remove exception suppression from PAssert.SideInputCheckerDoFn
  Added assertion failure tests for `PAssert#thatSingleton`
  Added a test of default PAssert failure reason
  Javadoc changes
  [BEAM-1551] Allow `PAssert`s to take a message


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d1671530
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d1671530
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d1671530

Branch: refs/heads/gearpump-runner
Commit: d1671530951bd67956f6d022b523053c12a04df3
Parents: 7c78480 3669146
Author: Kenneth Knowles <kl...@google.com>
Authored: Sat Mar 11 07:04:30 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Sat Mar 11 07:04:30 2017 -0800

----------------------------------------------------------------------
 .../org/apache/beam/sdk/testing/PAssert.java    | 183 ++++++++++++++-----
 .../apache/beam/sdk/testing/PAssertTest.java    |  56 +++++-
 2 files changed, 188 insertions(+), 51 deletions(-)
----------------------------------------------------------------------



[16/50] [abbrv] beam git commit: Move GC timer checking to StatefulDoFnRunner.CleanupTimer

Posted by ke...@apache.org.
Move GC timer checking to StatefulDoFnRunner.CleanupTimer


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bf6d2748
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bf6d2748
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bf6d2748

Branch: refs/heads/gearpump-runner
Commit: bf6d2748c8876a7415290069163625598928f02f
Parents: 2c2424c
Author: Aljoscha Krettek <al...@gmail.com>
Authored: Fri Mar 10 08:29:27 2017 +0100
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri Mar 10 11:09:04 2017 +0100

----------------------------------------------------------------------
 .../beam/runners/core/StatefulDoFnRunner.java   | 29 ++++++++++++++++----
 1 file changed, 23 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/bf6d2748/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
index 154d8bc..926345e 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/StatefulDoFnRunner.java
@@ -115,15 +115,12 @@ public class StatefulDoFnRunner<InputT, OutputT, W extends BoundedWindow>
   @Override
   public void onTimer(
       String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
-    boolean isEventTimer = timeDomain.equals(TimeDomain.EVENT_TIME);
-    Instant gcTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness());
-    if (isEventTimer && GC_TIMER_ID.equals(timerId) && gcTime.equals(timestamp)) {
+    if (cleanupTimer.isForWindow(timerId, window, timestamp, timeDomain)) {
       stateCleaner.clearForWindow(window);
       // There should invoke the onWindowExpiration of DoFn
     } else {
-      if (isEventTimer || !dropLateData(window)) {
-        doFnRunner.onTimer(timerId, window, timestamp, timeDomain);
-      }
+      // a timer can never be late because we don't allow setting timers after GC time
+      doFnRunner.onTimer(timerId, window, timestamp, timeDomain);
     }
   }
 
@@ -151,6 +148,16 @@ public class StatefulDoFnRunner<InputT, OutputT, W extends BoundedWindow>
      * Set the garbage collect time of the window to timer.
      */
     void setForWindow(BoundedWindow window);
+
+    /**
+     * Checks whether the given timer is a cleanup timer for the window.
+     */
+    boolean isForWindow(
+        String timerId,
+        BoundedWindow window,
+        Instant timestamp,
+        TimeDomain timeDomain);
+
   }
 
   /**
@@ -191,6 +198,16 @@ public class StatefulDoFnRunner<InputT, OutputT, W extends BoundedWindow>
           GC_TIMER_ID, gcTime, TimeDomain.EVENT_TIME);
     }
 
+    @Override
+    public boolean isForWindow(
+        String timerId,
+        BoundedWindow window,
+        Instant timestamp,
+        TimeDomain timeDomain) {
+      boolean isEventTimer = timeDomain.equals(TimeDomain.EVENT_TIME);
+      Instant gcTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness());
+      return isEventTimer && GC_TIMER_ID.equals(timerId) && gcTime.equals(timestamp);
+    }
   }
 
   /**


[23/50] [abbrv] beam git commit: [BEAM-1686] This closes #2219

Posted by ke...@apache.org.
[BEAM-1686] This closes #2219


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/818fc941
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/818fc941
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/818fc941

Branch: refs/heads/gearpump-runner
Commit: 818fc9412f0b5604c6c6ae4ba4ed957bafcb0535
Parents: fdba784 752ad8a
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Fri Mar 10 17:02:54 2017 +0100
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Mar 10 17:02:54 2017 +0100

----------------------------------------------------------------------
 .../org/apache/beam/sdk/io/mqtt/MqttIO.java     |  4 ++
 .../org/apache/beam/sdk/io/mqtt/MqttIOTest.java | 61 ++++++++++++++++++++
 2 files changed, 65 insertions(+)
----------------------------------------------------------------------



[34/50] [abbrv] beam git commit: Remove exception suppression from PAssert.SideInputCheckerDoFn

Posted by ke...@apache.org.
Remove exception suppression from PAssert.SideInputCheckerDoFn


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3669146c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3669146c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3669146c

Branch: refs/heads/gearpump-runner
Commit: 3669146c95e265c0fbde4444ce7d04f6b787cdac
Parents: 59fd45b
Author: Aviem Zur <av...@gmail.com>
Authored: Fri Mar 10 23:15:15 2017 +0200
Committer: Aviem Zur <av...@gmail.com>
Committed: Fri Mar 10 23:15:15 2017 +0200

----------------------------------------------------------------------
 .../main/java/org/apache/beam/sdk/testing/PAssert.java  | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/3669146c/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
index d88c4d6..2596335 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
@@ -40,7 +40,6 @@ import org.apache.beam.sdk.coders.IterableCoder;
 import org.apache.beam.sdk.coders.KvCoder;
 import org.apache.beam.sdk.coders.MapCoder;
 import org.apache.beam.sdk.coders.VarIntCoder;
-import org.apache.beam.sdk.options.StreamingOptions;
 import org.apache.beam.sdk.runners.PipelineRunner;
 import org.apache.beam.sdk.runners.TransformHierarchy.Node;
 import org.apache.beam.sdk.transforms.Aggregator;
@@ -1102,15 +1101,8 @@ public class PAssert {
 
     @ProcessElement
     public void processElement(ProcessContext c) {
-      try {
-        ActualT actualContents = c.sideInput(actual);
-        doChecks(actualContents, checkerFn, success, failure);
-      } catch (Throwable t) {
-        // Suppress exception in streaming
-        if (!c.getPipelineOptions().as(StreamingOptions.class).isStreaming()) {
-          throw t;
-        }
-      }
+      ActualT actualContents = c.sideInput(actual);
+      doChecks(actualContents, checkerFn, success, failure);
     }
   }
 


[38/50] [abbrv] beam git commit: Ignore results from the tox clean up phase

Posted by ke...@apache.org.
Ignore results from the tox clean up phase

Some temporary files are generated only under certain conditions, so the
cleanup commands that delete them may fail and should not fail the tox run.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/12016e59
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/12016e59
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/12016e59

Branch: refs/heads/gearpump-runner
Commit: 12016e59e84a841afcfcd55402daf7701460dcbc
Parents: 39688d8
Author: Ahmet Altay <al...@google.com>
Authored: Fri Mar 10 16:21:17 2017 -0800
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Mar 10 16:21:17 2017 -0800

----------------------------------------------------------------------
 sdks/python/tox.ini | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/12016e59/sdks/python/tox.ini
----------------------------------------------------------------------
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 807fe3f..2ed21c6 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -51,11 +51,11 @@ commands =
   pip install -e .[test]
   python apache_beam/examples/complete/autocomplete_test.py
   python setup.py test
-  # Clean up all cython generated files.
-  find apache_beam -type f -name '*.c' -delete
-  find apache_beam -type f -name '*.so' -delete
-  find target/build -type f -name '*.c' -delete
-  find target/build -type f -name '*.so' -delete
+  # Clean up all cython generated files. Ignore if deletion fails.
+  - find apache_beam -type f -name '*.c' -delete
+  - find apache_beam -type f -name '*.so' -delete
+  - find target/build -type f -name '*.c' -delete
+  - find target/build -type f -name '*.so' -delete
 passenv = TRAVIS*
 
 [testenv:py27gcp]


[37/50] [abbrv] beam git commit: This closes #2222

Posted by ke...@apache.org.
This closes #2222


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/39688d8d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/39688d8d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/39688d8d

Branch: refs/heads/gearpump-runner
Commit: 39688d8d48b92702b7b3a0b8744ba0f6f018f967
Parents: 9299e26 ec6da89
Author: Ahmet Altay <al...@google.com>
Authored: Fri Mar 10 14:23:09 2017 -0800
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Mar 10 14:23:09 2017 -0800

----------------------------------------------------------------------
 sdks/python/MANIFEST.in | 2 ++
 sdks/python/tox.ini     | 2 ++
 2 files changed, 4 insertions(+)
----------------------------------------------------------------------



[11/50] [abbrv] beam git commit: Runner API translation of triggers and windowing strategies.

Posted by ke...@apache.org.
Runner API translation of triggers and windowing strategies.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/5b86e1fc
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/5b86e1fc
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/5b86e1fc

Branch: refs/heads/gearpump-runner
Commit: 5b86e1fc22234a7a6dd00696326fa0fae8fe7a2d
Parents: aad32b7
Author: Robert Bradshaw <ro...@gmail.com>
Authored: Tue Mar 7 16:18:02 2017 -0800
Committer: Robert Bradshaw <ro...@gmail.com>
Committed: Thu Mar 9 20:29:01 2017 -0800

----------------------------------------------------------------------
 sdks/python/apache_beam/coders/coders.py        |  20 +++
 sdks/python/apache_beam/pipeline.py             |   2 +-
 sdks/python/apache_beam/transforms/core.py      |  38 +++++
 sdks/python/apache_beam/transforms/trigger.py   | 143 ++++++++++++++++++-
 .../apache_beam/transforms/trigger_test.py      |  33 +++++
 sdks/python/apache_beam/transforms/window.py    |  34 +++--
 .../apache_beam/transforms/window_test.py       |  23 ++-
 7 files changed, 272 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/5b86e1fc/sdks/python/apache_beam/coders/coders.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py
index 1d29f32..fd72af8 100644
--- a/sdks/python/apache_beam/coders/coders.py
+++ b/sdks/python/apache_beam/coders/coders.py
@@ -22,6 +22,8 @@ import cPickle as pickle
 import google.protobuf
 
 from apache_beam.coders import coder_impl
+from apache_beam.utils import urns
+from apache_beam.utils import proto_utils
 
 # pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports
 try:
@@ -182,6 +184,24 @@ class Coder(object):
             and self._dict_without_impl() == other._dict_without_impl())
     # pylint: enable=protected-access
 
+  def to_runner_api(self, context):
+    # TODO(BEAM-115): Use specialized URNs and components.
+    from apache_beam.runners.api import beam_runner_api_pb2
+    return beam_runner_api_pb2.Coder(
+        spec=beam_runner_api_pb2.FunctionSpec(
+            spec=beam_runner_api_pb2.UrnWithParameter(
+                urn=urns.PICKLED_CODER,
+                parameter=proto_utils.pack_Any(
+                    google.protobuf.wrappers_pb2.BytesValue(
+                        value=serialize_coder(self))))))
+
+  @staticmethod
+  def from_runner_api(proto, context):
+    any_proto = proto.spec.spec.parameter
+    bytes_proto = google.protobuf.wrappers_pb2.BytesValue()
+    any_proto.Unpack(bytes_proto)
+    return deserialize_coder(bytes_proto.value)
+
 
 class StrUtf8Coder(Coder):
   """A coder used for reading and writing strings as UTF-8."""

http://git-wip-us.apache.org/repos/asf/beam/blob/5b86e1fc/sdks/python/apache_beam/pipeline.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py
index 4ec2e47..9edcf9b 100644
--- a/sdks/python/apache_beam/pipeline.py
+++ b/sdks/python/apache_beam/pipeline.py
@@ -499,6 +499,6 @@ class PipelineContext(object):
 
   def to_runner_api(self):
     context_proto = beam_runner_api_pb2.Components()
-    for name, cls in self._COMPONENT_TYEPS:
+    for name, cls in self._COMPONENT_TYPES:
       getattr(self, name).populate_map(getattr(context_proto, name))
     return context_proto

http://git-wip-us.apache.org/repos/asf/beam/blob/5b86e1fc/sdks/python/apache_beam/transforms/core.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py
index 7abd784..1fc63b2 100644
--- a/sdks/python/apache_beam/transforms/core.py
+++ b/sdks/python/apache_beam/transforms/core.py
@@ -27,6 +27,7 @@ from apache_beam import pvalue
 from apache_beam import typehints
 from apache_beam.coders import typecoders
 from apache_beam.internal import util
+from apache_beam.runners.api import beam_runner_api_pb2
 from apache_beam.transforms import ptransform
 from apache_beam.transforms.display import HasDisplayData, DisplayDataItem
 from apache_beam.transforms.ptransform import PTransform
@@ -49,6 +50,7 @@ from apache_beam.typehints import WithTypeHints
 from apache_beam.typehints.trivial_inference import element_type
 from apache_beam.utils.pipeline_options import TypeOptions
 
+
 # Type variables
 T = typehints.TypeVariable('T')
 K = typehints.TypeVariable('K')
@@ -1207,9 +1209,45 @@ class Windowing(object):
                                           self.accumulation_mode,
                                           self.output_time_fn)
 
+  def __eq__(self, other):
+    if type(self) == type(other):
+      if self._is_default and other._is_default:
+        return True
+      else:
+        return (
+            self.windowfn == other.windowfn
+            and self.triggerfn == other.triggerfn
+            and self.accumulation_mode == other.accumulation_mode
+            and self.output_time_fn == other.output_time_fn)
+
   def is_default(self):
     return self._is_default
 
+  def to_runner_api(self, context):
+    return beam_runner_api_pb2.WindowingStrategy(
+        window_fn=self.windowfn.to_runner_api(context),
+        # TODO(robertwb): Prohibit implicit multi-level merging.
+        merge_status=(beam_runner_api_pb2.NEEDS_MERGE
+                      if self.windowfn.is_merging()
+                      else beam_runner_api_pb2.NON_MERGING),
+        window_coder_id=context.coders.get_id(
+            self.windowfn.get_window_coder()),
+        trigger=self.triggerfn.to_runner_api(context),
+        accumulation_mode=self.accumulation_mode,
+        output_time=self.output_time_fn,
+        closing_behavior=beam_runner_api_pb2.EMIT_ALWAYS,
+        allowed_lateness=0)
+
+  @staticmethod
+  def from_runner_api(proto, context):
+    # pylint: disable=wrong-import-order, wrong-import-position
+    from apache_beam.transforms.trigger import TriggerFn
+    return Windowing(
+        windowfn=WindowFn.from_runner_api(proto.window_fn, context),
+        triggerfn=TriggerFn.from_runner_api(proto.trigger, context),
+        accumulation_mode=proto.accumulation_mode,
+        output_time_fn=proto.output_time)
+
 
 @typehints.with_input_types(T)
 @typehints.with_output_types(T)

http://git-wip-us.apache.org/repos/asf/beam/blob/5b86e1fc/sdks/python/apache_beam/transforms/trigger.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/trigger.py b/sdks/python/apache_beam/transforms/trigger.py
index 04198ba..b55d602 100644
--- a/sdks/python/apache_beam/transforms/trigger.py
+++ b/sdks/python/apache_beam/transforms/trigger.py
@@ -35,13 +35,14 @@ from apache_beam.transforms.window import GlobalWindow
 from apache_beam.transforms.window import OutputTimeFn
 from apache_beam.transforms.window import WindowedValue
 from apache_beam.transforms.window import WindowFn
+from apache_beam.runners.api import beam_runner_api_pb2
 
 
 class AccumulationMode(object):
   """Controls what to do with data when a trigger fires multiple times.
   """
-  DISCARDING = 1
-  ACCUMULATING = 2
+  DISCARDING = beam_runner_api_pb2.DISCARDING
+  ACCUMULATING = beam_runner_api_pb2.ACCUMULATING
   # TODO(robertwb): Provide retractions of previous outputs.
   # RETRACTING = 3
 
@@ -185,6 +186,26 @@ class TriggerFn(object):
     pass
 # pylint: enable=unused-argument
 
+  @staticmethod
+  def from_runner_api(proto, context):
+    return {
+        'after_all': AfterAll,
+        'after_any': AfterFirst,
+        'after_each': AfterEach,
+        'after_end_of_widow': AfterWatermark,
+        # after_processing_time, after_synchronized_processing_time
+        # always
+        'default': DefaultTrigger,
+        'element_count': AfterCount,
+        # never
+        'or_finally': OrFinally,
+        'repeat': Repeatedly,
+    }[proto.WhichOneof('trigger')].from_runner_api(proto, context)
+
+  @abstractmethod
+  def to_runner_api(self, unused_context):
+    pass
+
 
 class DefaultTrigger(TriggerFn):
   """Semantically Repeatedly(AfterWatermark()), but more optimized."""
@@ -216,6 +237,14 @@ class DefaultTrigger(TriggerFn):
   def __eq__(self, other):
     return type(self) == type(other)
 
+  @staticmethod
+  def from_runner_api(proto, context):
+    return DefaultTrigger()
+
+  def to_runner_api(self, unused_context):
+    return beam_runner_api_pb2.Trigger(
+        default=beam_runner_api_pb2.Trigger.Default())
+
 
 class AfterWatermark(TriggerFn):
   """Fire exactly once when the watermark passes the end of the window.
@@ -235,9 +264,9 @@ class AfterWatermark(TriggerFn):
   def __repr__(self):
     qualifiers = []
     if self.early:
-      qualifiers.append('early=%s' % self.early)
+      qualifiers.append('early=%s' % self.early.underlying)
     if self.late:
-      qualifiers.append('late=%s' % self.late)
+      qualifiers.append('late=%s' % self.late.underlying)
     return 'AfterWatermark(%s)' % ', '.join(qualifiers)
 
   def is_late(self, context):
@@ -305,6 +334,28 @@ class AfterWatermark(TriggerFn):
   def __hash__(self):
     return hash((type(self), self.early, self.late))
 
+  @staticmethod
+  def from_runner_api(proto, context):
+    return AfterWatermark(
+        early=TriggerFn.from_runner_api(
+            proto.after_end_of_widow.early_firings, context)
+        if proto.after_end_of_widow.HasField('early_firings')
+        else None,
+        late=TriggerFn.from_runner_api(
+            proto.after_end_of_widow.late_firings, context)
+        if proto.after_end_of_widow.HasField('late_firings')
+        else None)
+
+  def to_runner_api(self, context):
+    early_proto = self.early.underlying.to_runner_api(
+        context) if self.early else None
+    late_proto = self.late.underlying.to_runner_api(
+        context) if self.late else None
+    return beam_runner_api_pb2.Trigger(
+        after_end_of_widow=beam_runner_api_pb2.Trigger.AfterEndOfWindow(
+            early_firings=early_proto,
+            late_firings=late_proto))
+
 
 class AfterCount(TriggerFn):
   """Fire when there are at least count elements in this window pane."""
@@ -317,6 +368,9 @@ class AfterCount(TriggerFn):
   def __repr__(self):
     return 'AfterCount(%s)' % self.count
 
+  def __eq__(self, other):
+    return type(self) == type(other) and self.count == other.count
+
   def on_element(self, element, window, context):
     context.add_state(self.COUNT_TAG, 1)
 
@@ -333,6 +387,15 @@ class AfterCount(TriggerFn):
   def reset(self, window, context):
     context.clear_state(self.COUNT_TAG)
 
+  @staticmethod
+  def from_runner_api(proto, unused_context):
+    return AfterCount(proto.element_count.element_count)
+
+  def to_runner_api(self, unused_context):
+    return beam_runner_api_pb2.Trigger(
+        element_count=beam_runner_api_pb2.Trigger.ElementCount(
+            element_count=self.count))
+
 
 class Repeatedly(TriggerFn):
   """Repeatedly invoke the given trigger, never finishing."""
@@ -343,6 +406,9 @@ class Repeatedly(TriggerFn):
   def __repr__(self):
     return 'Repeatedly(%s)' % self.underlying
 
+  def __eq__(self, other):
+    return type(self) == type(other) and self.underlying == other.underlying
+
   def on_element(self, element, window, context):  # get window from context?
     self.underlying.on_element(element, window, context)
 
@@ -360,6 +426,16 @@ class Repeatedly(TriggerFn):
   def reset(self, window, context):
     self.underlying.reset(window, context)
 
+  @staticmethod
+  def from_runner_api(proto, context):
+    return Repeatedly(
+        TriggerFn.from_runner_api(proto.repeat.subtrigger, context))
+
+  def to_runner_api(self, context):
+    return beam_runner_api_pb2.Trigger(
+        repeat=beam_runner_api_pb2.Trigger.Repeat(
+            subtrigger=self.underlying.to_runner_api(context)))
+
 
 class ParallelTriggerFn(TriggerFn):
 
@@ -372,6 +448,9 @@ class ParallelTriggerFn(TriggerFn):
     return '%s(%s)' % (self.__class__.__name__,
                        ', '.join(str(t) for t in self.triggers))
 
+  def __eq__(self, other):
+    return type(self) == type(other) and self.triggers == other.triggers
+
   @abstractmethod
   def combine_op(self, trigger_results):
     pass
@@ -406,6 +485,31 @@ class ParallelTriggerFn(TriggerFn):
   def _sub_context(context, index):
     return NestedContext(context, '%d/' % index)
 
+  @staticmethod
+  def from_runner_api(proto, context):
+    subtriggers = [
+        TriggerFn.from_runner_api(subtrigger, context)
+        for subtrigger
+        in proto.after_all.subtriggers or proto.after_any.subtriggers]
+    if proto.after_all.subtriggers:
+      return AfterAll(*subtriggers)
+    else:
+      return AfterFirst(*subtriggers)
+
+  def to_runner_api(self, context):
+    subtriggers = [
+        subtrigger.to_runner_api(context) for subtrigger in self.triggers]
+    if self.combine_op == all:
+      return beam_runner_api_pb2.Trigger(
+          after_all=beam_runner_api_pb2.Trigger.AfterAll(
+              subtriggers=subtriggers))
+    elif self.combine_op == any:
+      return beam_runner_api_pb2.Trigger(
+          after_any=beam_runner_api_pb2.Trigger.AfterAny(
+              subtriggers=subtriggers))
+    else:
+      raise NotImplementedError(self)
+
 
 class AfterFirst(ParallelTriggerFn):
   """Fires when any subtrigger fires.
@@ -435,6 +539,9 @@ class AfterEach(TriggerFn):
     return '%s(%s)' % (self.__class__.__name__,
                        ', '.join(str(t) for t in self.triggers))
 
+  def __eq__(self, other):
+    return type(self) == type(other) and self.triggers == other.triggers
+
   def on_element(self, element, window, context):
     ix = context.get_state(self.INDEX_TAG)
     if ix < len(self.triggers):
@@ -474,12 +581,40 @@ class AfterEach(TriggerFn):
   def _sub_context(context, index):
     return NestedContext(context, '%d/' % index)
 
+  @staticmethod
+  def from_runner_api(proto, context):
+    return AfterEach(*[
+        TriggerFn.from_runner_api(subtrigger, context)
+        for subtrigger in proto.after_each.subtriggers])
+
+  def to_runner_api(self, context):
+    return beam_runner_api_pb2.Trigger(
+        after_each=beam_runner_api_pb2.Trigger.AfterEach(
+            subtriggers=[
+                subtrigger.to_runner_api(context)
+                for subtrigger in self.triggers]))
+
 
 class OrFinally(AfterFirst):
 
   def __init__(self, body_trigger, exit_trigger):
     super(OrFinally, self).__init__(body_trigger, exit_trigger)
 
+  @staticmethod
+  def from_runner_api(proto, context):
+    return OrFinally(
+        TriggerFn.from_runner_api(proto.or_finally.main, context),
+        # getattr is used as finally is a keyword in Python
+        TriggerFn.from_runner_api(getattr(proto.or_finally, 'finally'),
+                                  context))
+
+  def to_runner_api(self, context):
+    return beam_runner_api_pb2.Trigger(
+        or_finally=beam_runner_api_pb2.Trigger.OrFinally(
+            main=self.triggers[0].to_runner_api(context),
+            # dict keyword argument is used as finally is a keyword in Python
+            **{'finally': self.triggers[1].to_runner_api(context)}))
+
 
 class TriggerContext(object):
 

http://git-wip-us.apache.org/repos/asf/beam/blob/5b86e1fc/sdks/python/apache_beam/transforms/trigger_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/trigger_test.py b/sdks/python/apache_beam/transforms/trigger_test.py
index 72bab2e..cc9e0f5 100644
--- a/sdks/python/apache_beam/transforms/trigger_test.py
+++ b/sdks/python/apache_beam/transforms/trigger_test.py
@@ -38,6 +38,7 @@ from apache_beam.transforms.trigger import DefaultTrigger
 from apache_beam.transforms.trigger import GeneralTriggerDriver
 from apache_beam.transforms.trigger import InMemoryUnmergedState
 from apache_beam.transforms.trigger import Repeatedly
+from apache_beam.transforms.trigger import TriggerFn
 from apache_beam.transforms.util import assert_that, equal_to
 from apache_beam.transforms.window import FixedWindows
 from apache_beam.transforms.window import IntervalWindow
@@ -380,6 +381,38 @@ class TriggerTest(unittest.TestCase):
                        range(10))
 
 
+class RunnerApiTest(unittest.TestCase):
+
+  def test_trigger_encoding(self):
+    for trigger_fn in (
+        DefaultTrigger(),
+        AfterAll(AfterCount(1), AfterCount(10)),
+        AfterFirst(AfterCount(10), AfterCount(100)),
+        AfterWatermark(early=AfterCount(1000)),
+        AfterWatermark(early=AfterCount(1000), late=AfterCount(1)),
+        Repeatedly(AfterCount(100)),
+        trigger.OrFinally(AfterCount(3), AfterCount(10))):
+      context = beam.pipeline.PipelineContext()
+      self.assertEqual(
+          trigger_fn,
+          TriggerFn.from_runner_api(trigger_fn.to_runner_api(context), context))
+
+  def test_windowing_strategy_encoding(self):
+    for trigger_fn in (
+        DefaultTrigger(),
+        AfterAll(AfterCount(1), AfterCount(10)),
+        AfterFirst(AfterCount(10), AfterCount(100)),
+        AfterEach(AfterCount(100), AfterCount(1000)),
+        AfterWatermark(early=AfterCount(1000)),
+        AfterWatermark(early=AfterCount(1000), late=AfterCount(1)),
+        Repeatedly(AfterCount(100)),
+        trigger.OrFinally(AfterCount(3), AfterCount(10))):
+      context = beam.pipeline.PipelineContext()
+      self.assertEqual(
+          trigger_fn,
+          TriggerFn.from_runner_api(trigger_fn.to_runner_api(context), context))
+
+
 class TriggerPipelineTest(unittest.TestCase):
 
   def test_after_count(self):

http://git-wip-us.apache.org/repos/asf/beam/blob/5b86e1fc/sdks/python/apache_beam/transforms/window.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/window.py b/sdks/python/apache_beam/transforms/window.py
index a562bcf..c763a96 100644
--- a/sdks/python/apache_beam/transforms/window.py
+++ b/sdks/python/apache_beam/transforms/window.py
@@ -70,9 +70,9 @@ from apache_beam.utils import urns
 class OutputTimeFn(object):
   """Determines how output timestamps of grouping operations are assigned."""
 
-  OUTPUT_AT_EOW = 'OUTPUT_AT_EOW'
-  OUTPUT_AT_EARLIEST = 'OUTPUT_AT_EARLIEST'
-  OUTPUT_AT_LATEST = 'OUTPUT_AT_LATEST'
+  OUTPUT_AT_EOW = beam_runner_api_pb2.END_OF_WINDOW
+  OUTPUT_AT_EARLIEST = beam_runner_api_pb2.EARLIEST_IN_PANE
+  OUTPUT_AT_LATEST = beam_runner_api_pb2.LATEST_IN_PANE
   OUTPUT_AT_EARLIEST_TRANSFORMED = 'OUTPUT_AT_EARLIEST_TRANSFORMED'
 
   @staticmethod
@@ -116,6 +116,10 @@ class WindowFn(object):
     """Returns a window that is the result of merging a set of windows."""
     raise NotImplementedError
 
+  def is_merging(self):
+    """Returns whether this WindowFn merges windows."""
+    return True
+
   def get_window_coder(self):
     return coders.WindowCoder()
 
@@ -267,7 +271,16 @@ class GlobalWindow(BoundedWindow):
     return self is other or type(self) is type(other)
 
 
-class GlobalWindows(WindowFn):
+class NonMergingWindowFn(WindowFn):
+
+  def is_merging(self):
+    return False
+
+  def merge(self, merge_context):
+    pass  # No merging.
+
+
+class GlobalWindows(NonMergingWindowFn):
   """A windowing function that assigns everything to one global window."""
 
   @classmethod
@@ -277,9 +290,6 @@ class GlobalWindows(WindowFn):
   def assign(self, assign_context):
     return [GlobalWindow()]
 
-  def merge(self, merge_context):
-    pass  # No merging.
-
   def get_window_coder(self):
     return coders.GlobalWindowCoder()
 
@@ -304,7 +314,7 @@ WindowFn.register_urn(
     urns.GLOBAL_WINDOWS_FN, None, GlobalWindows.from_runner_api_parameter)
 
 
-class FixedWindows(WindowFn):
+class FixedWindows(NonMergingWindowFn):
   """A windowing function that assigns each element to one time interval.
 
   The attributes size and offset determine in what time interval a timestamp
@@ -329,9 +339,6 @@ class FixedWindows(WindowFn):
     start = timestamp - (timestamp - self.offset) % self.size
     return [IntervalWindow(start, start + self.size)]
 
-  def merge(self, merge_context):
-    pass  # No merging.
-
   def __eq__(self, other):
     if type(self) == type(other) == FixedWindows:
       return self.size == other.size and self.offset == other.offset
@@ -356,7 +363,7 @@ WindowFn.register_urn(
     FixedWindows.from_runner_api_parameter)
 
 
-class SlidingWindows(WindowFn):
+class SlidingWindows(NonMergingWindowFn):
   """A windowing function that assigns each element to a set of sliding windows.
 
   The attributes size and offset determine in what time interval a timestamp
@@ -384,9 +391,6 @@ class SlidingWindows(WindowFn):
     return [IntervalWindow(Timestamp.of(s), Timestamp.of(s) + self.size)
             for s in range(start, start - self.size, -self.period)]
 
-  def merge(self, merge_context):
-    pass  # No merging.
-
   def __eq__(self, other):
     if type(self) == type(other) == SlidingWindows:
       return (self.size == other.size

http://git-wip-us.apache.org/repos/asf/beam/blob/5b86e1fc/sdks/python/apache_beam/transforms/window_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/window_test.py b/sdks/python/apache_beam/transforms/window_test.py
index 821b143..c79739a 100644
--- a/sdks/python/apache_beam/transforms/window_test.py
+++ b/sdks/python/apache_beam/transforms/window_test.py
@@ -28,13 +28,17 @@ from apache_beam.transforms import Create
 from apache_beam.transforms import GroupByKey
 from apache_beam.transforms import Map
 from apache_beam.transforms import WindowInto
+from apache_beam.transforms.core import Windowing
 from apache_beam.transforms.timeutil import MAX_TIMESTAMP
 from apache_beam.transforms.timeutil import MIN_TIMESTAMP
+from apache_beam.transforms.trigger import AccumulationMode
+from apache_beam.transforms.trigger import AfterCount
 from apache_beam.transforms.util import assert_that, equal_to
 from apache_beam.transforms.window import FixedWindows
 from apache_beam.transforms.window import GlobalWindow
 from apache_beam.transforms.window import GlobalWindows
 from apache_beam.transforms.window import IntervalWindow
+from apache_beam.transforms.window import OutputTimeFn
 from apache_beam.transforms.window import Sessions
 from apache_beam.transforms.window import SlidingWindows
 from apache_beam.transforms.window import TimestampedValue
@@ -226,7 +230,10 @@ class WindowTest(unittest.TestCase):
                 label='assert:mean')
     p.run()
 
-  def test_runner_api(self):
+
+class RunnerApiTest(unittest.TestCase):
+
+  def test_windowfn_encoding(self):
     for window_fn in (GlobalWindows(),
                       FixedWindows(37),
                       SlidingWindows(2, 389),
@@ -236,5 +243,19 @@ class WindowTest(unittest.TestCase):
           window_fn,
           WindowFn.from_runner_api(window_fn.to_runner_api(context), context))
 
+  def test_windowing_encoding(self):
+    for windowing in (
+        Windowing(GlobalWindows()),
+        Windowing(FixedWindows(1, 3), AfterCount(6),
+                  accumulation_mode=AccumulationMode.ACCUMULATING),
+        Windowing(SlidingWindows(10, 15, 21), AfterCount(28),
+                  output_time_fn=OutputTimeFn.OUTPUT_AT_LATEST,
+                  accumulation_mode=AccumulationMode.DISCARDING)):
+      context = pipeline.PipelineContext()
+      self.assertEqual(
+          windowing,
+          Windowing.from_runner_api(windowing.to_runner_api(context), context))
+
+
 if __name__ == '__main__':
   unittest.main()

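The round-trip contract these tests pin down can be exercised directly. A
minimal sketch, assuming PipelineContext is imported from the
apache_beam/runners/pipeline_context.py module added in this commit series
(module path taken from the file list under [14/50] below):

  from apache_beam.runners import pipeline_context
  from apache_beam.transforms.window import FixedWindows, WindowFn

  context = pipeline_context.PipelineContext()
  fn = FixedWindows(37)
  proto = fn.to_runner_api(context)  # a beam_runner_api_pb2 message
  assert fn == WindowFn.from_runner_api(proto, context)
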

[18/50] [abbrv] beam git commit: This closes #1868

Posted by ke...@apache.org.
This closes #1868


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/c12d432d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/c12d432d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/c12d432d

Branch: refs/heads/gearpump-runner
Commit: c12d432d84437175e9ad54f9c31e8c0669f6ecdc
Parents: 2c2424c 94bef14
Author: Amit Sela <am...@gmail.com>
Authored: Fri Mar 10 15:14:57 2017 +0200
Committer: Amit Sela <am...@gmail.com>
Committed: Fri Mar 10 15:14:57 2017 +0200

----------------------------------------------------------------------
 runners/spark/pom.xml                           |   5 +
 .../spark/SparkNativePipelineVisitor.java       | 202 +++++++++++++++++++
 .../apache/beam/runners/spark/SparkRunner.java  |  16 +-
 .../beam/runners/spark/SparkRunnerDebugger.java | 121 +++++++++++
 .../spark/translation/TransformEvaluator.java   |   1 +
 .../spark/translation/TransformTranslator.java  | 105 ++++++++++
 .../streaming/StreamingTransformTranslator.java |  53 ++++-
 .../runners/spark/SparkRunnerDebuggerTest.java  | 180 +++++++++++++++++
 8 files changed, 673 insertions(+), 10 deletions(-)
----------------------------------------------------------------------



[26/50] [abbrv] beam git commit: This closes #2210

Posted by ke...@apache.org.
This closes #2210


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/54390a33
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/54390a33
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/54390a33

Branch: refs/heads/gearpump-runner
Commit: 54390a3338b2216ca82cb9c8e8acd81711068691
Parents: 818fc94 7daf9ab
Author: Ahmet Altay <al...@google.com>
Authored: Fri Mar 10 11:56:40 2017 -0800
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Mar 10 11:56:40 2017 -0800

----------------------------------------------------------------------
 sdks/python/apache_beam/transforms/display.py      |  1 +
 sdks/python/apache_beam/transforms/display_test.py | 11 +++++++++++
 2 files changed, 12 insertions(+)
----------------------------------------------------------------------



[41/50] [abbrv] beam git commit: This closes #2225

Posted by ke...@apache.org.
This closes #2225


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7c78480c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7c78480c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7c78480c

Branch: refs/heads/gearpump-runner
Commit: 7c78480cfe82c9f3bae2da4ed72eda11e036e99a
Parents: f29bf89 466e83f
Author: Ahmet Altay <al...@google.com>
Authored: Fri Mar 10 16:54:10 2017 -0800
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Mar 10 16:54:10 2017 -0800

----------------------------------------------------------------------
 sdks/python/pom.xml | 2 ++
 1 file changed, 2 insertions(+)
----------------------------------------------------------------------



[14/50] [abbrv] beam git commit: Closes #2190

Posted by ke...@apache.org.
Closes #2190


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2c2424cb
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2c2424cb
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2c2424cb

Branch: refs/heads/gearpump-runner
Commit: 2c2424cb44bb2976ea9099230106a639b5ee3993
Parents: f13a84d deff128
Author: Robert Bradshaw <ro...@gmail.com>
Authored: Thu Mar 9 20:29:03 2017 -0800
Committer: Robert Bradshaw <ro...@gmail.com>
Committed: Thu Mar 9 20:29:03 2017 -0800

----------------------------------------------------------------------
 sdks/python/apache_beam/coders/coders.py        |  113 +
 sdks/python/apache_beam/runners/api/__init__.py |   16 +
 .../runners/api/beam_runner_api_pb2.py          | 2772 ++++++++++++++++++
 .../apache_beam/runners/pipeline_context.py     |   88 +
 .../runners/pipeline_context_test.py            |   49 +
 sdks/python/apache_beam/transforms/core.py      |   39 +
 sdks/python/apache_beam/transforms/trigger.py   |  143 +-
 .../apache_beam/transforms/trigger_test.py      |   19 +
 sdks/python/apache_beam/transforms/window.py    |  147 +-
 .../apache_beam/transforms/window_test.py       |   32 +
 sdks/python/apache_beam/utils/proto_utils.py    |   54 +
 sdks/python/apache_beam/utils/urns.py           |   24 +
 sdks/python/run_pylint.sh                       |    3 +-
 13 files changed, 3481 insertions(+), 18 deletions(-)
----------------------------------------------------------------------



[46/50] [abbrv] beam git commit: This closes #2226

Posted by ke...@apache.org.
This closes #2226


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/781e4172
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/781e4172
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/781e4172

Branch: refs/heads/gearpump-runner
Commit: 781e4172c3f36863d1c9145d4c18cd0910f2436a
Parents: b6ca062 839c906
Author: Amit Sela <am...@gmail.com>
Authored: Sun Mar 12 11:59:06 2017 +0200
Committer: Amit Sela <am...@gmail.com>
Committed: Sun Mar 12 11:59:06 2017 +0200

----------------------------------------------------------------------
 .../java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java     | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)
----------------------------------------------------------------------



[49/50] [abbrv] beam git commit: [BEAM-79] Fix gearpump-runner merge conflicts and test failure

Posted by ke...@apache.org.
[BEAM-79] Fix gearpump-runner merge conflicts and test failure


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3eab6a64
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3eab6a64
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3eab6a64

Branch: refs/heads/gearpump-runner
Commit: 3eab6a647e4761725680c8bc40589dfa5569d75b
Parents: 3f91798
Author: manuzhang <ow...@gmail.com>
Authored: Tue Mar 14 08:09:46 2017 +0800
Committer: manuzhang <ow...@gmail.com>
Committed: Wed Mar 15 15:21:29 2017 +0800

----------------------------------------------------------------------
 runners/gearpump/pom.xml                        |  51 ++-
 .../gearpump/GearpumpPipelineResult.java        |  21 +-
 .../gearpump/GearpumpPipelineTranslator.java    | 388 ++++++++++++++++++-
 .../beam/runners/gearpump/GearpumpRunner.java   | 376 +-----------------
 .../runners/gearpump/TestGearpumpRunner.java    |  38 +-
 .../gearpump/examples/StreamingWordCount.java   |  98 -----
 .../gearpump/examples/UnboundedTextSource.java  | 139 -------
 .../runners/gearpump/examples/package-info.java |  22 --
 ...CreateGearpumpPCollectionViewTranslator.java |  14 +-
 .../CreatePCollectionViewTranslator.java        |   6 +-
 .../translators/CreateValuesTranslator.java     |  51 ---
 .../FlattenPCollectionTranslator.java           |  84 ----
 .../FlattenPCollectionsTranslator.java          |  83 ++++
 .../translators/GroupByKeyTranslator.java       |   4 +-
 .../translators/ParDoBoundMultiTranslator.java  |  32 +-
 .../translators/ParDoBoundTranslator.java       |   7 +-
 .../translators/ReadBoundedTranslator.java      |   4 +-
 .../translators/ReadUnboundedTranslator.java    |   4 +-
 .../translators/TransformTranslator.java        |   2 +-
 .../translators/TranslationContext.java         |  29 +-
 .../translators/WindowAssignTranslator.java     | 100 +++++
 .../translators/WindowBoundTranslator.java      | 100 -----
 .../translators/functions/DoFnFunction.java     |  12 +-
 .../translators/io/UnboundedSourceWrapper.java  |   1 +
 .../translators/utils/DoFnRunnerFactory.java    |   4 +-
 .../utils/NoOpAggregatorFactory.java            |   2 +-
 .../translators/utils/NoOpStepContext.java      |   6 +-
 .../translators/utils/TranslatorUtils.java      |   2 -
 .../translators/utils/TranslatorUtilsTest.java  |   1 -
 29 files changed, 703 insertions(+), 978 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/pom.xml
----------------------------------------------------------------------
diff --git a/runners/gearpump/pom.xml b/runners/gearpump/pom.xml
index 3efb1f6..9a6a432 100644
--- a/runners/gearpump/pom.xml
+++ b/runners/gearpump/pom.xml
@@ -23,7 +23,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>0.5.0-incubating-SNAPSHOT</version>
+    <version>0.7.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 
@@ -65,10 +65,12 @@
                 <configuration>
                   <groups>org.apache.beam.sdk.testing.RunnableOnService</groups>
                   <excludedGroups>
+                    org.apache.beam.sdk.testing.FlattenWithHeterogeneousCoders,
                     org.apache.beam.sdk.testing.UsesStatefulParDo,
                     org.apache.beam.sdk.testing.UsesTimersInParDo,
                     org.apache.beam.sdk.testing.UsesSplittableParDo,
-                    org.apache.beam.sdk.testing.UsesMetrics
+                    org.apache.beam.sdk.testing.UsesAttemptedMetrics,
+                    org.apache.beam.sdk.testing.UsesCommittedMetrics
                   </excludedGroups>
                   <parallel>none</parallel>
                   <failIfNoTests>true</failIfNoTests>
@@ -136,6 +138,16 @@
       <artifactId>beam-runners-core-java</artifactId>
     </dependency>
     <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>beam-runners-core-construction-java</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-jdk14</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
       <groupId>joda-time</groupId>
       <artifactId>joda-time</artifactId>
     </dependency>
@@ -182,6 +194,11 @@
       </exclusions>
     </dependency>
     <dependency>
+      <groupId>com.fasterxml.jackson.dataformat</groupId>
+      <artifactId>jackson-dataformat-yaml</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>org.mockito</groupId>
       <artifactId>mockito-all</artifactId>
       <scope>test</scope>
@@ -210,8 +227,36 @@
 
       <!-- Java compiler -->
       <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-compiler-plugin</artifactId>
+        <configuration>
+          <source>1.8</source>
+          <target>1.8</target>
+          <testSource>1.8</testSource>
+          <testTarget>1.8</testTarget>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-enforcer-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>enforce</id>
+            <goals>
+              <goal>enforce</goal>
+            </goals>
+            <configuration>
+              <rules>
+                <enforceBytecodeVersion>
+                  <maxJdkVersion>1.8</maxJdkVersion>
+                </enforceBytecodeVersion>
+                <requireJavaVersion>
+                  <version>[1.8,)</version>
+                </requireJavaVersion>
+              </rules>
+            </configuration>
+          </execution>
+        </executions>
       </plugin>
 
       <!-- uber jar -->

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpPipelineResult.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpPipelineResult.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpPipelineResult.java
index 8f90898..d833cd6 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpPipelineResult.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpPipelineResult.java
@@ -43,6 +43,7 @@ public class GearpumpPipelineResult implements PipelineResult {
 
   private final ClientContext client;
   private final RunningApplication app;
+  private boolean finished = false;
 
   public GearpumpPipelineResult(ClientContext client, RunningApplication app) {
     this.client = client;
@@ -51,13 +52,22 @@ public class GearpumpPipelineResult implements PipelineResult {
 
   @Override
   public State getState() {
-    return getGearpumpState();
+    if (!finished) {
+      return getGearpumpState();
+    } else {
+      return State.DONE;
+    }
   }
 
   @Override
   public State cancel() throws IOException {
-    app.shutDown();
-    return State.CANCELLED;
+    if (!finished) {
+      app.shutDown();
+      finished = true;
+      return State.CANCELLED;
+    } else {
+      return State.DONE;
+    }
   }
 
   @Override
@@ -67,7 +77,10 @@ public class GearpumpPipelineResult implements PipelineResult {
 
   @Override
   public State waitUntilFinish() {
-    app.waitUntilFinish();
+    if (!finished) {
+      app.waitUntilFinish();
+      finished = true;
+    }
     return State.DONE;
   }
 

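A sketch of the call sequence the new `finished` flag guards (not from this
commit; it assumes an already-constructed Beam pipeline configured for this
runner):

  import java.io.IOException;
  import org.apache.beam.sdk.Pipeline;
  import org.apache.beam.sdk.PipelineResult;

  static void runAndVerifyIdempotence(Pipeline pipeline) throws IOException {
    GearpumpPipelineResult result = (GearpumpPipelineResult) pipeline.run();
    result.waitUntilFinish();  // blocks once, then sets finished = true
    // Both calls below now short-circuit on the flag instead of touching
    // the already-stopped Gearpump application:
    assert result.getState() == PipelineResult.State.DONE;
    assert result.cancel() == PipelineResult.State.DONE;  // no second shutDown()
  }
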
http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpPipelineTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpPipelineTranslator.java
index 4cc060c..1a36343 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpPipelineTranslator.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpPipelineTranslator.java
@@ -18,13 +18,19 @@
 
 package org.apache.beam.runners.gearpump;
 
+import com.google.common.collect.ImmutableMap;
+
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
+import org.apache.beam.runners.core.construction.PTransformMatchers;
+import org.apache.beam.runners.core.construction.SingleInputOutputOverrideFactory;
 import org.apache.beam.runners.gearpump.translators.CreateGearpumpPCollectionViewTranslator;
 import org.apache.beam.runners.gearpump.translators.CreatePCollectionViewTranslator;
-import org.apache.beam.runners.gearpump.translators.CreateValuesTranslator;
-import org.apache.beam.runners.gearpump.translators.FlattenPCollectionTranslator;
+import org.apache.beam.runners.gearpump.translators.FlattenPCollectionsTranslator;
 import org.apache.beam.runners.gearpump.translators.GroupByKeyTranslator;
 import org.apache.beam.runners.gearpump.translators.ParDoBoundMultiTranslator;
 import org.apache.beam.runners.gearpump.translators.ParDoBoundTranslator;
@@ -32,17 +38,29 @@ import org.apache.beam.runners.gearpump.translators.ReadBoundedTranslator;
 import org.apache.beam.runners.gearpump.translators.ReadUnboundedTranslator;
 import org.apache.beam.runners.gearpump.translators.TransformTranslator;
 import org.apache.beam.runners.gearpump.translators.TranslationContext;
-import org.apache.beam.runners.gearpump.translators.WindowBoundTranslator;
+import org.apache.beam.runners.gearpump.translators.WindowAssignTranslator;
 import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderRegistry;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ListCoder;
 import org.apache.beam.sdk.io.Read;
+import org.apache.beam.sdk.runners.PTransformMatcher;
+import org.apache.beam.sdk.runners.PTransformOverrideFactory;
 import org.apache.beam.sdk.runners.TransformHierarchy;
-import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.Flatten;
 import org.apache.beam.sdk.transforms.GroupByKey;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
 import org.apache.beam.sdk.transforms.View;
 import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.util.InstanceBuilder;
+import org.apache.beam.sdk.util.PCollectionViews;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionView;
 import org.apache.beam.sdk.values.PValue;
 
 import org.apache.gearpump.util.Graph;
@@ -74,14 +92,13 @@ public class GearpumpPipelineTranslator extends Pipeline.PipelineVisitor.Default
     registerTransformTranslator(Read.Unbounded.class, new ReadUnboundedTranslator());
     registerTransformTranslator(Read.Bounded.class, new ReadBoundedTranslator());
     registerTransformTranslator(GroupByKey.class, new GroupByKeyTranslator());
-    registerTransformTranslator(Flatten.FlattenPCollectionList.class,
-        new FlattenPCollectionTranslator());
+    registerTransformTranslator(Flatten.PCollections.class,
+        new FlattenPCollectionsTranslator());
     registerTransformTranslator(ParDo.BoundMulti.class, new ParDoBoundMultiTranslator());
-    registerTransformTranslator(Window.Bound.class, new WindowBoundTranslator());
-    registerTransformTranslator(Create.Values.class, new CreateValuesTranslator());
+    registerTransformTranslator(Window.Assign.class, new WindowAssignTranslator());
     registerTransformTranslator(View.CreatePCollectionView.class,
         new CreatePCollectionViewTranslator());
-    registerTransformTranslator(GearpumpRunner.CreateGearpumpPCollectionView.class,
+    registerTransformTranslator(CreateGearpumpPCollectionView.class,
         new CreateGearpumpPCollectionViewTranslator<>());
   }
 
@@ -90,6 +107,27 @@ public class GearpumpPipelineTranslator extends Pipeline.PipelineVisitor.Default
   }
 
   public void translate(Pipeline pipeline) {
+    Map<PTransformMatcher, PTransformOverrideFactory> overrides =
+        ImmutableMap.<PTransformMatcher, PTransformOverrideFactory>builder()
+            .put(PTransformMatchers.classEqualTo(Combine.GloballyAsSingletonView.class),
+                new ReflectiveOneToOneOverrideFactory(
+                    StreamingCombineGloballyAsSingletonView.class))
+            .put(PTransformMatchers.classEqualTo(View.AsMap.class),
+                new ReflectiveOneToOneOverrideFactory(StreamingViewAsMap.class))
+            .put(PTransformMatchers.classEqualTo(View.AsMultimap.class),
+                new ReflectiveOneToOneOverrideFactory(StreamingViewAsMultimap.class))
+            .put(PTransformMatchers.classEqualTo(View.AsSingleton.class),
+                new ReflectiveOneToOneOverrideFactory(StreamingViewAsSingleton.class))
+            .put(PTransformMatchers.classEqualTo(View.AsList.class),
+                new ReflectiveOneToOneOverrideFactory(StreamingViewAsList.class))
+            .put(PTransformMatchers.classEqualTo(View.AsIterable.class),
+                new ReflectiveOneToOneOverrideFactory(StreamingViewAsIterable.class))
+            .build();
+
+    for (Map.Entry<PTransformMatcher, PTransformOverrideFactory> override :
+        overrides.entrySet()) {
+      pipeline.replace(override.getKey(), override.getValue());
+    }
     pipeline.traverseTopologically(this);
   }
 
@@ -145,5 +183,337 @@ public class GearpumpPipelineTranslator extends Pipeline.PipelineVisitor.Default
     return transformTranslators.get(transformClass);
   }
 
+  // The following codes are forked from DataflowRunner for View translator
+  private static class ReflectiveOneToOneOverrideFactory<
+      InputT extends PValue,
+      OutputT extends PValue,
+      TransformT extends PTransform<InputT, OutputT>>
+      extends SingleInputOutputOverrideFactory<InputT, OutputT, TransformT> {
+    private final Class<PTransform<InputT, OutputT>> replacement;
+
+    private ReflectiveOneToOneOverrideFactory(
+        Class<PTransform<InputT, OutputT>> replacement) {
+      this.replacement = replacement;
+    }
+
+    @Override
+    public PTransform<InputT, OutputT> getReplacementTransform(TransformT transform) {
+      return InstanceBuilder.ofType(replacement)
+          .withArg((Class<PTransform<InputT, OutputT>>) transform.getClass(), transform)
+          .build();
+    }
+  }
+
+  /**
+   * Specialized implementation for
+   * {@link org.apache.beam.sdk.transforms.View.AsMap View.AsMap}
+   * for the Gearpump runner.
+   */
+  private static class StreamingViewAsMap<K, V>
+      extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, V>>> {
+
+    private static final long serialVersionUID = 4791080760092950304L;
+
+    public StreamingViewAsMap(View.AsMap<K, V> transform) {}
+
+    @Override
+    public PCollectionView<Map<K, V>> expand(PCollection<KV<K, V>> input) {
+      PCollectionView<Map<K, V>> view =
+          PCollectionViews.mapView(
+              input.getPipeline(),
+              input.getWindowingStrategy(),
+              input.getCoder());
+
+      @SuppressWarnings({"rawtypes", "unchecked"})
+      KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
+      try {
+        inputCoder.getKeyCoder().verifyDeterministic();
+      } catch (Coder.NonDeterministicException e) {
+        // throw new RuntimeException(e);
+      }
+
+      return input
+          .apply(Combine.globally(new Concatenate<KV<K, V>>()).withoutDefaults())
+          .apply(CreateGearpumpPCollectionView.<KV<K, V>, Map<K, V>>of(view));
+    }
+
+    @Override
+    protected String getKindString() {
+      return "StreamingViewAsMap";
+    }
+  }
+
+  /**
+   * Specialized expansion for {@link
+   * org.apache.beam.sdk.transforms.View.AsMultimap View.AsMultimap} for the
+   * Gearpump runner.
+   */
+  private static class StreamingViewAsMultimap<K, V>
+      extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, Iterable<V>>>> {
+
+    private static final long serialVersionUID = 5854899081751333352L;
+
+    public StreamingViewAsMultimap(View.AsMultimap<K, V> transform) {}
+
+    @Override
+    public PCollectionView<Map<K, Iterable<V>>> expand(PCollection<KV<K, V>> input) {
+      PCollectionView<Map<K, Iterable<V>>> view =
+          PCollectionViews.multimapView(
+              input.getPipeline(),
+              input.getWindowingStrategy(),
+              input.getCoder());
+
+      @SuppressWarnings({"rawtypes", "unchecked"})
+      KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
+      try {
+        inputCoder.getKeyCoder().verifyDeterministic();
+      } catch (Coder.NonDeterministicException e) {
+        // throw new RuntimeException(e);
+      }
+
+      return input
+          .apply(Combine.globally(new Concatenate<KV<K, V>>()).withoutDefaults())
+          .apply(CreateGearpumpPCollectionView.<KV<K, V>, Map<K, Iterable<V>>>of(view));
+    }
+
+    @Override
+    protected String getKindString() {
+      return "StreamingViewAsMultimap";
+    }
+  }
+
+  /**
+   * Specialized implementation for
+   * {@link org.apache.beam.sdk.transforms.View.AsIterable View.AsIterable} for the
+   * Gearpump runner.
+   */
+  private static class StreamingViewAsIterable<T>
+      extends PTransform<PCollection<T>, PCollectionView<Iterable<T>>> {
+
+    private static final long serialVersionUID = -3399860618995613421L;
+
+    public StreamingViewAsIterable(View.AsIterable<T> transform) {}
+
+    @Override
+    public PCollectionView<Iterable<T>> expand(PCollection<T> input) {
+      PCollectionView<Iterable<T>> view =
+          PCollectionViews.iterableView(
+              input.getPipeline(),
+              input.getWindowingStrategy(),
+              input.getCoder());
+
+      return input.apply(Combine.globally(new Concatenate<T>()).withoutDefaults())
+          .apply(CreateGearpumpPCollectionView.<T, Iterable<T>>of(view));
+    }
+
+    @Override
+    protected String getKindString() {
+      return "StreamingViewAsIterable";
+    }
+  }
+
+  /**
+   * Specialized implementation for
+   * {@link org.apache.beam.sdk.transforms.View.AsList View.AsList} for the
+   * Gearpump runner.
+   */
+  private static class StreamingViewAsList<T>
+      extends PTransform<PCollection<T>, PCollectionView<List<T>>> {
+
+    private static final long serialVersionUID = -5018631473886330629L;
+
+    public StreamingViewAsList(View.AsList<T> transform) {}
+
+    @Override
+    public PCollectionView<List<T>> expand(PCollection<T> input) {
+      PCollectionView<List<T>> view =
+          PCollectionViews.listView(
+              input.getPipeline(),
+              input.getWindowingStrategy(),
+              input.getCoder());
+
+      return input.apply(Combine.globally(new Concatenate<T>()).withoutDefaults())
+          .apply(CreateGearpumpPCollectionView.<T, List<T>>of(view));
+    }
+
+    @Override
+    protected String getKindString() {
+      return "StreamingViewAsList";
+    }
+  }
+  private static class StreamingCombineGloballyAsSingletonView<InputT, OutputT>
+      extends PTransform<PCollection<InputT>, PCollectionView<OutputT>> {
+
+    private static final long serialVersionUID = 9064900748869035738L;
+    private final Combine.GloballyAsSingletonView<InputT, OutputT> transform;
+
+    public StreamingCombineGloballyAsSingletonView(
+        Combine.GloballyAsSingletonView<InputT, OutputT> transform) {
+      this.transform = transform;
+    }
+
+    @Override
+    public PCollectionView<OutputT> expand(PCollection<InputT> input) {
+      PCollection<OutputT> combined =
+          input.apply(Combine.globally(transform.getCombineFn())
+              .withoutDefaults()
+              .withFanout(transform.getFanout()));
+
+      PCollectionView<OutputT> view = PCollectionViews.singletonView(
+          combined.getPipeline(),
+          combined.getWindowingStrategy(),
+          transform.getInsertDefault(),
+          transform.getInsertDefault()
+              ? transform.getCombineFn().defaultValue() : null,
+          combined.getCoder());
+      return combined
+          .apply(ParDo.of(new WrapAsList<OutputT>()))
+          .apply(CreateGearpumpPCollectionView.<OutputT, OutputT>of(view));
+    }
+
+    @Override
+    protected String getKindString() {
+      return "StreamingCombineGloballyAsSingletonView";
+    }
+  }
+
+  private static class StreamingViewAsSingleton<T>
+      extends PTransform<PCollection<T>, PCollectionView<T>> {
+
+    private static final long serialVersionUID = 5870455965625071546L;
+    private final View.AsSingleton<T> transform;
+
+    public StreamingViewAsSingleton(View.AsSingleton<T> transform) {
+      this.transform = transform;
+    }
+
+    @Override
+    public PCollectionView<T> expand(PCollection<T> input) {
+      Combine.Globally<T, T> combine = Combine.globally(
+          new SingletonCombine<>(transform.hasDefaultValue(), transform.defaultValue()));
+      if (!transform.hasDefaultValue()) {
+        combine = combine.withoutDefaults();
+      }
+      return input.apply(combine.asSingletonView());
+    }
+
+    @Override
+    protected String getKindString() {
+      return "StreamingViewAsSingleton";
+    }
+
+    private static class SingletonCombine<T> extends Combine.BinaryCombineFn<T> {
+      private boolean hasDefaultValue;
+      private T defaultValue;
+
+      SingletonCombine(boolean hasDefaultValue, T defaultValue) {
+        this.hasDefaultValue = hasDefaultValue;
+        this.defaultValue = defaultValue;
+      }
+
+      @Override
+      public T apply(T left, T right) {
+        throw new IllegalArgumentException("PCollection with more than one element "
+            + "accessed as a singleton view. Consider using Combine.globally().asSingleton() to "
+            + "combine the PCollection into a single value");
+      }
+
+      @Override
+      public T identity() {
+        if (hasDefaultValue) {
+          return defaultValue;
+        } else {
+          throw new IllegalArgumentException(
+              "Empty PCollection accessed as a singleton view. "
+                  + "Consider setting withDefault to provide a default value");
+        }
+      }
+    }
+  }
+
+  private static class WrapAsList<T> extends DoFn<T, List<T>> {
+    @ProcessElement
+    public void processElement(ProcessContext c) {
+      c.output(Collections.singletonList(c.element()));
+    }
+  }
 
+  /**
+   * Creates a primitive {@link PCollectionView}.
+   *
+   * <p>For internal use only by runner implementors.
+   *
+   * @param <ElemT> The type of the elements of the input PCollection
+   * @param <ViewT> The type associated with the {@link PCollectionView} used as a side input
+   */
+  public static class CreateGearpumpPCollectionView<ElemT, ViewT>
+      extends PTransform<PCollection<List<ElemT>>, PCollectionView<ViewT>> {
+    private static final long serialVersionUID = -2637073020800540542L;
+    private PCollectionView<ViewT> view;
+
+    private CreateGearpumpPCollectionView(PCollectionView<ViewT> view) {
+      this.view = view;
+    }
+
+    public static <ElemT, ViewT> CreateGearpumpPCollectionView<ElemT, ViewT> of(
+        PCollectionView<ViewT> view) {
+      return new CreateGearpumpPCollectionView<>(view);
+    }
+
+    public PCollectionView<ViewT> getView() {
+      return view;
+    }
+
+    @Override
+    public PCollectionView<ViewT> expand(PCollection<List<ElemT>> input) {
+      return view;
+    }
+  }
+
+  /**
+   * Combiner that combines {@code T}s into a single {@code List<T>} containing all inputs.
+   *
+   * <p>For internal use by {@link StreamingViewAsMap}, {@link StreamingViewAsMultimap},
+   * {@link StreamingViewAsList}, {@link StreamingViewAsIterable}.
+   * They require the input {@link PCollection} fits in memory.
+   * For a large {@link PCollection} this is expected to crash!
+   *
+   * @param <T> the type of elements to concatenate.
+   */
+  private static class Concatenate<T> extends Combine.CombineFn<T, List<T>, List<T>> {
+    @Override
+    public List<T> createAccumulator() {
+      return new ArrayList<>();
+    }
+
+    @Override
+    public List<T> addInput(List<T> accumulator, T input) {
+      accumulator.add(input);
+      return accumulator;
+    }
+
+    @Override
+    public List<T> mergeAccumulators(Iterable<List<T>> accumulators) {
+      List<T> result = createAccumulator();
+      for (List<T> accumulator : accumulators) {
+        result.addAll(accumulator);
+      }
+      return result;
+    }
+
+    @Override
+    public List<T> extractOutput(List<T> accumulator) {
+      return accumulator;
+    }
+
+    @Override
+    public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
+      return ListCoder.of(inputCoder);
+    }
+
+    @Override
+    public Coder<List<T>> getDefaultOutputCoder(CoderRegistry registry, Coder<T> inputCoder) {
+      return ListCoder.of(inputCoder);
+    }
+  }
 }

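The Concatenate combiner above is the workhorse of every streaming View
override: each expand() folds the whole input into a single List and hands it
to the primitive CreateGearpumpPCollectionView. A sketch of that pattern in
isolation (Concatenate is private to this class, so the snippet only compiles
inside it; the Create.of input values are hypothetical):

  import java.util.List;
  import org.apache.beam.sdk.Pipeline;
  import org.apache.beam.sdk.transforms.Combine;
  import org.apache.beam.sdk.transforms.Create;
  import org.apache.beam.sdk.values.PCollection;

  static PCollection<List<Integer>> concatenateAll(Pipeline p) {
    // Yields one List per window, e.g. [1, 2, 3] -- hence the javadoc
    // warning that the entire input must fit in memory.
    return p
        .apply(Create.of(1, 2, 3))
        .apply(Combine.globally(new Concatenate<Integer>()).withoutDefaults());
  }
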
http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpRunner.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpRunner.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpRunner.java
index 72f2126..897467a 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpRunner.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/GearpumpRunner.java
@@ -17,40 +17,18 @@
  */
 package org.apache.beam.runners.gearpump;
 
-import com.google.common.collect.ImmutableMap;
 import com.typesafe.config.Config;
 import com.typesafe.config.ConfigValueFactory;
 
-import java.util.ArrayList;
-import java.util.Collections;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
 import org.apache.beam.runners.gearpump.translators.TranslationContext;
+
 import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.CoderRegistry;
-import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.coders.ListCoder;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.options.PipelineOptionsValidator;
 import org.apache.beam.sdk.runners.PipelineRunner;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.Flatten;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.View;
-import org.apache.beam.sdk.util.InstanceBuilder;
-import org.apache.beam.sdk.util.PCollectionViews;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionList;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.PInput;
-import org.apache.beam.sdk.values.POutput;
 
 import org.apache.gearpump.cluster.ClusterConfig;
 import org.apache.gearpump.cluster.UserConfig;
@@ -72,21 +50,8 @@ public class GearpumpRunner extends PipelineRunner<GearpumpPipelineResult> {
   private static final String GEARPUMP_SERIALIZERS = "gearpump.serializers";
   private static final String DEFAULT_APPNAME = "beam_gearpump_app";
 
-  /** Custom transforms implementations. */
-  private final Map<Class<?>, Class<?>> overrides;
-
   public GearpumpRunner(GearpumpPipelineOptions options) {
     this.options = options;
-
-    ImmutableMap.Builder<Class<?>, Class<?>> builder = ImmutableMap.builder();
-    builder.put(Combine.GloballyAsSingletonView.class,
-        StreamingCombineGloballyAsSingletonView.class);
-    builder.put(View.AsMap.class, StreamingViewAsMap.class);
-    builder.put(View.AsMultimap.class, StreamingViewAsMultimap.class);
-    builder.put(View.AsSingleton.class, StreamingViewAsSingleton.class);
-    builder.put(View.AsList.class, StreamingViewAsList.class);
-    builder.put(View.AsIterable.class, StreamingViewAsIterable.class);
-    overrides = builder.build();
   }
 
   public static GearpumpRunner fromOptions(PipelineOptions options) {
@@ -95,31 +60,6 @@ public class GearpumpRunner extends PipelineRunner<GearpumpPipelineResult> {
     return new GearpumpRunner(pipelineOptions);
   }
 
-
-  public <OutputT extends POutput, InputT extends PInput> OutputT apply(
-      PTransform<InputT, OutputT> transform, InputT input) {
-    if (overrides.containsKey(transform.getClass())) {
-
-      Class<PTransform<InputT, OutputT>> transformClass =
-          (Class<PTransform<InputT, OutputT>>) transform.getClass();
-
-      Class<PTransform<InputT, OutputT>> customTransformClass =
-          (Class<PTransform<InputT, OutputT>>) overrides.get(transform.getClass());
-
-      PTransform<InputT, OutputT> customTransform =
-          InstanceBuilder.ofType(customTransformClass)
-              .withArg(transformClass, transform)
-              .build();
-
-      return Pipeline.applyTransform(input, customTransform);
-    } else if (Flatten.FlattenPCollectionList.class.equals(transform.getClass())
-            && ((PCollectionList<?>) input).size() == 0) {
-      return (OutputT) Pipeline.applyTransform(input.getPipeline().begin(), Create.of());
-    } else {
-      return super.apply(transform, input);
-    }
-  }
-
   @Override
   public GearpumpPipelineResult run(Pipeline pipeline) {
     String appName = options.getApplicationName();
@@ -170,318 +110,4 @@ public class GearpumpRunner extends PipelineRunner<GearpumpPipelineResult> {
 
 
 
-  // The following codes are forked from DataflowRunner for View translator
-  /**
-   * Specialized implementation for
-   * {@link org.apache.beam.sdk.transforms.View.AsMap View.AsMap}
-   * for the Gearpump runner.
-   */
-  private static class StreamingViewAsMap<K, V>
-      extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, V>>> {
-
-    private static final long serialVersionUID = 4791080760092950304L;
-
-    public StreamingViewAsMap(View.AsMap<K, V> transform) {}
-
-    @Override
-    public PCollectionView<Map<K, V>> expand(PCollection<KV<K, V>> input) {
-      PCollectionView<Map<K, V>> view =
-          PCollectionViews.mapView(
-              input.getPipeline(),
-              input.getWindowingStrategy(),
-              input.getCoder());
-
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
-      try {
-        inputCoder.getKeyCoder().verifyDeterministic();
-      } catch (Coder.NonDeterministicException e) {
-        // throw new RuntimeException(e);
-      }
-
-      return input
-          .apply(Combine.globally(new Concatenate<KV<K, V>>()).withoutDefaults())
-          .apply(CreateGearpumpPCollectionView.<KV<K, V>, Map<K, V>>of(view));
-    }
-
-    @Override
-    protected String getKindString() {
-      return "StreamingViewAsMap";
-    }
-  }
-
-  /**
-   * Specialized expansion for {@link
-   * org.apache.beam.sdk.transforms.View.AsMultimap View.AsMultimap} for the
-   * Gearpump runner.
-   */
-  private static class StreamingViewAsMultimap<K, V>
-      extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, Iterable<V>>>> {
-
-    private static final long serialVersionUID = 5854899081751333352L;
-
-    public StreamingViewAsMultimap(View.AsMultimap<K, V> transform) {}
-
-    @Override
-    public PCollectionView<Map<K, Iterable<V>>> expand(PCollection<KV<K, V>> input) {
-      PCollectionView<Map<K, Iterable<V>>> view =
-          PCollectionViews.multimapView(
-              input.getPipeline(),
-              input.getWindowingStrategy(),
-              input.getCoder());
-
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
-      try {
-        inputCoder.getKeyCoder().verifyDeterministic();
-      } catch (Coder.NonDeterministicException e) {
-        // throw new RuntimeException(e);
-      }
-
-      return input
-          .apply(Combine.globally(new Concatenate<KV<K, V>>()).withoutDefaults())
-          .apply(CreateGearpumpPCollectionView.<KV<K, V>, Map<K, Iterable<V>>>of(view));
-    }
-
-    @Override
-    protected String getKindString() {
-      return "StreamingViewAsMultimap";
-    }
-  }
-
-  /**
-   * Specialized implementation for
-   * {@link org.apache.beam.sdk.transforms.View.AsIterable View.AsIterable} for the
-   * Gearpump runner.
-   */
-  private static class StreamingViewAsIterable<T>
-      extends PTransform<PCollection<T>, PCollectionView<Iterable<T>>> {
-
-    private static final long serialVersionUID = -3399860618995613421L;
-
-    public StreamingViewAsIterable(View.AsIterable<T> transform) {}
-
-    @Override
-    public PCollectionView<Iterable<T>> expand(PCollection<T> input) {
-      PCollectionView<Iterable<T>> view =
-          PCollectionViews.iterableView(
-              input.getPipeline(),
-              input.getWindowingStrategy(),
-              input.getCoder());
-
-      return input.apply(Combine.globally(new Concatenate<T>()).withoutDefaults())
-          .apply(CreateGearpumpPCollectionView.<T, Iterable<T>>of(view));
-    }
-
-    @Override
-    protected String getKindString() {
-      return "StreamingViewAsIterable";
-    }
-  }
-
-  /**
-   * Specialized implementation for
-   * {@link org.apache.beam.sdk.transforms.View.AsList View.AsList} for the
-   * Gearpump runner.
-   */
-  private static class StreamingViewAsList<T>
-      extends PTransform<PCollection<T>, PCollectionView<List<T>>> {
-
-    private static final long serialVersionUID = -5018631473886330629L;
-
-    public StreamingViewAsList(View.AsList<T> transform) {}
-
-    @Override
-    public PCollectionView<List<T>> expand(PCollection<T> input) {
-      PCollectionView<List<T>> view =
-          PCollectionViews.listView(
-              input.getPipeline(),
-              input.getWindowingStrategy(),
-              input.getCoder());
-
-      return input.apply(Combine.globally(new Concatenate<T>()).withoutDefaults())
-          .apply(CreateGearpumpPCollectionView.<T, List<T>>of(view));
-    }
-
-    @Override
-    protected String getKindString() {
-      return "StreamingViewAsList";
-    }
-  }
-  private static class StreamingCombineGloballyAsSingletonView<InputT, OutputT>
-      extends PTransform<PCollection<InputT>, PCollectionView<OutputT>> {
-
-    private static final long serialVersionUID = 9064900748869035738L;
-    private final Combine.GloballyAsSingletonView<InputT, OutputT> transform;
-
-    public StreamingCombineGloballyAsSingletonView(
-        Combine.GloballyAsSingletonView<InputT, OutputT> transform) {
-      this.transform = transform;
-    }
-
-    @Override
-    public PCollectionView<OutputT> expand(PCollection<InputT> input) {
-      PCollection<OutputT> combined =
-          input.apply(Combine.globally(transform.getCombineFn())
-              .withoutDefaults()
-              .withFanout(transform.getFanout()));
-
-      PCollectionView<OutputT> view = PCollectionViews.singletonView(
-          combined.getPipeline(),
-          combined.getWindowingStrategy(),
-          transform.getInsertDefault(),
-          transform.getInsertDefault()
-              ? transform.getCombineFn().defaultValue() : null,
-          combined.getCoder());
-      return combined
-          .apply(ParDo.of(new WrapAsList<OutputT>()))
-          .apply(CreateGearpumpPCollectionView.<OutputT, OutputT>of(view));
-    }
-
-    @Override
-    protected String getKindString() {
-      return "StreamingCombineGloballyAsSingletonView";
-    }
-  }
-
-  private static class StreamingViewAsSingleton<T>
-      extends PTransform<PCollection<T>, PCollectionView<T>> {
-
-    private static final long serialVersionUID = 5870455965625071546L;
-    private final View.AsSingleton<T> transform;
-
-    public StreamingViewAsSingleton(View.AsSingleton<T> transform) {
-      this.transform = transform;
-    }
-
-    @Override
-    public PCollectionView<T> expand(PCollection<T> input) {
-      Combine.Globally<T, T> combine = Combine.globally(
-          new SingletonCombine<>(transform.hasDefaultValue(), transform.defaultValue()));
-      if (!transform.hasDefaultValue()) {
-        combine = combine.withoutDefaults();
-      }
-      return input.apply(combine.asSingletonView());
-    }
-
-    @Override
-    protected String getKindString() {
-      return "StreamingViewAsSingleton";
-    }
-
-    private static class SingletonCombine<T> extends Combine.BinaryCombineFn<T> {
-      private boolean hasDefaultValue;
-      private T defaultValue;
-
-      SingletonCombine(boolean hasDefaultValue, T defaultValue) {
-        this.hasDefaultValue = hasDefaultValue;
-        this.defaultValue = defaultValue;
-      }
-
-      @Override
-      public T apply(T left, T right) {
-        throw new IllegalArgumentException("PCollection with more than one element "
-            + "accessed as a singleton view. Consider using Combine.globally().asSingleton() to "
-            + "combine the PCollection into a single value");
-      }
-
-      @Override
-      public T identity() {
-        if (hasDefaultValue) {
-          return defaultValue;
-        } else {
-          throw new IllegalArgumentException(
-              "Empty PCollection accessed as a singleton view. "
-                  + "Consider setting withDefault to provide a default value");
-        }
-      }
-    }
-  }
-
-  private static class WrapAsList<T> extends DoFn<T, List<T>> {
-    @ProcessElement
-    public void processElement(ProcessContext c) {
-      c.output(Collections.singletonList(c.element()));
-    }
-  }
-
-  /**
-   * Creates a primitive {@link PCollectionView}.
-   *
-   * <p>For internal use only by runner implementors.
-   *
-   * @param <ElemT> The type of the elements of the input PCollection
-   * @param <ViewT> The type associated with the {@link PCollectionView} used as a side input
-   */
-  public static class CreateGearpumpPCollectionView<ElemT, ViewT>
-      extends PTransform<PCollection<List<ElemT>>, PCollectionView<ViewT>> {
-    private static final long serialVersionUID = -2637073020800540542L;
-    private PCollectionView<ViewT> view;
-
-    private CreateGearpumpPCollectionView(PCollectionView<ViewT> view) {
-      this.view = view;
-    }
-
-    public static <ElemT, ViewT> CreateGearpumpPCollectionView<ElemT, ViewT> of(
-        PCollectionView<ViewT> view) {
-      return new CreateGearpumpPCollectionView<>(view);
-    }
-
-    public PCollectionView<ViewT> getView() {
-      return view;
-    }
-
-    @Override
-    public PCollectionView<ViewT> expand(PCollection<List<ElemT>> input) {
-      return view;
-    }
-  }
-
-  /**
-   * Combiner that combines {@code T}s into a single {@code List<T>} containing all inputs.
-   *
-   * <p>For internal use by {@link StreamingViewAsMap}, {@link StreamingViewAsMultimap},
-   * {@link StreamingViewAsList}, {@link StreamingViewAsIterable}.
-   * They require the input {@link PCollection} fits in memory.
-   * For a large {@link PCollection} this is expected to crash!
-   *
-   * @param <T> the type of elements to concatenate.
-   */
-  private static class Concatenate<T> extends Combine.CombineFn<T, List<T>, List<T>> {
-    @Override
-    public List<T> createAccumulator() {
-      return new ArrayList<>();
-    }
-
-    @Override
-    public List<T> addInput(List<T> accumulator, T input) {
-      accumulator.add(input);
-      return accumulator;
-    }
-
-    @Override
-    public List<T> mergeAccumulators(Iterable<List<T>> accumulators) {
-      List<T> result = createAccumulator();
-      for (List<T> accumulator : accumulators) {
-        result.addAll(accumulator);
-      }
-      return result;
-    }
-
-    @Override
-    public List<T> extractOutput(List<T> accumulator) {
-      return accumulator;
-    }
-
-    @Override
-    public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
-      return ListCoder.of(inputCoder);
-    }
-
-    @Override
-    public Coder<List<T>> getDefaultOutputCoder(CoderRegistry registry, Coder<T> inputCoder) {
-      return ListCoder.of(inputCoder);
-    }
-  }
-
 }

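With the override table and View expansions moved into
GearpumpPipelineTranslator, the runner is now selected purely through pipeline
options. A minimal sketch, mirroring the main() of the StreamingWordCount
example deleted below:

  import org.apache.beam.runners.gearpump.GearpumpPipelineOptions;
  import org.apache.beam.runners.gearpump.GearpumpRunner;
  import org.apache.beam.sdk.Pipeline;
  import org.apache.beam.sdk.options.PipelineOptionsFactory;

  public static void main(String[] args) {
    GearpumpPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).as(GearpumpPipelineOptions.class);
    options.setRunner(GearpumpRunner.class);
    Pipeline p = Pipeline.create(options);
    // ... apply transforms here ...
    p.run().waitUntilFinish();
  }
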
http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/TestGearpumpRunner.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/TestGearpumpRunner.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/TestGearpumpRunner.java
index c96bcb1..ea7dd26 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/TestGearpumpRunner.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/TestGearpumpRunner.java
@@ -24,9 +24,6 @@ import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.options.PipelineOptionsValidator;
 import org.apache.beam.sdk.runners.PipelineRunner;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.values.PInput;
-import org.apache.beam.sdk.values.POutput;
 
 import org.apache.gearpump.cluster.ClusterConfig;
 import org.apache.gearpump.cluster.embedded.EmbeddedCluster;
@@ -58,36 +55,9 @@ public class TestGearpumpRunner extends PipelineRunner<GearpumpPipelineResult> {
 
   @Override
   public GearpumpPipelineResult run(Pipeline pipeline) {
-    try {
-      GearpumpPipelineResult result = delegate.run(pipeline);
-      result.waitUntilFinish();
-      cluster.stop();
-      return result;
-    } catch (Throwable e) {
-      // copied from TestFlinkRunner to pull out AssertionError
-      // which is wrapped in UserCodeException
-      Throwable cause = e;
-      Throwable oldCause;
-      do {
-        if (cause.getCause() == null) {
-          break;
-        }
-
-        oldCause = cause;
-        cause = cause.getCause();
-
-      } while (!oldCause.equals(cause));
-      if (cause instanceof AssertionError) {
-        throw (AssertionError) cause;
-      } else {
-        throw e;
-      }
-    }
-  }
-
-  @Override
-  public <OutputT extends POutput, InputT extends PInput>
-  OutputT apply(PTransform<InputT, OutputT> transform, InputT input) {
-    return delegate.apply(transform, input);
+    GearpumpPipelineResult result = delegate.run(pipeline);
+    result.waitUntilFinish();
+    cluster.stop();
+    return result;
   }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/StreamingWordCount.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/StreamingWordCount.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/StreamingWordCount.java
deleted file mode 100644
index b2d762a..0000000
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/StreamingWordCount.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.gearpump.examples;
-
-import org.apache.beam.runners.gearpump.GearpumpPipelineOptions;
-import org.apache.beam.runners.gearpump.GearpumpRunner;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.Read;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.Count;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.windowing.FixedWindows;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-
-import org.apache.gearpump.cluster.client.ClientContext;
-import org.joda.time.Duration;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-/**
- * streaming word count example on Gearpump runner.
- */
-public class StreamingWordCount {
-
-  static class ExtractWordsFn extends DoFn<String, String> {
-
-    @ProcessElement
-    public void process(ProcessContext c) {
-      // Split the line into words.
-      String[] words = c.element().split("[^a-zA-Z']+");
-
-      // Output each word encountered into the output PCollection.
-      for (String word : words) {
-        if (!word.isEmpty()) {
-          c.output(word);
-        }
-      }
-    }
-  }
-
-  static class FormatAsStringFn extends DoFn<KV<String, Long>, String> {
-    private static final Logger LOG = LoggerFactory.getLogger(FormatAsStringFn.class);
-
-    @ProcessElement
-    public void process(ProcessContext c) {
-      String row = c.element().getKey()
-          + " - " + c.element().getValue()
-          + " @ " + c.timestamp().toString();
-      LOG.debug("output {}", row);
-      c.output(row);
-    }
-  }
-
-
-  public static void main(String[] args) {
-    GearpumpPipelineOptions options = PipelineOptionsFactory
-            .fromArgs(args).as(GearpumpPipelineOptions.class);
-    options.setRunner(GearpumpRunner.class);
-    options.setApplicationName("StreamingWordCount");
-    options.setParallelism(1);
-
-    Pipeline p = Pipeline.create(options);
-
-    PCollection<KV<String, Long>> wordCounts =
-        p.apply(Read.from(new UnboundedTextSource()))
-            .apply(ParDo.of(new ExtractWordsFn()))
-            .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(10))))
-            .apply(Count.<String>perElement());
-
-    wordCounts.apply(ParDo.of(new FormatAsStringFn()));
-
-    p.run();
-
-    ClientContext clientContext = options.getClientContext();
-    clientContext.close();
-
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/UnboundedTextSource.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/UnboundedTextSource.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/UnboundedTextSource.java
deleted file mode 100644
index b014432..0000000
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/UnboundedTextSource.java
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.gearpump.examples;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.Collections;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-import javax.annotation.Nullable;
-
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.io.UnboundedSource;
-import org.apache.beam.sdk.options.PipelineOptions;
-
-import org.joda.time.Instant;
-
-
-/**
- * unbounded source that reads from text.
- */
-public class UnboundedTextSource extends UnboundedSource<String, UnboundedSource.CheckpointMark> {
-
-  @Override
-  public List<? extends UnboundedSource<String, CheckpointMark>> generateInitialSplits(
-      int desiredNumSplits, PipelineOptions options) throws Exception {
-    return Collections.<UnboundedSource<String, CheckpointMark>>singletonList(this);
-  }
-
-  @Override
-  public UnboundedReader<String> createReader(PipelineOptions options,
-      @Nullable CheckpointMark checkpointMark) {
-    return new UnboundedTextReader(this);
-  }
-
-  @Nullable
-  @Override
-  public Coder<CheckpointMark> getCheckpointMarkCoder() {
-    return null;
-  }
-
-  @Override
-  public void validate() {
-  }
-
-  @Override
-  public Coder<String> getDefaultOutputCoder() {
-    return StringUtf8Coder.of();
-  }
-
-  /**
-   * reads from text.
-   */
-  public static class UnboundedTextReader extends UnboundedReader<String> implements Serializable {
-
-    private static final long serialVersionUID = 7526472295622776147L;
-
-    private final UnboundedTextSource source;
-
-    private final String[] texts = new String[]{"foo foo foo bar bar", "foo foo bar bar bar"};
-    private long index = 0;
-
-    private String currentRecord;
-
-    private Instant currentTimestamp;
-
-    public UnboundedTextReader(UnboundedTextSource source) {
-      this.source = source;
-    }
-
-    @Override
-    public boolean start() throws IOException {
-      currentRecord = texts[0];
-      currentTimestamp = new Instant(0);
-      return true;
-    }
-
-    @Override
-    public boolean advance() throws IOException {
-      index++;
-      currentRecord = texts[(int) index % (texts.length)];
-      currentTimestamp = new Instant(index * 1000);
-
-      return true;
-    }
-
-    @Override
-    public byte[] getCurrentRecordId() throws NoSuchElementException {
-      return new byte[0];
-    }
-
-    @Override
-    public String getCurrent() throws NoSuchElementException {
-      return this.currentRecord;
-    }
-
-    @Override
-    public Instant getCurrentTimestamp() throws NoSuchElementException {
-      return currentTimestamp;
-    }
-
-    @Override
-    public void close() throws IOException {
-    }
-
-    @Override
-    public Instant getWatermark() {
-      return currentTimestamp;
-    }
-
-    @Override
-    public CheckpointMark getCheckpointMark() {
-      return null;
-    }
-
-    @Override
-    public UnboundedSource<String, ?> getCurrentSource() {
-      return this.source;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/package-info.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/package-info.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/package-info.java
deleted file mode 100644
index a62a6c0..0000000
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/examples/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Examples showcase Beam application over Gearpump runner.
- */
-package org.apache.beam.runners.gearpump.examples;

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreateGearpumpPCollectionViewTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreateGearpumpPCollectionViewTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreateGearpumpPCollectionViewTranslator.java
index d05c89d..c7f24a8 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreateGearpumpPCollectionViewTranslator.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreateGearpumpPCollectionViewTranslator.java
@@ -20,25 +20,27 @@ package org.apache.beam.runners.gearpump.translators;
 
 import java.util.List;
 
-import org.apache.beam.runners.gearpump.GearpumpRunner;
+import org.apache.beam.runners.gearpump.GearpumpPipelineTranslator;
 import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.values.PCollectionView;
 import org.apache.gearpump.streaming.dsl.javaapi.JavaStream;
 
-
 /**
  * CreateGearpumpPCollectionView bridges input stream to down stream
  * transforms.
  */
 public class CreateGearpumpPCollectionViewTranslator<ElemT, ViewT> implements
-    TransformTranslator<GearpumpRunner.CreateGearpumpPCollectionView<ElemT, ViewT>> {
+    TransformTranslator<GearpumpPipelineTranslator.CreateGearpumpPCollectionView<ElemT, ViewT>> {
+
+  private static final long serialVersionUID = -3955521308055056034L;
 
   @Override
-  public void translate(GearpumpRunner.CreateGearpumpPCollectionView<ElemT, ViewT> transform,
+  public void translate(
+      GearpumpPipelineTranslator.CreateGearpumpPCollectionView<ElemT, ViewT> transform,
       TranslationContext context) {
     JavaStream<WindowedValue<List<ElemT>>> inputStream =
-        context.getInputStream(context.getInput(transform));
-    PCollectionView<ViewT> view = transform.getView();
+        context.getInputStream(context.getInput());
+    PCollectionView<ViewT> view = (PCollectionView<ViewT>) context.getOutput();
     context.setOutputStream(view, inputStream);
   }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreatePCollectionViewTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreatePCollectionViewTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreatePCollectionViewTranslator.java
index e9e2e5d..da55d70 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreatePCollectionViewTranslator.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreatePCollectionViewTranslator.java
@@ -32,12 +32,14 @@ import org.apache.gearpump.streaming.dsl.javaapi.JavaStream;
 public class CreatePCollectionViewTranslator<ElemT, ViewT> implements
     TransformTranslator<View.CreatePCollectionView<ElemT, ViewT>> {
 
+  private static final long serialVersionUID = -2394386873317515748L;
+
   @Override
   public void translate(View.CreatePCollectionView<ElemT, ViewT> transform,
                         TranslationContext context) {
     JavaStream<WindowedValue<List<ElemT>>> inputStream =
-        context.getInputStream(context.getInput(transform));
-    PCollectionView<ViewT> view = transform.getView();
+        context.getInputStream(context.getInput());
+    PCollectionView<ViewT> view = (PCollectionView<ViewT>) context.getOutput();
     context.setOutputStream(view, inputStream);
   }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreateValuesTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreateValuesTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreateValuesTranslator.java
deleted file mode 100644
index e5dc6dd..0000000
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/CreateValuesTranslator.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.gearpump.translators;
-
-import org.apache.beam.runners.gearpump.translators.io.UnboundedSourceWrapper;
-import org.apache.beam.runners.gearpump.translators.io.ValuesSource;
-import org.apache.beam.sdk.coders.CannotProvideCoderException;
-import org.apache.beam.sdk.io.UnboundedSource;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.util.WindowedValue;
-
-import org.apache.gearpump.streaming.dsl.javaapi.JavaStream;
-
-/**
- * Wraps elements from Create.Values into an {@link UnboundedSource}.
- * mainly used for test
- */
-public class CreateValuesTranslator<T> implements TransformTranslator<Create.Values<T>> {
-
-  private static final long serialVersionUID = 5411841848199229738L;
-
-  @Override
-  public void translate(Create.Values<T> transform, TranslationContext context) {
-    try {
-      UnboundedSourceWrapper<T, ?> unboundedSourceWrapper = new UnboundedSourceWrapper<>(
-          new ValuesSource<>(transform.getElements(),
-              transform.getDefaultOutputCoder(context.getInput(transform))),
-          context.getPipelineOptions());
-      JavaStream<WindowedValue<T>> sourceStream = context.getSourceStream(unboundedSourceWrapper);
-      context.setOutputStream(context.getOutput(transform), sourceStream);
-    } catch (CannotProvideCoderException e) {
-      throw new RuntimeException(e);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/FlattenPCollectionTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/FlattenPCollectionTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/FlattenPCollectionTranslator.java
deleted file mode 100644
index 27e54b8..0000000
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/FlattenPCollectionTranslator.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.gearpump.translators;
-
-import com.google.common.collect.Lists;
-
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.beam.runners.gearpump.translators.io.UnboundedSourceWrapper;
-import org.apache.beam.runners.gearpump.translators.io.ValuesSource;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.transforms.Flatten;
-import org.apache.beam.sdk.values.PCollection;
-
-import org.apache.gearpump.streaming.dsl.api.functions.MapFunction;
-import org.apache.gearpump.streaming.dsl.javaapi.JavaStream;
-
-
-
-/**
- * Flatten.FlattenPCollectionList is translated to Gearpump merge function.
- * Note only two-way merge is working now
- */
-public class FlattenPCollectionTranslator<T> implements
-    TransformTranslator<Flatten.FlattenPCollectionList<T>> {
-
-  private static final long serialVersionUID = -5552148802472944759L;
-
-  @Override
-  public void translate(Flatten.FlattenPCollectionList<T> transform, TranslationContext context) {
-    JavaStream<T> merged = null;
-    Set<PCollection<T>> unique = new HashSet<>();
-    for (PCollection<T> collection : context.getInput(transform).getAll()) {
-      unique.add(collection);
-      JavaStream<T> inputStream = context.getInputStream(collection);
-      if (null == merged) {
-        merged = inputStream;
-      } else {
-        // duplicate edges are not allowed in Gearpump graph
-        // so we route through a dummy node
-        if (unique.contains(collection)) {
-          inputStream = inputStream.map(new DummyFunction<T>(), "dummy");
-        }
-
-        merged = merged.merge(inputStream, transform.getName());
-      }
-    }
-
-    if (null == merged) {
-      UnboundedSourceWrapper<String, ?> unboundedSourceWrapper = new UnboundedSourceWrapper<>(
-          new ValuesSource<>(Lists.newArrayList("dummy"),
-              StringUtf8Coder.of()), context.getPipelineOptions());
-      merged = context.getSourceStream(unboundedSourceWrapper);
-    }
-    context.setOutputStream(context.getOutput(transform), merged);
-  }
-
-  private static class DummyFunction<T> extends MapFunction<T, T> {
-
-    private static final long serialVersionUID = 5454396869997290471L;
-
-    @Override
-    public T map(T t) {
-      return t;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/FlattenPCollectionsTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/FlattenPCollectionsTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/FlattenPCollectionsTranslator.java
new file mode 100644
index 0000000..3a465cb
--- /dev/null
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/FlattenPCollectionsTranslator.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.gearpump.translators;
+
+import com.google.common.collect.Lists;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.beam.runners.gearpump.translators.io.UnboundedSourceWrapper;
+import org.apache.beam.runners.gearpump.translators.io.ValuesSource;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.transforms.Flatten;
+import org.apache.beam.sdk.values.PCollection;
+
+import org.apache.beam.sdk.values.TaggedPValue;
+import org.apache.gearpump.streaming.dsl.api.functions.MapFunction;
+import org.apache.gearpump.streaming.dsl.javaapi.JavaStream;
+
+/**
+ * Flatten.PCollections is translated to Gearpump merge function.
+ */
+public class FlattenPCollectionsTranslator<T> implements
+    TransformTranslator<Flatten.PCollections<T>> {
+
+  private static final long serialVersionUID = -5552148802472944759L;
+
+  @Override
+  public void translate(Flatten.PCollections<T> transform, TranslationContext context) {
+    JavaStream<T> merged = null;
+    Set<PCollection<T>> unique = new HashSet<>();
+    for (TaggedPValue input: context.getInputs()) {
+      PCollection<T> collection = (PCollection<T>) input.getValue();
+      unique.add(collection);
+      JavaStream<T> inputStream = context.getInputStream(collection);
+      if (null == merged) {
+        merged = inputStream;
+      } else {
+        // duplicate edges are not allowed in Gearpump graph
+        // so we route through a dummy node
+        if (unique.contains(collection)) {
+          inputStream = inputStream.map(new DummyFunction<T>(), "dummy");
+        }
+
+        merged = merged.merge(inputStream, transform.getName());
+      }
+    }
+
+    if (null == merged) {
+      UnboundedSourceWrapper<String, ?> unboundedSourceWrapper = new UnboundedSourceWrapper<>(
+          new ValuesSource<>(Lists.newArrayList("dummy"),
+              StringUtf8Coder.of()), context.getPipelineOptions());
+      merged = context.getSourceStream(unboundedSourceWrapper);
+    }
+    context.setOutputStream(context.getOutput(), merged);
+  }
+
+  private static class DummyFunction<T> extends MapFunction<T, T> {
+
+    private static final long serialVersionUID = 5454396869997290471L;
+
+    @Override
+    public T map(T t) {
+      return t;
+    }
+  }
+}
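
For reference, a minimal sketch (hypothetical pipeline, not part of the commit)
of the duplicate-input case the dummy map node above guards against: the same
PCollection appears twice in the flatten, which would otherwise create a
duplicate edge in the Gearpump graph.

    PCollection<String> words = p.apply(Create.of("foo", "bar"));
    PCollection<String> flattened =
        PCollectionList.of(words).and(words)      // same collection twice
            .apply(Flatten.<String>pCollections());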

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/GroupByKeyTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/GroupByKeyTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/GroupByKeyTranslator.java
index df8bfe9..5dfd3e9 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/GroupByKeyTranslator.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/GroupByKeyTranslator.java
@@ -61,7 +61,7 @@ public class GroupByKeyTranslator<K, V> implements TransformTranslator<GroupByKe
 
   @Override
   public void translate(GroupByKey<K, V> transform, TranslationContext context) {
-    PCollection<KV<K, V>> input = context.getInput(transform);
+    PCollection<KV<K, V>> input = (PCollection<KV<K, V>>) context.getInput();
     Coder<K> inputKeyCoder = ((KvCoder<K, V>) input.getCoder()).getKeyCoder();
     JavaStream<WindowedValue<KV<K, V>>> inputStream =
         context.getInputStream(input);
@@ -80,7 +80,7 @@ public class GroupByKeyTranslator<K, V> implements TransformTranslator<GroupByKe
         .fold(new Merge<>(windowFn, outputTimeFn), "merge")
         .map(new Values<K, V>(), "values");
 
-    context.setOutputStream(context.getOutput(transform), outputStream);
+    context.setOutputStream(context.getOutput(), outputStream);
   }
 
   private static class GearpumpWindowFn<T, W extends BoundedWindow>

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ParDoBoundMultiTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ParDoBoundMultiTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ParDoBoundMultiTranslator.java
index 8c57019..e88cb73 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ParDoBoundMultiTranslator.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ParDoBoundMultiTranslator.java
@@ -18,17 +18,11 @@
 
 package org.apache.beam.runners.gearpump.translators;
 
-
-import com.google.common.base.Predicate;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
-import javax.annotation.Nullable;
-
 import org.apache.beam.runners.gearpump.translators.functions.DoFnFunction;
 import org.apache.beam.runners.gearpump.translators.utils.TranslatorUtils;
 import org.apache.beam.sdk.transforms.DoFn;
@@ -36,6 +30,7 @@ import org.apache.beam.sdk.transforms.ParDo;
 import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TaggedPValue;
 import org.apache.beam.sdk.values.TupleTag;
 
 import org.apache.gearpump.streaming.dsl.api.functions.FilterFunction;
@@ -54,21 +49,21 @@ public class ParDoBoundMultiTranslator<InputT, OutputT> implements
 
   @Override
   public void translate(ParDo.BoundMulti<InputT, OutputT> transform, TranslationContext context) {
-    PCollection<InputT> inputT = (PCollection<InputT>) context.getInput(transform);
+    PCollection<InputT> inputT = (PCollection<InputT>) context.getInput();
     JavaStream<WindowedValue<InputT>> inputStream = context.getInputStream(inputT);
     Collection<PCollectionView<?>> sideInputs = transform.getSideInputs();
     Map<String, PCollectionView<?>> tagsToSideInputs =
         TranslatorUtils.getTagsToSideInputs(sideInputs);
 
-    Map<TupleTag<?>, PCollection<?>> outputs = context.getOutput(transform).getAll();
+    List<TaggedPValue> outputs = context.getOutputs();
     final TupleTag<OutputT> mainOutput = transform.getMainOutputTag();
-    List<TupleTag<?>> sideOutputs = Lists.newLinkedList(Sets.filter(outputs.keySet(),
-        new Predicate<TupleTag<?>>() {
-          @Override
-          public boolean apply(@Nullable TupleTag<?> tupleTag) {
-            return tupleTag != null && !tupleTag.getId().equals(mainOutput.getId());
-          }
-        }));
+    List<TupleTag<?>> sideOutputs = new ArrayList<>(outputs.size() - 1);
+    for (TaggedPValue output: outputs) {
+      TupleTag<?> tag = output.getTag();
+      if (tag != null && !tag.getId().equals(mainOutput.getId())) {
+        sideOutputs.add(tag);
+      }
+    }
 
     JavaStream<TranslatorUtils.RawUnionValue> unionStream = TranslatorUtils.withSideInputStream(
         context, inputStream, tagsToSideInputs);
@@ -83,10 +78,9 @@ public class ParDoBoundMultiTranslator<InputT, OutputT> implements
                 tagsToSideInputs,
                 mainOutput,
                 sideOutputs), transform.getName());
-    for (Map.Entry<TupleTag<?>, PCollection<?>> output: outputs.entrySet()) {
-      output.getValue().getCoder();
+    for (TaggedPValue output: outputs) {
       JavaStream<WindowedValue<OutputT>> taggedStream = outputStream
-          .filter(new FilterByOutputTag(output.getKey().getId()),
+          .filter(new FilterByOutputTag(output.getTag().getId()),
               "filter_by_output_tag")
           .map(new TranslatorUtils.FromRawUnionValue<OutputT>(), "from_RawUnionValue");
       context.setOutputStream(output.getValue(), taggedStream);
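
As a hedged illustration (the tag names and DoFn are made up), the kind of
multi-output ParDo this translator handles declares one main output tag plus
side output tags:

    final TupleTag<String> mainTag = new TupleTag<String>() {};
    final TupleTag<String> sideTag = new TupleTag<String>() {};
    PCollectionTuple outputs = input.apply(
        ParDo.of(new MySplittingFn())             // MySplittingFn is hypothetical
            .withOutputTags(mainTag, TupleTagList.of(sideTag)));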

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ParDoBoundTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ParDoBoundTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ParDoBoundTranslator.java
index efae938..dc32b8c 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ParDoBoundTranslator.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ParDoBoundTranslator.java
@@ -35,7 +35,6 @@ import org.apache.beam.sdk.values.TupleTag;
 import org.apache.beam.sdk.values.TupleTagList;
 import org.apache.gearpump.streaming.dsl.javaapi.JavaStream;
 
-
 /**
  * {@link ParDo.Bound} is translated to Gearpump flatMap function
  * with {@link DoFn} wrapped in {@link DoFnFunction}.
@@ -50,14 +49,14 @@ public class ParDoBoundTranslator<InputT, OutputT> implements
   @Override
   public void translate(ParDo.Bound<InputT, OutputT> transform, TranslationContext context) {
     DoFn<InputT, OutputT> doFn = transform.getFn();
-    PCollection<OutputT> output = context.getOutput(transform);
+    PCollection<OutputT> output = (PCollection<OutputT>) context.getOutput();
     WindowingStrategy<?, ?> windowingStrategy = output.getWindowingStrategy();
 
     Collection<PCollectionView<?>> sideInputs = transform.getSideInputs();
     Map<String, PCollectionView<?>> tagsToSideInputs =
         TranslatorUtils.getTagsToSideInputs(sideInputs);
     JavaStream<WindowedValue<InputT>> inputStream = context.getInputStream(
-        context.getInput(transform));
+        context.getInput());
     JavaStream<TranslatorUtils.RawUnionValue> unionStream =
         TranslatorUtils.withSideInputStream(context,
         inputStream, tagsToSideInputs);
@@ -71,6 +70,6 @@ public class ParDoBoundTranslator<InputT, OutputT> implements
             .flatMap(doFnFunction, transform.getName())
             .map(new TranslatorUtils.FromRawUnionValue<OutputT>(), "from_RawUnionValue");
 
-    context.setOutputStream(context.getOutput(transform), outputStream);
+    context.setOutputStream(context.getOutput(), outputStream);
   }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ReadBoundedTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ReadBoundedTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ReadBoundedTranslator.java
index 478d58f..8f71a8e 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ReadBoundedTranslator.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ReadBoundedTranslator.java
@@ -31,6 +31,8 @@ import org.apache.gearpump.streaming.source.DataSource;
  */
 public class ReadBoundedTranslator <T> implements TransformTranslator<Read.Bounded<T>> {
 
+  private static final long serialVersionUID = -3899020490896998330L;
+
   @Override
   public void translate(Read.Bounded<T> transform, TranslationContext context) {
     BoundedSource<T> boundedSource = transform.getSource();
@@ -38,7 +40,7 @@ public class ReadBoundedTranslator <T> implements TransformTranslator<Read.Bound
         context.getPipelineOptions());
     JavaStream<WindowedValue<T>> sourceStream = context.getSourceStream(sourceWrapper);
 
-    context.setOutputStream(context.getOutput(transform), sourceStream);
+    context.setOutputStream(context.getOutput(), sourceStream);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ReadUnboundedTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ReadUnboundedTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ReadUnboundedTranslator.java
index 7e12a9c..0462c57 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ReadUnboundedTranslator.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/ReadUnboundedTranslator.java
@@ -33,6 +33,8 @@ import org.apache.gearpump.streaming.source.DataSource;
 
 public class ReadUnboundedTranslator<T> implements TransformTranslator<Read.Unbounded<T>> {
 
+  private static final long serialVersionUID = 3529494817859948619L;
+
   @Override
   public void translate(Read.Unbounded<T> transform, TranslationContext context) {
     UnboundedSource<T, ?> unboundedSource = transform.getSource();
@@ -40,7 +42,7 @@ public class ReadUnboundedTranslator<T> implements TransformTranslator<Read.Unbo
         unboundedSource, context.getPipelineOptions());
     JavaStream<WindowedValue<T>> sourceStream = context.getSourceStream(unboundedSourceWrapper);
 
-    context.setOutputStream(context.getOutput(transform), sourceStream);
+    context.setOutputStream(context.getOutput(), sourceStream);
   }
 
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/TransformTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/TransformTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/TransformTranslator.java
index c8587d3..c7becad 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/TransformTranslator.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/TransformTranslator.java
@@ -23,7 +23,7 @@ import java.io.Serializable;
 import org.apache.beam.sdk.transforms.PTransform;
 
 /**
- * translates {@link PTransform} to Gearpump functions.
+ * Translates {@link PTransform} to Gearpump functions.
  */
 public interface TransformTranslator<T extends PTransform> extends Serializable {
   void translate(T transform, TranslationContext context);

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/TranslationContext.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/TranslationContext.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/TranslationContext.java
index b2cff8a..e88bb74 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/TranslationContext.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/TranslationContext.java
@@ -20,17 +20,18 @@ package org.apache.beam.runners.gearpump.translators;
 
 import static com.google.common.base.Preconditions.checkArgument;
 
+import com.google.common.collect.Iterables;
+
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.beam.runners.gearpump.GearpumpPipelineOptions;
 import org.apache.beam.sdk.runners.TransformHierarchy;
 import org.apache.beam.sdk.transforms.AppliedPTransform;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.values.PInput;
-import org.apache.beam.sdk.values.POutput;
 import org.apache.beam.sdk.values.PValue;
 
+import org.apache.beam.sdk.values.TaggedPValue;
 import org.apache.gearpump.cluster.UserConfig;
 import org.apache.gearpump.streaming.dsl.javaapi.JavaStream;
 import org.apache.gearpump.streaming.dsl.javaapi.JavaStreamApp;
@@ -70,18 +71,26 @@ public class TranslationContext {
     }
   }
 
-  public <InputT extends PInput> InputT getInput(PTransform<InputT, ?> transform) {
-    return (InputT) getCurrentTransform(transform).getInput();
+  public List<TaggedPValue> getInputs() {
+    return getCurrentTransform().getInputs();
+  }
+
+  public PValue getInput() {
+    return Iterables.getOnlyElement(getInputs()).getValue();
+  }
+
+  public List<TaggedPValue> getOutputs() {
+    return getCurrentTransform().getOutputs();
   }
 
-  public <OutputT extends POutput> OutputT getOutput(PTransform<?, OutputT> transform) {
-    return (OutputT) getCurrentTransform(transform).getOutput();
+  public PValue getOutput() {
+    return Iterables.getOnlyElement(getOutputs()).getValue();
   }
 
-  private AppliedPTransform<?, ?, ?> getCurrentTransform(PTransform<?, ?> transform) {
+  private AppliedPTransform<?, ?, ?> getCurrentTransform() {
     checkArgument(
-        currentTransform != null && currentTransform.getTransform() == transform,
-        "can only be called with current transform");
+        currentTransform != null,
+        "current transform not set");
     return currentTransform;
   }
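
A sketch of how a single-input, single-output translator uses the new
accessors; getInput() and getOutput() assume exactly one tagged value on each
side, which Iterables.getOnlyElement enforces:

    PCollection<String> input = (PCollection<String>) context.getInput();
    JavaStream<WindowedValue<String>> stream = context.getInputStream(input);
    context.setOutputStream(context.getOutput(), stream);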
 

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/WindowAssignTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/WindowAssignTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/WindowAssignTranslator.java
new file mode 100644
index 0000000..fe6015a
--- /dev/null
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/WindowAssignTranslator.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.gearpump.translators;
+
+import com.google.common.collect.Iterables;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.gearpump.streaming.dsl.javaapi.JavaStream;
+import org.apache.gearpump.streaming.dsl.javaapi.functions.FlatMapFunction;
+import org.joda.time.Instant;
+
+/**
+ * {@link Window.Assign} is translated to Gearpump flatMap function.
+ */
+@SuppressWarnings("unchecked")
+public class WindowAssignTranslator<T> implements  TransformTranslator<Window.Assign<T>> {
+
+  private static final long serialVersionUID = -964887482120489061L;
+
+  @Override
+  public void translate(Window.Assign<T> transform, TranslationContext context) {
+    PCollection<T> input = (PCollection<T>) context.getInput();
+    PCollection<T> output = (PCollection<T>) context.getOutput();
+    JavaStream<WindowedValue<T>> inputStream = context.getInputStream(input);
+    WindowingStrategy<?, ?> outputStrategy = output.getWindowingStrategy();
+    WindowFn<T, BoundedWindow> windowFn = (WindowFn<T, BoundedWindow>) outputStrategy.getWindowFn();
+    JavaStream<WindowedValue<T>> outputStream =
+        inputStream
+            .flatMap(new AssignWindows(windowFn), "assign_windows");
+
+    context.setOutputStream(output, outputStream);
+  }
+
+  private static class AssignWindows<T> extends
+      FlatMapFunction<WindowedValue<T>, WindowedValue<T>> {
+
+    private static final long serialVersionUID = 7284565861938681360L;
+    private final WindowFn<T, BoundedWindow> windowFn;
+
+    AssignWindows(WindowFn<T, BoundedWindow> windowFn) {
+      this.windowFn = windowFn;
+    }
+
+    @Override
+    public Iterator<WindowedValue<T>> flatMap(final WindowedValue<T> value) {
+      try {
+        Collection<BoundedWindow> windows = windowFn.assignWindows(windowFn.new AssignContext() {
+          @Override
+          public T element() {
+            return value.getValue();
+          }
+
+          @Override
+          public Instant timestamp() {
+            return value.getTimestamp();
+          }
+
+          @Override
+          public BoundedWindow window() {
+            return Iterables.getOnlyElement(value.getWindows());
+          }
+        });
+        List<WindowedValue<T>> values = new ArrayList<>(windows.size());
+        for (BoundedWindow win: windows) {
+          values.add(
+              WindowedValue.of(value.getValue(), value.getTimestamp(), win, value.getPane()));
+        }
+        return values.iterator();
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+}
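
A minimal usage sketch of the pipeline step this translator handles: assigning
each element to a fixed 10-second window, as in the removed StreamingWordCount
example above.

    PCollection<String> windowed = input.apply(
        Window.<String>into(FixedWindows.of(Duration.standardSeconds(10))));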


[40/50] [abbrv] beam git commit: Generate zip distribution for python

Posted by ke...@apache.org.
Generate zip distribution for python


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/466e83fd
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/466e83fd
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/466e83fd

Branch: refs/heads/gearpump-runner
Commit: 466e83fdd88c16f096f842a2ca941f49d3701658
Parents: f29bf89
Author: Ahmet Altay <al...@google.com>
Authored: Fri Mar 10 16:40:34 2017 -0800
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Mar 10 16:54:02 2017 -0800

----------------------------------------------------------------------
 sdks/python/pom.xml | 2 ++
 1 file changed, 2 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/466e83fd/sdks/python/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/python/pom.xml b/sdks/python/pom.xml
index 0d1fdb4..cb8307a 100644
--- a/sdks/python/pom.xml
+++ b/sdks/python/pom.xml
@@ -136,6 +136,8 @@
                 <argument>sdist</argument>
                 <argument>--dist-dir</argument>
                 <argument>${project.build.directory}</argument>
+                <argument>--formats</argument>
+                <argument>zip,gztar</argument>
               </arguments>
               <environmentVariables>
                 <PYTHONUSERBASE>${python.user.base}</PYTHONUSERBASE>
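
With the two added arguments, the Maven exec step is roughly equivalent to
running the following from sdks/python (the build directory name is assumed
here):

    python setup.py sdist --dist-dir target --formats zip,gztar

so both a .zip and a .tar.gz source distribution are produced.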


[47/50] [abbrv] beam git commit: Merge branch 'master' of https://github.com/apache/incubator-beam into gearpump-runner

Posted by ke...@apache.org.
Merge branch 'master' of https://github.com/apache/incubator-beam into gearpump-runner


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3f917987
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3f917987
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3f917987

Branch: refs/heads/gearpump-runner
Commit: 3f917987f02b6e28f99e40c711a7289b2d4c8e6b
Parents: 15a8ad6 781e417
Author: manuzhang <ow...@gmail.com>
Authored: Sun Mar 12 18:41:51 2017 +0800
Committer: manuzhang <ow...@gmail.com>
Committed: Sun Mar 12 18:41:51 2017 +0800

----------------------------------------------------------------------
 .gitignore                                      |   10 +
 .jenkins/common_job_properties.groovy           |   98 +-
 ...job_beam_PostCommit_Java_MavenInstall.groovy |    4 +-
 ...ostCommit_Java_RunnableOnService_Apex.groovy |    8 +-
 ...ommit_Java_RunnableOnService_Dataflow.groovy |    8 +-
 ...stCommit_Java_RunnableOnService_Flink.groovy |    8 +-
 ...ommit_Java_RunnableOnService_Gearpump.groovy |   12 +-
 ...stCommit_Java_RunnableOnService_Spark.groovy |   10 +-
 .../job_beam_PostCommit_Python_Verify.groovy    |    8 +-
 .../job_beam_PreCommit_Java_MavenInstall.groovy |    6 +-
 .../job_beam_PreCommit_Website_Stage.groovy     |   80 +
 .jenkins/job_beam_PreCommit_Website_Test.groovy |   65 +
 .../job_beam_Release_NightlySnapshot.groovy     |    9 +-
 .jenkins/job_seed.groovy                        |   24 +-
 .travis.yml                                     |   24 +-
 .travis/README.md                               |    2 +-
 DISCLAIMER                                      |   10 -
 NOTICE                                          |    4 +-
 README.md                                       |   57 +-
 examples/java/README.md                         |   16 +-
 examples/java/pom.xml                           |   24 +-
 .../beam/examples/DebuggingWordCount.java       |    4 +-
 .../org/apache/beam/examples/WordCount.java     |    6 +-
 .../beam/examples/complete/AutoComplete.java    |    4 +-
 .../org/apache/beam/examples/complete/README.md |   14 +-
 .../apache/beam/examples/complete/TfIdf.java    |    2 +-
 .../examples/complete/TopWikipediaSessions.java |   27 +-
 .../examples/complete/TrafficMaxLaneFlow.java   |    2 +-
 .../beam/examples/complete/TrafficRoutes.java   |    2 +-
 .../examples/cookbook/BigQueryTornadoes.java    |    2 +-
 .../cookbook/CombinePerKeyExamples.java         |    2 +-
 .../org/apache/beam/examples/cookbook/README.md |   14 +-
 .../beam/examples/cookbook/TriggerExample.java  |    4 +-
 .../beam/examples/WindowedWordCountIT.java      |   16 +-
 .../org/apache/beam/examples/WordCountTest.java |    7 +-
 .../examples/complete/AutoCompleteTest.java     |   11 +-
 .../beam/examples/complete/TfIdfTest.java       |    6 +-
 .../complete/TopWikipediaSessionsTest.java      |    7 +-
 .../examples/cookbook/DistinctExampleTest.java  |    9 +-
 .../examples/cookbook/JoinExamplesTest.java     |    6 +-
 .../examples/cookbook/TriggerExampleTest.java   |    6 +-
 examples/java8/pom.xml                          |    3 +-
 .../beam/examples/complete/game/GameStats.java  |    7 +-
 .../examples/complete/game/LeaderBoard.java     |    5 +-
 .../beam/examples/complete/game/UserScore.java  |    2 +-
 .../examples/MinimalWordCountJava8Test.java     |    6 +-
 .../examples/complete/game/GameStatsTest.java   |    7 +-
 .../complete/game/HourlyTeamScoreTest.java      |    5 +-
 .../examples/complete/game/LeaderBoardTest.java |   11 +-
 .../examples/complete/game/UserScoreTest.java   |   10 +-
 examples/pom.xml                                |   16 +-
 pom.xml                                         |  319 +-
 runners/apex/README.md                          |    4 +-
 runners/apex/pom.xml                            |   32 +-
 .../beam/runners/apex/ApexPipelineOptions.java  |    7 +-
 .../apache/beam/runners/apex/ApexRunner.java    |  144 +-
 .../beam/runners/apex/ApexYarnLauncher.java     |   29 +-
 .../beam/runners/apex/TestApexRunner.java       |   10 -
 .../translation/ApexPipelineTranslator.java     |   17 +-
 .../translation/CreateValuesTranslator.java     |   25 +-
 .../FlattenPCollectionTranslator.java           |   37 +-
 .../apex/translation/GroupByKeyTranslator.java  |    2 +-
 .../translation/ParDoBoundMultiTranslator.java  |   32 +-
 .../apex/translation/ParDoBoundTranslator.java  |    9 +-
 .../apex/translation/TranslationContext.java    |   43 +-
 .../translation/WindowAssignTranslator.java     |   78 +
 .../apex/translation/WindowBoundTranslator.java |   78 -
 .../operators/ApexFlattenOperator.java          |    4 +-
 .../operators/ApexGroupByKeyOperator.java       |   22 +-
 .../operators/ApexParDoOperator.java            |   12 +-
 .../ApexReadUnboundedInputOperator.java         |   17 +-
 .../translation/utils/ApexStateInternals.java   |   30 +-
 .../apex/translation/utils/NoOpStepContext.java |    7 +-
 .../beam/runners/apex/ApexRunnerTest.java       |   76 +
 .../beam/runners/apex/ApexYarnLauncherTest.java |    9 +-
 .../runners/apex/examples/WordCountTest.java    |    2 +-
 .../FlattenPCollectionTranslatorTest.java       |   24 +-
 .../translation/ParDoBoundTranslatorTest.java   |   38 +-
 .../translation/ReadUnboundTranslatorTest.java  |    8 +-
 .../utils/ApexStateInternalsTest.java           |   14 +-
 .../test/resources/beam-runners-apex.properties |   20 +
 runners/core-construction-java/pom.xml          |  138 +
 .../EmptyFlattenAsCreateFactory.java            |   71 +
 .../core/construction/PTransformMatchers.java   |  192 +
 .../core/construction/PrimitiveCreate.java      |   77 +
 .../core/construction/ReplacementOutputs.java   |  105 +
 .../SingleInputOutputOverrideFactory.java       |   50 +
 .../UnsupportedOverrideFactory.java             |   71 +
 .../runners/core/construction/package-info.java |   22 +
 .../construction/PTransformMatchersTest.java    |  425 ++
 .../construction/ReplacementOutputsTest.java    |  254 +
 .../SingleInputOutputOverrideFactoryTest.java   |  114 +
 .../UnsupportedOverrideFactoryTest.java         |   65 +
 runners/core-java/pom.xml                       |   24 +-
 .../beam/runners/core/AggregatorFactory.java    |    1 -
 .../beam/runners/core/AssignWindowsDoFn.java    |    3 +-
 .../beam/runners/core/BaseExecutionContext.java |  174 +
 .../apache/beam/runners/core/DoFnAdapters.java  |  323 ++
 .../apache/beam/runners/core/DoFnRunner.java    |   21 -
 .../apache/beam/runners/core/DoFnRunners.java   |  158 +-
 .../beam/runners/core/ExecutionContext.java     |  100 +
 .../GroupAlsoByWindowViaOutputBufferDoFn.java   |  114 +
 .../core/GroupAlsoByWindowViaWindowSetDoFn.java |   17 +-
 .../GroupAlsoByWindowViaWindowSetNewDoFn.java   |  154 +
 .../runners/core/GroupAlsoByWindowsDoFn.java    |    5 +-
 .../GroupAlsoByWindowsViaOutputBufferDoFn.java  |  134 -
 .../runners/core/InMemoryStateInternals.java    |  643 +++
 .../runners/core/InMemoryTimerInternals.java    |  300 ++
 .../apache/beam/runners/core/KeyedWorkItem.java |    2 +-
 .../beam/runners/core/KeyedWorkItemCoder.java   |    8 +-
 .../beam/runners/core/KeyedWorkItems.java       |    2 +-
 .../core/LateDataDroppingDoFnRunner.java        |    2 -
 .../apache/beam/runners/core/LateDataUtils.java |   88 +
 .../runners/core/MergingActiveWindowSet.java    |    4 -
 .../beam/runners/core/MergingStateAccessor.java |   41 +
 .../apache/beam/runners/core/NonEmptyPanes.java |    7 +-
 .../org/apache/beam/runners/core/OldDoFn.java   |  471 ++
 ...eBoundedSplittableProcessElementInvoker.java |  285 ++
 .../beam/runners/core/PaneInfoTracker.java      |    4 -
 .../runners/core/PerKeyCombineFnRunner.java     |   70 -
 .../runners/core/PerKeyCombineFnRunners.java    |  101 -
 .../org/apache/beam/runners/core/ReduceFn.java  |    2 -
 .../runners/core/ReduceFnContextFactory.java    |   11 +-
 .../beam/runners/core/ReduceFnRunner.java       |    6 +-
 .../beam/runners/core/SideInputHandler.java     |    4 -
 .../beam/runners/core/SimpleDoFnRunner.java     |  190 +-
 .../beam/runners/core/SimpleOldDoFnRunner.java  |   12 +-
 .../beam/runners/core/SplittableParDo.java      |  233 +-
 .../core/SplittableProcessElementInvoker.java   |   65 +
 .../apache/beam/runners/core/StateAccessor.java |   38 +
 .../beam/runners/core/StateInternals.java       |   59 +
 .../runners/core/StateInternalsFactory.java     |   35 +
 .../apache/beam/runners/core/StateMerging.java  |  309 ++
 .../beam/runners/core/StateNamespace.java       |   56 +
 .../runners/core/StateNamespaceForTest.java     |   65 +
 .../beam/runners/core/StateNamespaces.java      |  278 ++
 .../apache/beam/runners/core/StateTable.java    |   84 +
 .../org/apache/beam/runners/core/StateTag.java  |  125 +
 .../org/apache/beam/runners/core/StateTags.java |  382 ++
 .../beam/runners/core/StatefulDoFnRunner.java   |  171 +
 .../beam/runners/core/SystemReduceFn.java       |    5 -
 .../core/TestInMemoryStateInternals.java        |   63 +
 .../beam/runners/core/TimerInternals.java       |  286 ++
 .../runners/core/TimerInternalsFactory.java     |   35 +
 .../core/UnboundedReadFromBoundedSource.java    |   14 +-
 .../core/UnsupportedSideInputReader.java        |   52 +
 .../apache/beam/runners/core/WatermarkHold.java |    9 +-
 .../beam/runners/core/WindowingInternals.java   |   81 +
 .../core/WindowingInternalsAdapters.java        |    1 -
 .../core/triggers/AfterAllStateMachine.java     |    2 +-
 .../AfterDelayFromFirstElementStateMachine.java |   16 +-
 .../core/triggers/AfterFirstStateMachine.java   |    2 +-
 .../core/triggers/AfterPaneStateMachine.java    |   12 +-
 .../AfterProcessingTimeStateMachine.java        |    2 +
 ...rSynchronizedProcessingTimeStateMachine.java |    7 +-
 .../triggers/AfterWatermarkStateMachine.java    |   15 +-
 .../core/triggers/OrFinallyStateMachine.java    |    2 +-
 .../triggers/ReshuffleTriggerStateMachine.java  |    6 +-
 .../core/triggers/TriggerStateMachine.java      |    6 +-
 .../TriggerStateMachineContextFactory.java      |   12 +-
 .../triggers/TriggerStateMachineRunner.java     |    8 +-
 .../core/triggers/TriggerStateMachines.java     |  250 +-
 .../core/DoFnDelegatingAggregatorTest.java      |  144 +
 ...roupAlsoByWindowViaOutputBufferDoFnTest.java |  109 +
 .../core/GroupAlsoByWindowsProperties.java      |    7 +-
 ...oupAlsoByWindowsViaOutputBufferDoFnTest.java |  110 -
 .../core/InMemoryStateInternalsTest.java        |  574 +++
 .../core/InMemoryTimerInternalsTest.java        |  195 +
 .../runners/core/KeyedWorkItemCoderTest.java    |    9 +-
 .../core/LateDataDroppingDoFnRunnerTest.java    |    3 +-
 .../core/MergingActiveWindowSetTest.java        |    2 -
 .../apache/beam/runners/core/NoOpOldDoFn.java   |   72 +
 .../beam/runners/core/OldDoFnContextTest.java   |   72 +
 .../apache/beam/runners/core/OldDoFnTest.java   |  192 +
 ...ndedSplittableProcessElementInvokerTest.java |  146 +
 .../core/PushbackSideInputDoFnRunnerTest.java   |    8 +-
 .../beam/runners/core/ReduceFnRunnerTest.java   |   12 +-
 .../beam/runners/core/ReduceFnTester.java       |   19 +-
 .../beam/runners/core/SideInputHandlerTest.java |    1 -
 .../beam/runners/core/SimpleDoFnRunnerTest.java |    6 +-
 .../runners/core/SimpleOldDoFnRunnerTest.java   |    4 +-
 .../beam/runners/core/SplittableParDoTest.java  |  254 +-
 .../beam/runners/core/StateNamespacesTest.java  |  130 +
 .../apache/beam/runners/core/StateTagTest.java  |  205 +
 .../runners/core/StatefulDoFnRunnerTest.java    |  347 ++
 .../beam/runners/core/TimerInternalsTest.java   |  105 +
 .../UnboundedReadFromBoundedSourceTest.java     |   18 +-
 .../beam/runners/core/WindowMatchers.java       |    3 +-
 ...chronizedProcessingTimeStateMachineTest.java |    3 +-
 .../AfterWatermarkStateMachineTest.java         |   19 +
 .../core/triggers/NeverStateMachineTest.java    |    1 -
 .../ReshuffleTriggerStateMachineTest.java       |    8 +-
 .../triggers/TriggerStateMachineTester.java     |   16 +-
 .../core/triggers/TriggerStateMachinesTest.java |  167 +-
 runners/direct-java/pom.xml                     |   27 +-
 .../runners/direct/AggregatorContainer.java     |    2 +-
 .../direct/BoundedReadEvaluatorFactory.java     |   10 +-
 .../CopyOnAccessInMemoryStateInternals.java     |   68 +-
 .../runners/direct/DirectExecutionContext.java  |    8 +-
 ...ectGBKIntoKeyedWorkItemsOverrideFactory.java |    4 +-
 .../beam/runners/direct/DirectGraphVisitor.java |   33 +-
 .../direct/DirectGroupByKeyOverrideFactory.java |    3 +-
 .../beam/runners/direct/DirectMetrics.java      |   31 +-
 .../beam/runners/direct/DirectRunner.java       |  114 +-
 .../runners/direct/DirectTimerInternals.java    |   14 +-
 ...ecycleManagerRemovingTransformEvaluator.java |   19 +-
 .../beam/runners/direct/EmptyInputProvider.java |    4 +-
 .../beam/runners/direct/EvaluationContext.java  |   40 +-
 .../direct/ExecutorServiceParallelExecutor.java |   14 +-
 .../runners/direct/FlattenEvaluatorFactory.java |    8 +-
 .../GroupAlsoByWindowEvaluatorFactory.java      |   38 +-
 .../direct/GroupByKeyOnlyEvaluatorFactory.java  |   10 +-
 .../direct/ImmutableListBundleFactory.java      |    6 +
 .../direct/KeyedPValueTrackingVisitor.java      |   52 +-
 .../beam/runners/direct/ParDoEvaluator.java     |   29 +-
 .../runners/direct/ParDoEvaluatorFactory.java   |   25 +-
 .../direct/ParDoMultiOverrideFactory.java       |  154 +-
 .../ParDoSingleViaMultiOverrideFactory.java     |    5 +-
 .../runners/direct/RootProviderRegistry.java    |    4 +-
 ...littableProcessElementsEvaluatorFactory.java |   84 +-
 .../direct/StatefulParDoEvaluatorFactory.java   |   72 +-
 .../direct/TestStreamEvaluatorFactory.java      |   44 +-
 .../direct/TransformEvaluatorRegistry.java      |    6 +-
 .../direct/UnboundedReadEvaluatorFactory.java   |   22 +-
 .../runners/direct/ViewEvaluatorFactory.java    |   16 +-
 .../beam/runners/direct/WatermarkManager.java   |  107 +-
 .../runners/direct/WindowEvaluatorFactory.java  |   17 +-
 .../direct/WriteWithShardingFactory.java        |  148 +-
 .../runners/direct/AggregatorContainerTest.java |   18 +-
 .../direct/BoundedReadEvaluatorFactoryTest.java |   18 +-
 .../direct/CloningBundleFactoryTest.java        |   16 +-
 .../runners/direct/CommittedResultTest.java     |   23 +-
 .../CopyOnAccessInMemoryStateInternalsTest.java |   79 +-
 .../runners/direct/DirectGraphVisitorTest.java  |   55 +-
 .../DirectGroupByKeyOverrideFactoryTest.java    |   51 +
 .../beam/runners/direct/DirectMetricsTest.java  |  128 +-
 .../beam/runners/direct/DirectRunnerTest.java   |    8 +-
 .../direct/DirectTimerInternalsTest.java        |    4 +-
 ...leManagerRemovingTransformEvaluatorTest.java |  103 +-
 .../runners/direct/EvaluationContextTest.java   |   30 +-
 .../direct/FlattenEvaluatorFactoryTest.java     |    8 +-
 .../direct/GroupByKeyEvaluatorFactoryTest.java  |    5 +-
 .../GroupByKeyOnlyEvaluatorFactoryTest.java     |    5 +-
 .../ImmutabilityCheckingBundleFactoryTest.java  |    6 +-
 .../ImmutabilityEnforcementFactoryTest.java     |    3 +-
 .../direct/ImmutableListBundleFactoryTest.java  |   37 +-
 .../direct/KeyedPValueTrackingVisitorTest.java  |  167 +-
 .../beam/runners/direct/ParDoEvaluatorTest.java |    9 +-
 .../direct/ParDoMultiOverrideFactoryTest.java   |   45 +
 .../ParDoSingleViaMultiOverrideFactoryTest.java |   46 +
 .../runners/direct/SideInputContainerTest.java  |    5 +-
 .../StatefulParDoEvaluatorFactoryTest.java      |  129 +-
 .../runners/direct/StepTransformResultTest.java |    5 +-
 .../direct/TestStreamEvaluatorFactoryTest.java  |   38 +-
 .../runners/direct/TransformExecutorTest.java   |    4 +-
 .../UnboundedReadEvaluatorFactoryTest.java      |   22 +-
 .../direct/ViewEvaluatorFactoryTest.java        |   18 +-
 .../direct/WatermarkCallbackExecutorTest.java   |    5 +-
 .../runners/direct/WatermarkManagerTest.java    |   37 +-
 .../direct/WindowEvaluatorFactoryTest.java      |   87 +-
 .../direct/WriteWithShardingFactoryTest.java    |  154 +-
 runners/flink/README.md                         |  250 -
 runners/flink/examples/pom.xml                  |    2 +-
 .../beam/runners/flink/examples/WordCount.java  |    2 +-
 .../flink/examples/streaming/AutoComplete.java  |    4 +-
 .../examples/streaming/KafkaIOExamples.java     |    4 +-
 .../KafkaWindowedWordCountExample.java          |    2 +-
 .../examples/streaming/WindowedWordCount.java   |    2 +-
 runners/flink/pom.xml                           |    4 +-
 runners/flink/runner/pom.xml                    |   56 +-
 .../flink/DefaultParallelismFactory.java        |    3 +-
 .../flink/FlinkBatchPipelineTranslator.java     |  139 +
 .../flink/FlinkBatchTransformTranslators.java   |  797 ++++
 .../flink/FlinkBatchTranslationContext.java     |  154 +
 .../FlinkPipelineExecutionEnvironment.java      |   11 +-
 .../runners/flink/FlinkPipelineOptions.java     |    6 +-
 .../runners/flink/FlinkPipelineTranslator.java  |   53 +
 .../apache/beam/runners/flink/FlinkRunner.java  |  395 +-
 .../flink/FlinkStreamingPipelineTranslator.java |  230 +
 .../FlinkStreamingTransformTranslators.java     | 1043 +++++
 .../flink/FlinkStreamingTranslationContext.java |  130 +
 .../flink/FlinkStreamingViewOverrides.java      |  372 ++
 .../flink/PipelineTranslationOptimizer.java     |   72 +
 .../beam/runners/flink/TestFlinkRunner.java     |   24 +-
 .../beam/runners/flink/TranslationMode.java     |   31 +
 .../FlinkBatchPipelineTranslator.java           |  143 -
 .../FlinkBatchTransformTranslators.java         |  745 ---
 .../FlinkBatchTranslationContext.java           |  145 -
 .../translation/FlinkPipelineTranslator.java    |   53 -
 .../FlinkStreamingPipelineTranslator.java       |  154 -
 .../FlinkStreamingTransformTranslators.java     |  978 ----
 .../FlinkStreamingTranslationContext.java       |  110 -
 .../PipelineTranslationOptimizer.java           |   73 -
 .../flink/translation/TranslationMode.java      |   31 -
 .../functions/FlinkAggregatorFactory.java       |   53 +
 .../functions/FlinkAssignWindows.java           |    2 +-
 .../functions/FlinkDoFnFunction.java            |  136 +-
 .../FlinkMergingNonShuffleReduceFunction.java   |   57 +-
 .../FlinkMergingPartialReduceFunction.java      |   37 +-
 .../functions/FlinkMergingReduceFunction.java   |   31 +-
 .../functions/FlinkMultiOutputDoFnFunction.java |  126 -
 .../FlinkMultiOutputProcessContext.java         |  118 -
 .../FlinkMultiOutputPruningFunction.java        |    2 +-
 .../functions/FlinkNoElementAssignContext.java  |   68 -
 .../functions/FlinkNoOpStepContext.java         |   73 +
 .../functions/FlinkPartialReduceFunction.java   |   45 +-
 .../functions/FlinkProcessContextBase.java      |  267 --
 .../functions/FlinkReduceFunction.java          |   41 +-
 .../functions/FlinkSideInputReader.java         |   80 +
 .../FlinkSingleOutputProcessContext.java        |   69 -
 .../functions/FlinkStatefulDoFnFunction.java    |  198 +
 .../utils/SerializedPipelineOptions.java        |    4 +-
 .../wrappers/streaming/DoFnOperator.java        |  671 ++-
 .../wrappers/streaming/FlinkStateInternals.java | 1037 -----
 .../streaming/KvToByteBufferKeySelector.java    |   56 +
 .../streaming/SingletonKeyedWorkItem.java       |    2 +-
 .../streaming/SingletonKeyedWorkItemCoder.java  |   10 +-
 .../wrappers/streaming/WindowDoFnOperator.java  |  457 +-
 .../streaming/io/BoundedSourceWrapper.java      |    7 +-
 .../streaming/io/UnboundedFlinkSink.java        |    6 +
 .../streaming/io/UnboundedSourceWrapper.java    |  159 +-
 .../state/FlinkBroadcastStateInternals.java     |  865 ++++
 .../state/FlinkKeyGroupStateInternals.java      |  487 ++
 .../state/FlinkSplitStateInternals.java         |  260 ++
 .../streaming/state/FlinkStateInternals.java    | 1053 +++++
 .../state/KeyGroupCheckpointedOperator.java     |   35 +
 .../state/KeyGroupRestoringOperator.java        |   32 +
 .../wrappers/streaming/state/package-info.java  |   22 +
 .../beam/runners/flink/PipelineOptionsTest.java |   42 +-
 .../beam/runners/flink/WriteSinkITCase.java     |    5 +-
 .../flink/streaming/DoFnOperatorTest.java       |  337 +-
 .../FlinkBroadcastStateInternalsTest.java       |  245 +
 .../FlinkKeyGroupStateInternalsTest.java        |  262 ++
 .../streaming/FlinkSplitStateInternalsTest.java |  101 +
 .../streaming/FlinkStateInternalsTest.java      |   39 +-
 .../streaming/UnboundedSourceWrapperTest.java   |  570 ++-
 runners/google-cloud-dataflow-java/pom.xml      |   40 +-
 .../beam/runners/dataflow/AssignWindows.java    |   89 +
 .../dataflow/BatchStatefulParDoOverrides.java   |  283 ++
 .../runners/dataflow/BatchViewOverrides.java    | 1391 ++++++
 .../dataflow/DataflowAggregatorTransforms.java  |   79 +
 .../beam/runners/dataflow/DataflowClient.java   |   44 +-
 .../dataflow/DataflowMetricUpdateExtractor.java |  109 +
 .../runners/dataflow/DataflowPipelineJob.java   |    4 +-
 .../dataflow/DataflowPipelineTranslator.java    |  606 +--
 .../beam/runners/dataflow/DataflowRunner.java   | 2466 ++--------
 .../DataflowUnboundedReadFromBoundedSource.java |  547 +++
 .../beam/runners/dataflow/ReadTranslator.java   |  102 +
 .../dataflow/StreamingViewOverrides.java        |  110 +
 .../runners/dataflow/TransformTranslator.java   |  131 +
 .../dataflow/internal/AssignWindows.java        |   89 -
 .../dataflow/internal/CustomSources.java        |   12 +-
 .../internal/DataflowAggregatorTransforms.java  |   79 -
 .../internal/DataflowMetricUpdateExtractor.java |  109 -
 .../DataflowUnboundedReadFromBoundedSource.java |  556 ---
 .../runners/dataflow/internal/IsmFormat.java    |   20 +-
 .../dataflow/internal/ReadTranslator.java       |  107 -
 .../options/DataflowPipelineOptions.java        |   36 +-
 .../DataflowPipelineWorkerPoolOptions.java      |   16 +-
 .../dataflow/testing/TestDataflowRunner.java    |   17 +-
 .../beam/runners/dataflow/util/DoFnInfo.java    |   75 +-
 .../beam/runners/dataflow/util/GcsStager.java   |   18 +-
 .../beam/runners/dataflow/util/PackageUtil.java |  363 +-
 .../beam/runners/dataflow/dataflow.properties   |    6 +-
 .../BatchStatefulParDoOverridesTest.java        |  169 +
 .../dataflow/BatchViewOverridesTest.java        |  633 +++
 .../dataflow/DataflowPipelineJobTest.java       |  184 +-
 .../DataflowPipelineTranslatorTest.java         |  145 +-
 .../runners/dataflow/DataflowRunnerTest.java    |  614 +--
 ...aflowUnboundedReadFromBoundedSourceTest.java |   79 +
 ...aflowUnboundedReadFromBoundedSourceTest.java |   83 -
 .../DataflowPipelineDebugOptionsTest.java       |    2 +-
 .../options/DataflowPipelineOptionsTest.java    |   20 +-
 .../options/DataflowProfilingOptionsTest.java   |    4 +-
 .../testing/TestDataflowRunnerTest.java         |   53 +-
 .../runners/dataflow/util/PackageUtilTest.java  |   99 +-
 runners/pom.xml                                 |   17 +-
 runners/spark/README.md                         |    8 +-
 runners/spark/pom.xml                           |   70 +-
 .../spark/SparkNativePipelineVisitor.java       |  198 +
 .../runners/spark/SparkPipelineOptions.java     |    6 +-
 .../beam/runners/spark/SparkPipelineResult.java |   67 +-
 .../apache/beam/runners/spark/SparkRunner.java  |  124 +-
 .../beam/runners/spark/SparkRunnerDebugger.java |  137 +
 .../runners/spark/TestSparkPipelineOptions.java |   61 +
 .../beam/runners/spark/TestSparkRunner.java     |  204 +-
 .../spark/aggregators/AccumulatorSingleton.java |   53 -
 .../aggregators/AggregatorsAccumulator.java     |  131 +
 .../spark/aggregators/NamedAggregators.java     |   12 +-
 .../spark/aggregators/SparkAggregators.java     |   26 +-
 .../aggregators/metrics/AggregatorMetric.java   |   44 -
 .../metrics/AggregatorMetricSource.java         |   50 -
 .../metrics/WithNamedAggregatorsSupport.java    |  174 -
 .../spark/aggregators/metrics/sink/CsvSink.java |   39 -
 .../aggregators/metrics/sink/GraphiteSink.java  |   39 -
 .../aggregators/metrics/sink/package-info.java  |   23 -
 .../coders/BeamSparkRunnerRegistrator.java      |   48 +-
 .../beam/runners/spark/coders/CoderHelpers.java |   23 +
 .../runners/spark/coders/NullWritableCoder.java |   76 -
 .../spark/coders/StatelessJavaSerializer.java   |   97 +
 .../runners/spark/coders/WritableCoder.java     |  122 -
 .../beam/runners/spark/examples/WordCount.java  |    2 +-
 .../apache/beam/runners/spark/io/ConsoleIO.java |    4 +-
 .../beam/runners/spark/io/CreateStream.java     |  198 +-
 .../beam/runners/spark/io/MicrobatchSource.java |    9 +-
 .../runners/spark/io/SparkUnboundedSource.java  |  166 +-
 .../runners/spark/metrics/AggregatorMetric.java |   43 +
 .../spark/metrics/AggregatorMetricSource.java   |   51 +
 .../runners/spark/metrics/CompositeSource.java  |   49 +
 .../spark/metrics/MetricsAccumulator.java       |  132 +
 .../spark/metrics/MetricsAccumulatorParam.java  |   42 +
 .../runners/spark/metrics/SparkBeamMetric.java  |   69 +
 .../spark/metrics/SparkBeamMetricSource.java    |   51 +
 .../spark/metrics/SparkMetricResults.java       |  181 +
 .../spark/metrics/SparkMetricsContainer.java    |  147 +
 .../spark/metrics/WithMetricsSupport.java       |  209 +
 .../runners/spark/metrics/package-info.java     |   20 +
 .../runners/spark/metrics/sink/CsvSink.java     |   38 +
 .../spark/metrics/sink/GraphiteSink.java        |   38 +
 .../spark/metrics/sink/package-info.java        |   22 +
 .../SparkGroupAlsoByWindowViaWindowSet.java     |  431 ++
 .../spark/stateful/SparkStateInternals.java     |  418 ++
 .../spark/stateful/SparkTimerInternals.java     |  193 +
 .../spark/stateful/StateSpecFunctions.java      |   60 +-
 .../spark/translation/BoundedDataset.java       |    3 +-
 .../runners/spark/translation/DoFnFunction.java |   41 +-
 .../translation/DoFnRunnerWithMetrics.java      |   91 +
 .../spark/translation/EvaluationContext.java    |   89 +-
 .../translation/GroupCombineFunctions.java      |  259 +-
 .../spark/translation/MultiDoFnFunction.java    |   39 +-
 .../translation/SparkAbstractCombineFn.java     |   12 +-
 .../spark/translation/SparkContextFactory.java  |   13 +-
 .../spark/translation/SparkGlobalCombineFn.java |   13 +-
 .../translation/SparkGroupAlsoByWindowFn.java   |  214 -
 ...SparkGroupAlsoByWindowViaOutputBufferFn.java |  179 +
 .../spark/translation/SparkKeyedCombineFn.java  |   13 +-
 .../spark/translation/SparkPCollectionView.java |   99 +
 .../spark/translation/SparkProcessContext.java  |    8 +-
 .../spark/translation/SparkRuntimeContext.java  |   84 +-
 .../spark/translation/TransformEvaluator.java   |    1 +
 .../spark/translation/TransformTranslator.java  |  467 +-
 .../spark/translation/TranslationUtils.java     |   93 +-
 .../spark/translation/streaming/Checkpoint.java |  137 +
 .../SparkRunnerStreamingContextFactory.java     |   57 +-
 .../streaming/StreamingTransformTranslator.java |  492 +-
 .../translation/streaming/UnboundedDataset.java |   63 +-
 .../runners/spark/util/BroadcastHelper.java     |  127 -
 .../spark/util/GlobalWatermarkHolder.java       |  200 +
 .../runners/spark/util/SideInputBroadcast.java  |   77 +
 .../spark/util/SparkSideInputReader.java        |    8 +-
 .../beam/runners/spark/ClearWatermarksRule.java |   37 +
 .../beam/runners/spark/ForceStreamingTest.java  |   60 +-
 .../spark/GlobalWatermarkHolderTest.java        |  151 +
 .../apache/beam/runners/spark/PipelineRule.java |  109 +
 .../runners/spark/ProvidedSparkContextTest.java |   72 +-
 .../runners/spark/ReuseSparkContextRule.java    |   46 +
 .../runners/spark/SparkPipelineStateTest.java   |   27 +-
 .../runners/spark/SparkRunnerDebuggerTest.java  |  180 +
 .../spark/aggregators/ClearAggregatorsRule.java |    5 +-
 .../metrics/sink/InMemoryMetrics.java           |   10 +-
 .../metrics/sink/NamedAggregatorsTest.java      |   25 +-
 .../coders/BeamSparkRunnerRegistratorTest.java  |   57 -
 .../runners/spark/coders/WritableCoderTest.java |   45 -
 .../beam/runners/spark/io/AvroPipelineTest.java |    6 +-
 .../beam/runners/spark/io/NumShardsTest.java    |    6 +-
 .../io/hadoop/HadoopFileFormatPipelineTest.java |    8 +-
 .../spark/metrics/SparkBeamMetricTest.java      |   60 +
 .../spark/translation/StorageLevelTest.java     |    8 +-
 .../translation/streaming/CreateStreamTest.java |  376 ++
 .../streaming/EmptyStreamAssertionTest.java     |   87 -
 .../streaming/FlattenStreamingTest.java         |  103 -
 .../streaming/KafkaStreamingTest.java           |  215 -
 .../ResumeFromCheckpointStreamingTest.java      |  308 +-
 .../streaming/SimpleStreamingWordCountTest.java |   84 -
 .../streaming/TrackStreamingSourcesTest.java    |  171 +
 .../utils/KafkaWriteOnBatchCompleted.java       |  105 -
 .../streaming/utils/PAssertStreaming.java       |  121 -
 .../utils/SparkTestPipelineOptions.java         |   42 -
 .../SparkTestPipelineOptionsForStreaming.java   |   37 -
 .../spark/src/test/resources/log4j.properties   |   30 +
 .../spark/src/test/resources/metrics.properties |   10 +-
 sdks/common/fn-api/pom.xml                      |  109 +
 .../fn-api/src/main/proto/beam_fn_api.proto     |  771 +++
 .../org/apache/beam/fn/v1/standard_coders.yaml  |  195 +
 sdks/common/pom.xml                             |   39 +
 sdks/common/runner-api/pom.xml                  |   89 +
 .../src/main/proto/beam_runner_api.proto        |  711 +++
 sdks/java/build-tools/pom.xml                   |    2 +-
 .../src/main/resources/beam/findbugs-filter.xml |   95 +-
 sdks/java/core/pom.xml                          |   28 +-
 .../main/java/org/apache/beam/sdk/Pipeline.java |   75 +-
 .../beam/sdk/annotations/Experimental.java      |    5 +-
 .../org/apache/beam/sdk/coders/AtomicCoder.java |    2 +-
 .../org/apache/beam/sdk/coders/AvroCoder.java   |   30 +-
 .../apache/beam/sdk/coders/BigDecimalCoder.java |    6 +-
 .../beam/sdk/coders/BigEndianIntegerCoder.java  |    7 +
 .../beam/sdk/coders/BigEndianLongCoder.java     |    7 +
 .../apache/beam/sdk/coders/ByteArrayCoder.java  |    7 +
 .../org/apache/beam/sdk/coders/ByteCoder.java   |    7 +
 .../apache/beam/sdk/coders/ByteStringCoder.java |    8 +
 .../java/org/apache/beam/sdk/coders/Coder.java  |    7 +
 .../apache/beam/sdk/coders/CollectionCoder.java |   12 +-
 .../org/apache/beam/sdk/coders/CustomCoder.java |   18 +-
 .../apache/beam/sdk/coders/DelegateCoder.java   |   29 +-
 .../org/apache/beam/sdk/coders/DoubleCoder.java |    7 +
 .../apache/beam/sdk/coders/DurationCoder.java   |    8 +
 .../apache/beam/sdk/coders/InstantCoder.java    |    7 +
 .../apache/beam/sdk/coders/IterableCoder.java   |   12 +-
 .../beam/sdk/coders/IterableLikeCoder.java      |    6 +-
 .../org/apache/beam/sdk/coders/JAXBCoder.java   |   48 +-
 .../org/apache/beam/sdk/coders/KvCoder.java     |   35 +-
 .../beam/sdk/coders/LengthPrefixCoder.java      |  145 +
 .../org/apache/beam/sdk/coders/ListCoder.java   |    7 +
 .../org/apache/beam/sdk/coders/MapCoder.java    |   62 +-
 .../apache/beam/sdk/coders/NullableCoder.java   |    6 +
 .../beam/sdk/coders/SerializableCoder.java      |   17 +-
 .../org/apache/beam/sdk/coders/SetCoder.java    |   12 +-
 .../apache/beam/sdk/coders/StandardCoder.java   |   42 +-
 .../beam/sdk/coders/StringDelegateCoder.java    |   16 +-
 .../apache/beam/sdk/coders/StringUtf8Coder.java |   18 +-
 .../beam/sdk/coders/TableRowJsonCoder.java      |    7 +
 .../beam/sdk/coders/TextualIntegerCoder.java    |    8 +
 .../org/apache/beam/sdk/coders/VarIntCoder.java |   10 +-
 .../apache/beam/sdk/coders/VarLongCoder.java    |    7 +
 .../org/apache/beam/sdk/coders/VoidCoder.java   |    7 +
 .../beam/sdk/coders/protobuf/ProtoCoder.java    |    8 +-
 .../java/org/apache/beam/sdk/io/AvroIO.java     |    4 +-
 .../java/org/apache/beam/sdk/io/AvroSource.java |    5 -
 .../sdk/io/BoundedReadFromUnboundedSource.java  |   79 +-
 .../org/apache/beam/sdk/io/BoundedSource.java   |    8 -
 .../apache/beam/sdk/io/CompressedSource.java    |   40 +-
 .../org/apache/beam/sdk/io/CountingSource.java  |    5 -
 .../org/apache/beam/sdk/io/FileBasedSink.java   |   22 +
 .../java/org/apache/beam/sdk/io/FileSystem.java |  115 +-
 .../org/apache/beam/sdk/io/FileSystems.java     |   88 +-
 .../org/apache/beam/sdk/io/LocalFileSystem.java |  195 +-
 .../org/apache/beam/sdk/io/LocalResourceId.java |  136 +
 .../java/org/apache/beam/sdk/io/PubsubIO.java   | 1142 ++---
 .../apache/beam/sdk/io/PubsubUnboundedSink.java |   88 +-
 .../beam/sdk/io/PubsubUnboundedSource.java      |  104 +-
 .../main/java/org/apache/beam/sdk/io/Read.java  |    7 +-
 .../java/org/apache/beam/sdk/io/TextIO.java     |  401 +-
 .../main/java/org/apache/beam/sdk/io/Write.java |  706 +--
 .../java/org/apache/beam/sdk/io/XmlSource.java  |    5 -
 .../apache/beam/sdk/io/fs/CreateOptions.java    |   60 +
 .../org/apache/beam/sdk/io/fs/MatchResult.java  |  125 +
 .../apache/beam/sdk/io/fs/ResolveOptions.java   |   41 +
 .../org/apache/beam/sdk/io/fs/ResourceId.java   |   85 +
 .../org/apache/beam/sdk/io/fs/package-info.java |   22 +
 .../beam/sdk/metrics/DistributionData.java      |    3 +-
 .../org/apache/beam/sdk/metrics/MetricKey.java  |    3 +-
 .../apache/beam/sdk/metrics/MetricUpdates.java  |    3 +-
 .../org/apache/beam/sdk/options/GcpOptions.java |   25 +-
 .../org/apache/beam/sdk/options/GcsOptions.java |    4 +-
 .../beam/sdk/options/PipelineOptions.java       |    2 +-
 .../sdk/options/PipelineOptionsFactory.java     |   10 +-
 .../apache/beam/sdk/options/ValueProvider.java  |    6 +-
 .../beam/sdk/runners/PTransformMatcher.java     |   32 +
 .../sdk/runners/PTransformOverrideFactory.java  |   31 +
 .../apache/beam/sdk/runners/PipelineRunner.java |   14 -
 .../beam/sdk/runners/TransformHierarchy.java    |  285 +-
 .../apache/beam/sdk/testing/Annotations.java    |   72 +
 .../beam/sdk/testing/CoderProperties.java       |   85 +-
 .../testing/FlattenWithHeterogeneousCoders.java |   29 +
 .../org/apache/beam/sdk/testing/PAssert.java    |  259 +-
 .../apache/beam/sdk/testing/RegexMatcher.java   |   49 +
 .../beam/sdk/testing/RunnableOnService.java     |   14 +-
 .../beam/sdk/testing/SourceTestUtils.java       |    5 -
 .../apache/beam/sdk/testing/TestPipeline.java   |  111 +-
 .../org/apache/beam/sdk/testing/TestStream.java |   18 +-
 .../beam/sdk/testing/UsesAttemptedMetrics.java  |   28 +
 .../beam/sdk/testing/UsesCommittedMetrics.java  |   28 +
 .../apache/beam/sdk/testing/UsesMapState.java   |   25 +
 .../apache/beam/sdk/testing/UsesMetrics.java    |   24 -
 .../apache/beam/sdk/testing/UsesSetState.java   |   25 +
 .../apache/beam/sdk/testing/UsesTestStream.java |   24 +
 .../sdk/testing/UsesUnboundedPCollections.java  |   23 +
 .../beam/sdk/testing/ValueInSingleWindow.java   |    6 +-
 .../apache/beam/sdk/transforms/Aggregator.java  |   19 -
 .../sdk/transforms/AggregatorRetriever.java     |   13 +-
 .../beam/sdk/transforms/AppliedPTransform.java  |   31 +-
 .../org/apache/beam/sdk/transforms/Combine.java |  204 +-
 .../apache/beam/sdk/transforms/CombineFns.java  |   14 +-
 .../org/apache/beam/sdk/transforms/Count.java   |   28 +-
 .../org/apache/beam/sdk/transforms/Create.java  |   88 +-
 .../sdk/transforms/DelegatingAggregator.java    |    2 +-
 .../org/apache/beam/sdk/transforms/DoFn.java    |   77 +-
 .../beam/sdk/transforms/DoFnAdapters.java       |  504 --
 .../apache/beam/sdk/transforms/DoFnTester.java  |   77 +-
 .../org/apache/beam/sdk/transforms/Flatten.java |   15 +-
 .../apache/beam/sdk/transforms/GroupByKey.java  |   10 +-
 .../org/apache/beam/sdk/transforms/Latest.java  |   80 +-
 .../org/apache/beam/sdk/transforms/Max.java     |  124 +-
 .../org/apache/beam/sdk/transforms/Mean.java    |   27 +-
 .../org/apache/beam/sdk/transforms/Min.java     |  122 +-
 .../org/apache/beam/sdk/transforms/OldDoFn.java |  758 ---
 .../apache/beam/sdk/transforms/PTransform.java  |    9 +-
 .../org/apache/beam/sdk/transforms/ParDo.java   |   55 +-
 .../org/apache/beam/sdk/transforms/Regex.java   |  589 ++-
 .../org/apache/beam/sdk/transforms/Sample.java  |  121 +-
 .../beam/sdk/transforms/SimpleFunction.java     |   44 +-
 .../org/apache/beam/sdk/transforms/Sum.java     |   57 +-
 .../apache/beam/sdk/transforms/ToString.java    |  181 +
 .../org/apache/beam/sdk/transforms/Top.java     |   27 +-
 .../org/apache/beam/sdk/transforms/View.java    |   10 +
 .../sdk/transforms/display/DisplayData.java     |    6 +-
 .../beam/sdk/transforms/join/CoGbkResult.java   |   35 +-
 .../transforms/join/KeyedPCollectionTuple.java  |   41 +-
 .../reflect/ByteBuddyDoFnInvokerFactory.java    |   16 +-
 .../reflect/ByteBuddyOnTimerInvokerFactory.java |    4 +-
 .../sdk/transforms/reflect/DoFnInvoker.java     |   42 +-
 .../sdk/transforms/reflect/DoFnInvokers.java    |  142 +-
 .../sdk/transforms/reflect/DoFnSignature.java   |   71 +-
 .../sdk/transforms/reflect/DoFnSignatures.java  |   82 +-
 .../transforms/splittabledofn/OffsetRange.java  |   71 +
 .../splittabledofn/OffsetRangeTracker.java      |   75 +
 .../splittabledofn/RestrictionTracker.java      |    2 +-
 .../beam/sdk/transforms/windowing/AfterAll.java |    7 +
 .../windowing/AfterDelayFromFirstElement.java   |  251 -
 .../sdk/transforms/windowing/AfterEach.java     |    7 +
 .../sdk/transforms/windowing/AfterFirst.java    |    7 +
 .../sdk/transforms/windowing/AfterPane.java     |   10 -
 .../windowing/AfterProcessingTime.java          |  105 +-
 .../AfterSynchronizedProcessingTime.java        |   31 +-
 .../sdk/transforms/windowing/BoundedWindow.java |   31 +
 .../sdk/transforms/windowing/GlobalWindow.java  |    6 +
 .../sdk/transforms/windowing/GlobalWindows.java |   10 +
 .../transforms/windowing/IntervalWindow.java    |   19 +-
 .../sdk/transforms/windowing/OutputTimeFns.java |   45 +
 .../beam/sdk/transforms/windowing/PaneInfo.java |    2 -
 .../windowing/TimestampTransform.java           |   65 +
 .../beam/sdk/transforms/windowing/Triggers.java |  320 ++
 .../beam/sdk/transforms/windowing/Window.java   |   96 +-
 .../org/apache/beam/sdk/util/ApiSurface.java    |  446 +-
 .../beam/sdk/util/BaseExecutionContext.java     |  174 -
 .../org/apache/beam/sdk/util/CoderUtils.java    |   30 +-
 .../beam/sdk/util/CombineContextFactory.java    |   18 -
 .../org/apache/beam/sdk/util/DefaultBucket.java |  105 +
 .../util/EmptyOnDeserializationThreadLocal.java |   39 +
 .../apache/beam/sdk/util/ExecutionContext.java  |  100 -
 .../apache/beam/sdk/util/GcpProjectUtil.java    |    2 +-
 .../apache/beam/sdk/util/GcsPathValidator.java  |    3 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |  334 +-
 .../org/apache/beam/sdk/util/NameUtils.java     |  167 +
 .../org/apache/beam/sdk/util/PropertyNames.java |    1 +
 .../org/apache/beam/sdk/util/PubsubClient.java  |   28 +-
 .../apache/beam/sdk/util/PubsubGrpcClient.java  |    6 +-
 .../apache/beam/sdk/util/PubsubJsonClient.java  |    4 +-
 .../apache/beam/sdk/util/PubsubTestClient.java  |    6 +-
 .../org/apache/beam/sdk/util/StringUtils.java   |  100 -
 .../java/org/apache/beam/sdk/util/Timer.java    |   11 +
 .../apache/beam/sdk/util/TimerInternals.java    |  273 --
 .../org/apache/beam/sdk/util/WindowedValue.java |   23 +-
 .../beam/sdk/util/WindowingInternals.java       |   82 -
 .../beam/sdk/util/WindowingStrategies.java      |  266 ++
 .../apache/beam/sdk/util/WindowingStrategy.java |   53 +-
 .../beam/sdk/util/common/ReflectHelpers.java    |   16 +-
 .../sdk/util/state/InMemoryStateInternals.java  |  430 --
 .../sdk/util/state/InMemoryTimerInternals.java  |  275 --
 .../apache/beam/sdk/util/state/MapState.java    |   93 +
 .../sdk/util/state/MergingStateAccessor.java    |   40 -
 .../beam/sdk/util/state/ReadableState.java      |    4 +-
 .../apache/beam/sdk/util/state/SetState.java    |   71 +
 .../beam/sdk/util/state/StateAccessor.java      |   37 -
 .../apache/beam/sdk/util/state/StateBinder.java |    6 +
 .../beam/sdk/util/state/StateContexts.java      |   63 -
 .../beam/sdk/util/state/StateInternals.java     |   57 -
 .../sdk/util/state/StateInternalsFactory.java   |   35 -
 .../beam/sdk/util/state/StateMerging.java       |  259 --
 .../beam/sdk/util/state/StateNamespace.java     |   56 -
 .../sdk/util/state/StateNamespaceForTest.java   |   65 -
 .../beam/sdk/util/state/StateNamespaces.java    |  278 --
 .../apache/beam/sdk/util/state/StateSpecs.java  |  155 +-
 .../apache/beam/sdk/util/state/StateTable.java  |   82 -
 .../apache/beam/sdk/util/state/StateTag.java    |  111 -
 .../apache/beam/sdk/util/state/StateTags.java   |  290 --
 .../util/state/TestInMemoryStateInternals.java  |   61 -
 .../sdk/util/state/TimerInternalsFactory.java   |   36 -
 .../java/org/apache/beam/sdk/values/PBegin.java |    9 +-
 .../apache/beam/sdk/values/PCollectionList.java |   62 +-
 .../beam/sdk/values/PCollectionTuple.java       |   33 +-
 .../java/org/apache/beam/sdk/values/PDone.java  |    4 +-
 .../java/org/apache/beam/sdk/values/PInput.java |   13 +-
 .../org/apache/beam/sdk/values/POutput.java     |   24 +-
 .../beam/sdk/values/POutputValueBase.java       |    4 +-
 .../java/org/apache/beam/sdk/values/PValue.java |   24 +-
 .../org/apache/beam/sdk/values/PValueBase.java  |   18 +-
 .../apache/beam/sdk/values/TaggedPValue.java    |   42 +
 .../beam/sdk/values/TimestampedValue.java       |   10 +-
 .../apache/beam/sdk/values/TupleTagList.java    |    6 +
 .../org/apache/beam/sdk/values/TypedPValue.java |   78 +-
 .../org/apache/beam/SdkCoreApiSurfaceTest.java  |   62 +
 .../sdk/AggregatorPipelineExtractorTest.java    |   16 +-
 .../java/org/apache/beam/sdk/PipelineTest.java  |   37 +-
 .../apache/beam/sdk/coders/AvroCoderTest.java   |   18 +-
 .../beam/sdk/coders/BigDecimalCoderTest.java    |   46 +-
 .../sdk/coders/BigEndianIntegerCoderTest.java   |    9 +
 .../beam/sdk/coders/BigEndianLongCoderTest.java |    9 +
 .../beam/sdk/coders/ByteArrayCoderTest.java     |    6 +
 .../apache/beam/sdk/coders/ByteCoderTest.java   |    9 +
 .../beam/sdk/coders/ByteStringCoderTest.java    |    8 +
 .../beam/sdk/coders/CoderRegistryTest.java      |   12 +-
 .../org/apache/beam/sdk/coders/CoderTest.java   |    8 +
 .../beam/sdk/coders/CollectionCoderTest.java    |   16 +
 .../apache/beam/sdk/coders/CommonCoderTest.java |  351 ++
 .../beam/sdk/coders/DefaultCoderTest.java       |    4 +-
 .../beam/sdk/coders/DelegateCoderTest.java      |   35 +-
 .../apache/beam/sdk/coders/DoubleCoderTest.java |    9 +
 .../beam/sdk/coders/DurationCoderTest.java      |   10 +
 .../beam/sdk/coders/InstantCoderTest.java       |    9 +
 .../beam/sdk/coders/IterableCoderTest.java      |   27 +-
 .../apache/beam/sdk/coders/JAXBCoderTest.java   |   26 +-
 .../org/apache/beam/sdk/coders/KvCoderTest.java |   29 +
 .../beam/sdk/coders/LengthPrefixCoderTest.java  |  129 +
 .../apache/beam/sdk/coders/ListCoderTest.java   |   16 +-
 .../apache/beam/sdk/coders/MapCoderTest.java    |   21 +-
 .../beam/sdk/coders/NullableCoderTest.java      |   12 +
 .../beam/sdk/coders/SerializableCoderTest.java  |   16 +-
 .../apache/beam/sdk/coders/SetCoderTest.java    |   16 +
 .../beam/sdk/coders/StandardCoderTest.java      |   40 +
 .../sdk/coders/StringDelegateCoderTest.java     |   11 +
 .../beam/sdk/coders/StringUtf8CoderTest.java    |    9 +
 .../beam/sdk/coders/TableRowJsonCoderTest.java  |    9 +
 .../sdk/coders/TextualIntegerCoderTest.java     |    9 +
 .../apache/beam/sdk/coders/VarIntCoderTest.java |    9 +
 .../beam/sdk/coders/VarLongCoderTest.java       |    9 +
 .../apache/beam/sdk/coders/VoidCoderTest.java   |   40 +
 .../beam/sdk/io/AvroIOGeneratedClassTest.java   |  285 --
 .../java/org/apache/beam/sdk/io/AvroIOTest.java |   18 +-
 .../apache/beam/sdk/io/AvroIOTransformTest.java |  324 ++
 .../io/BoundedReadFromUnboundedSourceTest.java  |    6 +-
 .../beam/sdk/io/CompressedSourceTest.java       |   89 +-
 .../apache/beam/sdk/io/CountingInputTest.java   |   12 +-
 .../apache/beam/sdk/io/CountingSourceTest.java  |   13 +-
 .../apache/beam/sdk/io/FileBasedSinkTest.java   |   17 +-
 .../apache/beam/sdk/io/FileBasedSourceTest.java |    9 +-
 .../org/apache/beam/sdk/io/FileSystemsTest.java |   61 +-
 .../apache/beam/sdk/io/LocalFileSystemTest.java |  318 ++
 .../apache/beam/sdk/io/LocalResourceIdTest.java |  226 +
 .../beam/sdk/io/OffsetBasedSourceTest.java      |    5 -
 .../org/apache/beam/sdk/io/PubsubIOTest.java    |   86 +-
 .../beam/sdk/io/PubsubUnboundedSinkTest.java    |   43 +-
 .../beam/sdk/io/PubsubUnboundedSourceTest.java  |   22 +-
 .../java/org/apache/beam/sdk/io/ReadTest.java   |    5 -
 .../java/org/apache/beam/sdk/io/TextIOTest.java |  222 +-
 .../java/org/apache/beam/sdk/io/WriteTest.java  |  158 +-
 .../org/apache/beam/sdk/io/XmlSourceTest.java   |   10 +-
 .../apache/beam/sdk/metrics/MetricMatchers.java |  192 +-
 .../apache/beam/sdk/metrics/MetricsTest.java    |  124 +-
 .../apache/beam/sdk/options/GcpOptionsTest.java |   32 +-
 .../sdk/options/PipelineOptionsFactoryTest.java |    6 +-
 .../beam/sdk/options/PipelineOptionsTest.java   |    3 +-
 .../sdk/options/ProxyInvocationHandlerTest.java |    5 +-
 .../beam/sdk/options/ValueProviderTest.java     |   36 +-
 .../sdk/options/ValueProviderUtilsTest.java     |    2 +-
 .../sdk/runners/TransformHierarchyTest.java     |  320 +-
 .../beam/sdk/runners/TransformTreeTest.java     |   34 +-
 .../beam/sdk/testing/GatherAllPanesTest.java    |    7 +-
 .../apache/beam/sdk/testing/PAssertTest.java    |  118 +-
 .../beam/sdk/testing/TestPipelineTest.java      |  504 +-
 .../apache/beam/sdk/testing/TestStreamTest.java |   45 +-
 .../testing/ValueInSingleWindowCoderTest.java   |    7 +
 .../transforms/ApproximateQuantilesTest.java    |   12 +-
 .../sdk/transforms/ApproximateUniqueTest.java   |  487 +-
 .../beam/sdk/transforms/CombineFnsTest.java     |   25 +-
 .../apache/beam/sdk/transforms/CombineTest.java |  174 +-
 .../apache/beam/sdk/transforms/CountTest.java   |   15 +-
 .../apache/beam/sdk/transforms/CreateTest.java  |  119 +-
 .../beam/sdk/transforms/DistinctTest.java       |   12 +-
 .../DoFnDelegatingAggregatorTest.java           |  142 -
 .../apache/beam/sdk/transforms/DoFnTest.java    |   19 +-
 .../beam/sdk/transforms/DoFnTesterTest.java     |   12 +-
 .../apache/beam/sdk/transforms/FilterTest.java  |   18 +-
 .../sdk/transforms/FlatMapElementsTest.java     |   10 +-
 .../apache/beam/sdk/transforms/FlattenTest.java |   74 +-
 .../beam/sdk/transforms/GroupByKeyTest.java     |   32 +-
 .../apache/beam/sdk/transforms/KeysTest.java    |    9 +-
 .../apache/beam/sdk/transforms/KvSwapTest.java  |   22 +-
 .../apache/beam/sdk/transforms/LatestTest.java  |   23 +-
 .../beam/sdk/transforms/MapElementsTest.java    |   14 +-
 .../org/apache/beam/sdk/transforms/MaxTest.java |   20 +-
 .../apache/beam/sdk/transforms/MeanTest.java    |    7 +-
 .../org/apache/beam/sdk/transforms/MinTest.java |   21 +-
 .../apache/beam/sdk/transforms/NoOpOldDoFn.java |   71 -
 .../beam/sdk/transforms/OldDoFnContextTest.java |   69 -
 .../apache/beam/sdk/transforms/OldDoFnTest.java |  188 -
 .../beam/sdk/transforms/ParDoLifecycleTest.java |   17 +-
 .../apache/beam/sdk/transforms/ParDoTest.java   |  704 ++-
 .../beam/sdk/transforms/PartitionTest.java      |    8 +-
 .../apache/beam/sdk/transforms/RegexTest.java   |  148 +-
 .../apache/beam/sdk/transforms/SampleTest.java  |  419 +-
 .../beam/sdk/transforms/SimpleFunctionTest.java |   43 +
 .../beam/sdk/transforms/SimpleStatsFnsTest.java |   36 +-
 .../beam/sdk/transforms/SplittableDoFnTest.java |   83 +-
 .../org/apache/beam/sdk/transforms/SumTest.java |   24 +-
 .../beam/sdk/transforms/ToStringTest.java       |  125 +
 .../org/apache/beam/sdk/transforms/TopTest.java |   33 +-
 .../apache/beam/sdk/transforms/ValuesTest.java  |    7 +-
 .../apache/beam/sdk/transforms/ViewTest.java    |  108 +-
 .../beam/sdk/transforms/WithKeysTest.java       |    8 +-
 .../beam/sdk/transforms/WithTimestampsTest.java |    9 +-
 .../display/DisplayDataEvaluator.java           |    8 +-
 .../sdk/transforms/display/DisplayDataTest.java |   15 +
 .../transforms/join/CoGbkResultCoderTest.java   |   10 +-
 .../sdk/transforms/join/CoGroupByKeyTest.java   |   11 +-
 .../sdk/transforms/join/UnionCoderTest.java     |   17 +-
 .../transforms/reflect/DoFnInvokersTest.java    |   70 +-
 .../DoFnSignaturesProcessElementTest.java       |   40 +-
 .../DoFnSignaturesSplittableDoFnTest.java       |    3 +-
 .../transforms/reflect/DoFnSignaturesTest.java  |    6 +-
 .../splittabledofn/OffsetRangeTrackerTest.java  |  111 +
 .../windowing/AfterProcessingTimeTest.java      |    2 +-
 .../AfterSynchronizedProcessingTimeTest.java    |    2 +-
 .../transforms/windowing/GlobalWindowTest.java  |   64 +
 .../transforms/windowing/OutputTimeFnsTest.java |   51 +
 .../sdk/transforms/windowing/TriggersTest.java  |  100 +
 .../sdk/transforms/windowing/WindowTest.java    |  204 +-
 .../sdk/transforms/windowing/WindowingTest.java |   15 +-
 .../apache/beam/sdk/util/ApiSurfaceTest.java    |  152 +-
 .../apache/beam/sdk/util/CombineFnUtilTest.java |    8 +-
 .../apache/beam/sdk/util/DefaultBucketTest.java |  112 +
 .../beam/sdk/util/FileIOChannelFactoryTest.java |   13 +-
 .../beam/sdk/util/GcsPathValidatorTest.java     |   17 +-
 .../org/apache/beam/sdk/util/GcsUtilTest.java   |   86 +-
 .../org/apache/beam/sdk/util/NameUtilsTest.java |  177 +
 .../beam/sdk/util/PubsubGrpcClientTest.java     |    8 +-
 .../beam/sdk/util/PubsubJsonClientTest.java     |    3 +-
 .../beam/sdk/util/PubsubTestClientTest.java     |    4 +-
 .../org/apache/beam/sdk/util/ReshuffleTest.java |   11 +-
 .../beam/sdk/util/SerializableUtilsTest.java    |    4 +-
 .../apache/beam/sdk/util/StringUtilsTest.java   |  100 -
 .../beam/sdk/util/TimerInternalsTest.java       |  101 -
 .../beam/sdk/util/ValueWithRecordIdTest.java    |   34 +
 .../apache/beam/sdk/util/WindowedValueTest.java |   23 +
 .../beam/sdk/util/WindowingStrategiesTest.java  |   91 +
 .../util/state/InMemoryStateInternalsTest.java  |  348 --
 .../util/state/InMemoryTimerInternalsTest.java  |  153 -
 .../sdk/util/state/StateNamespacesTest.java     |  130 -
 .../beam/sdk/util/state/StateTagTest.java       |  173 -
 .../beam/sdk/values/PCollectionListTest.java    |  117 +
 .../beam/sdk/values/PCollectionTupleTest.java   |   80 +-
 .../org/apache/beam/sdk/values/PDoneTest.java   |    9 +-
 .../beam/sdk/values/TimestampedValueTest.java   |   19 +-
 .../apache/beam/sdk/values/TypedPValueTest.java |   17 +-
 sdks/java/extensions/jackson/pom.xml            |  125 +
 .../beam/sdk/extensions/jackson/AsJsons.java    |   76 +
 .../beam/sdk/extensions/jackson/ParseJsons.java |   75 +
 .../sdk/extensions/jackson/package-info.java    |   22 +
 .../jackson/JacksonTransformsTest.java          |  242 +
 sdks/java/extensions/join-library/README.md     |   10 -
 sdks/java/extensions/join-library/pom.xml       |    3 +-
 .../extensions/joinlibrary/InnerJoinTest.java   |   23 +-
 .../joinlibrary/OuterLeftJoinTest.java          |   31 +-
 .../joinlibrary/OuterRightJoinTest.java         |   31 +-
 sdks/java/extensions/pom.xml                    |    3 +-
 sdks/java/extensions/sorter/pom.xml             |    5 +-
 .../sorter/BufferedExternalSorter.java          |   23 +-
 .../sorter/BufferedExternalSorterTest.java      |   46 +-
 .../sdk/extensions/sorter/SortValuesTest.java   |    9 +-
 sdks/java/harness/pom.xml                       |  177 +
 .../org/apache/beam/fn/harness/FnHarness.java   |  131 +
 .../harness/channel/ManagedChannelFactory.java  |   80 +
 .../harness/channel/SocketAddressFactory.java   |   64 +
 .../beam/fn/harness/channel/package-info.java   |   22 +
 .../fn/harness/control/BeamFnControlClient.java |  166 +
 .../harness/control/ProcessBundleHandler.java   |  334 ++
 .../fn/harness/control/RegisterHandler.java     |   92 +
 .../beam/fn/harness/control/package-info.java   |   22 +
 .../BeamFnDataBufferingOutboundObserver.java    |  135 +
 .../beam/fn/harness/data/BeamFnDataClient.java  |   64 +
 .../fn/harness/data/BeamFnDataGrpcClient.java   |  122 +
 .../harness/data/BeamFnDataGrpcMultiplexer.java |  141 +
 .../harness/data/BeamFnDataInboundObserver.java |   81 +
 .../beam/fn/harness/data/package-info.java      |   22 +
 .../fn/harness/fake/FakeAggregatorFactory.java  |   52 +
 .../beam/fn/harness/fake/FakeStepContext.java   |   70 +
 .../beam/fn/harness/fake/package-info.java      |   22 +
 .../harness/fn/CloseableThrowingConsumer.java   |   23 +
 .../beam/fn/harness/fn/ThrowingBiFunction.java  |   32 +
 .../beam/fn/harness/fn/ThrowingConsumer.java    |   32 +
 .../beam/fn/harness/fn/ThrowingFunction.java    |   32 +
 .../beam/fn/harness/fn/ThrowingRunnable.java    |   30 +
 .../apache/beam/fn/harness/fn/package-info.java |   22 +
 .../fn/harness/logging/BeamFnLoggingClient.java |  310 ++
 .../beam/fn/harness/logging/package-info.java   |   22 +
 .../apache/beam/fn/harness/package-info.java    |   22 +
 .../beam/fn/harness/stream/AdvancingPhaser.java |   36 +
 .../harness/stream/BufferingStreamObserver.java |  166 +
 .../fn/harness/stream/DirectStreamObserver.java |   71 +
 .../ForwardingClientResponseObserver.java       |   63 +
 .../harness/stream/StreamObserverFactory.java   |   91 +
 .../beam/fn/harness/stream/package-info.java    |   22 +
 .../beam/runners/core/BeamFnDataReadRunner.java |  104 +
 .../runners/core/BeamFnDataWriteRunner.java     |   87 +
 .../beam/runners/core/BoundedSourceRunner.java  |  105 +
 .../apache/beam/runners/core/package-info.java  |   22 +
 .../apache/beam/fn/harness/FnHarnessTest.java   |  130 +
 .../channel/ManagedChannelFactoryTest.java      |   74 +
 .../channel/SocketAddressFactoryTest.java       |   56 +
 .../control/BeamFnControlClientTest.java        |  182 +
 .../control/ProcessBundleHandlerTest.java       |  675 +++
 .../fn/harness/control/RegisterHandlerTest.java |   80 +
 ...BeamFnDataBufferingOutboundObserverTest.java |  147 +
 .../harness/data/BeamFnDataGrpcClientTest.java  |  318 ++
 .../data/BeamFnDataGrpcMultiplexerTest.java     |   98 +
 .../data/BeamFnDataInboundObserverTest.java     |  116 +
 .../logging/BeamFnLoggingClientTest.java        |  169 +
 .../fn/harness/stream/AdvancingPhaserTest.java  |   48 +
 .../stream/BufferingStreamObserverTest.java     |  146 +
 .../stream/DirectStreamObserverTest.java        |  139 +
 .../ForwardingClientResponseObserverTest.java   |   60 +
 .../stream/StreamObserverFactoryTest.java       |   84 +
 .../beam/fn/harness/test/TestExecutors.java     |   85 +
 .../beam/fn/harness/test/TestExecutorsTest.java |  160 +
 .../beam/fn/harness/test/TestStreams.java       |  162 +
 .../beam/fn/harness/test/TestStreamsTest.java   |   84 +
 .../runners/core/BeamFnDataReadRunnerTest.java  |  187 +
 .../runners/core/BeamFnDataWriteRunnerTest.java |  155 +
 .../runners/core/BoundedSourceRunnerTest.java   |  113 +
 sdks/java/io/elasticsearch/pom.xml              |  149 +
 .../sdk/io/elasticsearch/ElasticsearchIO.java   |  815 ++++
 .../beam/sdk/io/elasticsearch/package-info.java |   20 +
 .../src/test/contrib/create_elk_container.sh    |   24 +
 .../elasticsearch/ElasticSearchIOTestUtils.java |  129 +
 .../sdk/io/elasticsearch/ElasticsearchIOIT.java |  154 +
 .../io/elasticsearch/ElasticsearchIOTest.java   |  358 ++
 .../elasticsearch/ElasticsearchTestDataSet.java |  109 +
 .../elasticsearch/ElasticsearchTestOptions.java |   46 +
 sdks/java/io/google-cloud-platform/pom.xml      |   16 +-
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    |  450 +-
 .../sdk/io/gcp/bigquery/BigQueryServices.java   |   23 +-
 .../io/gcp/bigquery/BigQueryServicesImpl.java   |  118 +-
 .../gcp/bigquery/BigQueryTableRowIterator.java  |   82 +-
 .../beam/sdk/io/gcp/bigtable/BigtableIO.java    |    8 -
 .../io/gcp/bigtable/BigtableTestOptions.java    |   37 -
 .../beam/sdk/io/gcp/datastore/DatastoreV1.java  |  149 +-
 .../beam/sdk/io/gcp/storage/GcsFileSystem.java  |  210 +-
 .../io/gcp/storage/GcsFileSystemRegistrar.java  |    9 +-
 .../beam/sdk/io/gcp/storage/GcsResourceId.java  |  116 +
 .../apache/beam/sdk/io/gcp/ApiSurfaceTest.java  |  134 -
 .../beam/sdk/io/gcp/GcpApiSurfaceTest.java      |   79 +
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     |  335 +-
 .../gcp/bigquery/BigQueryServicesImplTest.java  |  141 +
 .../bigquery/BigQueryTableRowIteratorTest.java  |   51 +-
 .../sdk/io/gcp/bigquery/BigQueryUtilTest.java   |    3 +-
 .../sdk/io/gcp/bigtable/BigtableIOTest.java     |   24 +-
 .../io/gcp/bigtable/BigtableTestOptions.java    |   37 +
 .../sdk/io/gcp/datastore/DatastoreV1Test.java   |   15 +-
 .../sdk/io/gcp/datastore/SplitQueryFnIT.java    |    2 +-
 .../sdk/io/gcp/storage/GcsFileSystemTest.java   |  274 ++
 .../sdk/io/gcp/storage/GcsResourceIdTest.java   |  147 +
 sdks/java/io/hadoop-common/pom.xml              |   86 +
 .../io/hadoop/SerializableConfiguration.java    |   96 +
 .../beam/sdk/io/hadoop/WritableCoder.java       |  116 +
 .../apache/beam/sdk/io/hadoop/package-info.java |   22 +
 .../hadoop/SerializableConfigurationTest.java   |   75 +
 .../beam/sdk/io/hadoop/WritableCoderTest.java   |   45 +
 sdks/java/io/hadoop-input-format/README.md      |  167 +
 sdks/java/io/hadoop-input-format/pom.xml        |  136 +
 .../hadoop/inputformat/HadoopInputFormatIO.java |  941 ++++
 .../sdk/io/hadoop/inputformat/package-info.java |   23 +
 .../ConfigurableEmployeeInputFormat.java        |  131 +
 .../sdk/io/hadoop/inputformat/Employee.java     |   85 +
 .../hadoop/inputformat/EmployeeInputFormat.java |  172 +
 .../inputformat/HadoopInputFormatIOTest.java    |  844 ++++
 .../ReuseObjectsEmployeeInputFormat.java        |  176 +
 .../hadoop/inputformat/TestEmployeeDataSet.java |   76 +
 sdks/java/io/hbase/pom.xml                      |  233 +
 .../org/apache/beam/sdk/io/hbase/HBaseIO.java   |  693 +++
 .../beam/sdk/io/hbase/HBaseMutationCoder.java   |   71 +
 .../beam/sdk/io/hbase/HBaseResultCoder.java     |   55 +
 .../beam/sdk/io/hbase/SerializableScan.java     |   55 +
 .../apache/beam/sdk/io/hbase/package-info.java  |   24 +
 .../apache/beam/sdk/io/hbase/HBaseIOTest.java   |  430 ++
 .../sdk/io/hbase/HBaseMutationCoderTest.java    |   52 +
 .../beam/sdk/io/hbase/HBaseResultCoderTest.java |   41 +
 .../beam/sdk/io/hbase/SerializableScanTest.java |   56 +
 sdks/java/io/hdfs/pom.xml                       |   64 +-
 .../beam/sdk/io/hdfs/AvroHDFSFileSource.java    |  142 -
 .../beam/sdk/io/hdfs/AvroWrapperCoder.java      |  114 -
 .../apache/beam/sdk/io/hdfs/HDFSFileSink.java   |  301 +-
 .../apache/beam/sdk/io/hdfs/HDFSFileSource.java |  528 ++-
 .../beam/sdk/io/hdfs/HadoopFileSystem.java      |   44 +-
 .../beam/sdk/io/hdfs/HadoopResourceId.java      |   42 +
 .../org/apache/beam/sdk/io/hdfs/UGIHelper.java  |   38 +
 .../apache/beam/sdk/io/hdfs/WritableCoder.java  |  116 -
 .../SimpleAuthAvroHDFSFileSource.java           |   82 -
 .../hdfs/simpleauth/SimpleAuthHDFSFileSink.java |  131 -
 .../simpleauth/SimpleAuthHDFSFileSource.java    |  117 -
 .../sdk/io/hdfs/simpleauth/package-info.java    |   22 -
 .../beam/sdk/io/hdfs/AvroWrapperCoderTest.java  |   51 -
 .../beam/sdk/io/hdfs/HDFSFileSinkTest.java      |  173 +
 .../beam/sdk/io/hdfs/HDFSFileSourceTest.java    |   60 +-
 .../beam/sdk/io/hdfs/WritableCoderTest.java     |   45 -
 sdks/java/io/jdbc/pom.xml                       |  125 +-
 sdks/java/io/jdbc/src/test/README.md            |   32 +
 .../org/apache/beam/sdk/io/jdbc/JdbcIOIT.java   |  178 +
 .../org/apache/beam/sdk/io/jdbc/JdbcIOTest.java |  120 +-
 .../beam/sdk/io/jdbc/JdbcTestDataSet.java       |  128 +
 .../beam/sdk/io/jdbc/PostgresTestOptions.java   |   60 +
 .../kubernetes/postgres-pod-no-vol.yml          |   32 +
 .../kubernetes/postgres-service-public.yml      |   28 +
 .../jdbc/src/test/resources/kubernetes/setup.sh |   20 +
 .../src/test/resources/kubernetes/teardown.sh   |   20 +
 sdks/java/io/jms/pom.xml                        |   22 +-
 .../java/org/apache/beam/sdk/io/jms/JmsIO.java  |  103 +-
 .../org/apache/beam/sdk/io/jms/JmsIOTest.java   |  137 +-
 sdks/java/io/kafka/pom.xml                      |   65 +-
 .../apache/beam/sdk/io/kafka/ConsumerSpEL.java  |   60 +
 .../beam/sdk/io/kafka/KafkaCheckpointMark.java  |   16 +
 .../org/apache/beam/sdk/io/kafka/KafkaIO.java   |  683 ++-
 .../beam/sdk/io/kafka/KafkaRecordCoder.java     |    4 +-
 .../apache/beam/sdk/io/kafka/KafkaIOTest.java   |  109 +-
 .../beam/sdk/io/kafka/KafkaRecordCoderTest.java |   34 +
 sdks/java/io/kinesis/pom.xml                    |   12 +-
 .../beam/sdk/io/kinesis/KinesisRecordCoder.java |    4 +-
 .../beam/sdk/io/kinesis/package-info.java       |    2 +-
 .../sdk/io/kinesis/KinesisMockReadTest.java     |    7 +-
 .../beam/sdk/io/kinesis/KinesisReaderIT.java    |    6 +-
 sdks/java/io/mongodb/pom.xml                    |   20 +-
 .../beam/sdk/io/mongodb/MongoDbGridFSIO.java    |    5 -
 .../apache/beam/sdk/io/mongodb/MongoDbIO.java   |    5 -
 .../sdk/io/mongodb/MongoDBGridFSIOTest.java     |    9 +-
 .../beam/sdk/io/mongodb/MongoDbIOTest.java      |    7 +-
 sdks/java/io/mqtt/pom.xml                       |  122 +
 .../org/apache/beam/sdk/io/mqtt/MqttIO.java     |  592 +++
 .../apache/beam/sdk/io/mqtt/package-info.java   |   22 +
 .../org/apache/beam/sdk/io/mqtt/MqttIOTest.java |  257 +
 sdks/java/io/pom.xml                            |   76 +-
 sdks/java/java8tests/pom.xml                    |    3 +-
 .../beam/sdk/transforms/CombineJava8Test.java   |    8 +-
 .../beam/sdk/transforms/DistinctJava8Test.java  |    5 +-
 .../beam/sdk/transforms/FilterJava8Test.java    |    9 +-
 .../transforms/FlatMapElementsJava8Test.java    |    7 +-
 .../sdk/transforms/MapElementsJava8Test.java    |   33 +-
 .../beam/sdk/transforms/PartitionJava8Test.java |    7 +-
 .../sdk/transforms/SimpleFunctionJava8Test.java |   69 +
 .../beam/sdk/transforms/WithKeysJava8Test.java  |    6 +-
 .../sdk/transforms/WithTimestampsJava8Test.java |   14 +-
 sdks/java/javadoc/ant.xml                       |   96 +
 sdks/java/javadoc/pom.xml                       |  295 ++
 .../maven-archetypes/examples-java8/pom.xml     |    2 +-
 .../main/resources/archetype-resources/pom.xml  |   78 +-
 sdks/java/maven-archetypes/examples/pom.xml     |    2 +-
 .../main/resources/archetype-resources/pom.xml  |   96 +-
 sdks/java/maven-archetypes/pom.xml              |    2 +-
 sdks/java/maven-archetypes/starter/pom.xml      |    2 +-
 .../main/resources/archetype-resources/pom.xml  |    4 +-
 .../resources/projects/basic/reference/pom.xml  |    4 +-
 sdks/java/pom.xml                               |    6 +-
 sdks/pom.xml                                    |   17 +-
 sdks/python/.pylintrc                           |  164 +
 sdks/python/MANIFEST.in                         |   21 +
 sdks/python/README.md                           |  298 ++
 sdks/python/apache_beam/__init__.py             |   82 +
 sdks/python/apache_beam/coders/__init__.py      |   19 +
 sdks/python/apache_beam/coders/coder_impl.pxd   |  143 +
 sdks/python/apache_beam/coders/coder_impl.py    |  734 +++
 sdks/python/apache_beam/coders/coders.py        |  835 ++++
 sdks/python/apache_beam/coders/coders_test.py   |  115 +
 .../apache_beam/coders/coders_test_common.py    |  389 ++
 .../apache_beam/coders/fast_coders_test.py      |   37 +
 sdks/python/apache_beam/coders/observable.py    |   38 +
 .../apache_beam/coders/observable_test.py       |   57 +
 .../coders/proto2_coder_test_messages_pb2.py    |  318 ++
 .../apache_beam/coders/slow_coders_test.py      |   45 +
 sdks/python/apache_beam/coders/slow_stream.py   |  163 +
 .../apache_beam/coders/standard_coders_test.py  |  156 +
 sdks/python/apache_beam/coders/stream.pxd       |   66 +
 sdks/python/apache_beam/coders/stream.pyx       |  226 +
 sdks/python/apache_beam/coders/stream_test.py   |  180 +
 sdks/python/apache_beam/coders/typecoders.py    |  182 +
 .../apache_beam/coders/typecoders_test.py       |  124 +
 sdks/python/apache_beam/error.py                |   42 +
 sdks/python/apache_beam/examples/__init__.py    |   16 +
 .../apache_beam/examples/complete/__init__.py   |   16 +
 .../examples/complete/autocomplete.py           |   90 +
 .../examples/complete/autocomplete_test.py      |   52 +
 .../examples/complete/estimate_pi.py            |  128 +
 .../examples/complete/estimate_pi_test.py       |   52 +
 .../examples/complete/juliaset/__init__.py      |   16 +
 .../complete/juliaset/juliaset/__init__.py      |   16 +
 .../complete/juliaset/juliaset/juliaset.py      |  124 +
 .../complete/juliaset/juliaset/juliaset_test.py |   86 +
 .../examples/complete/juliaset/juliaset_main.py |   58 +
 .../examples/complete/juliaset/setup.py         |  116 +
 .../apache_beam/examples/complete/tfidf.py      |  208 +
 .../apache_beam/examples/complete/tfidf_test.py |   91 +
 .../examples/complete/top_wikipedia_sessions.py |  182 +
 .../complete/top_wikipedia_sessions_test.py     |   62 +
 .../apache_beam/examples/cookbook/__init__.py   |   16 +
 .../examples/cookbook/bigquery_schema.py        |  129 +
 .../examples/cookbook/bigquery_side_input.py    |  121 +
 .../cookbook/bigquery_side_input_test.py        |   54 +
 .../examples/cookbook/bigquery_tornadoes.py     |   99 +
 .../cookbook/bigquery_tornadoes_it_test.py      |   62 +
 .../cookbook/bigquery_tornadoes_test.py         |   45 +
 .../apache_beam/examples/cookbook/bigshuffle.py |   94 +
 .../examples/cookbook/bigshuffle_test.py        |   63 +
 .../apache_beam/examples/cookbook/coders.py     |  101 +
 .../examples/cookbook/coders_test.py            |   49 +
 .../examples/cookbook/combiners_test.py         |   74 +
 .../examples/cookbook/custom_ptransform.py      |  134 +
 .../examples/cookbook/custom_ptransform_test.py |   53 +
 .../examples/cookbook/datastore_wordcount.py    |  261 ++
 .../apache_beam/examples/cookbook/filters.py    |  107 +
 .../examples/cookbook/filters_test.py           |   69 +
 .../examples/cookbook/group_with_coder.py       |  122 +
 .../examples/cookbook/group_with_coder_test.py  |   89 +
 .../examples/cookbook/mergecontacts.py          |  133 +
 .../examples/cookbook/mergecontacts_test.py     |  125 +
 .../examples/cookbook/multiple_output_pardo.py  |  184 +
 .../cookbook/multiple_output_pardo_test.py      |   72 +
 .../apache_beam/examples/snippets/__init__.py   |   16 +
 .../apache_beam/examples/snippets/snippets.py   | 1158 +++++
 .../examples/snippets/snippets_test.py          |  904 ++++
 .../apache_beam/examples/streaming_wordcap.py   |   64 +
 .../apache_beam/examples/streaming_wordcount.py |   74 +
 sdks/python/apache_beam/examples/wordcount.py   |  116 +
 .../apache_beam/examples/wordcount_debugging.py |  163 +
 .../examples/wordcount_debugging_test.py        |   59 +
 .../apache_beam/examples/wordcount_it_test.py   |   59 +
 .../apache_beam/examples/wordcount_minimal.py   |  121 +
 .../examples/wordcount_minimal_test.py          |   59 +
 .../apache_beam/examples/wordcount_test.py      |   58 +
 sdks/python/apache_beam/internal/__init__.py    |   16 +
 .../python/apache_beam/internal/gcp/__init__.py |   16 +
 sdks/python/apache_beam/internal/gcp/auth.py    |  185 +
 .../apache_beam/internal/gcp/auth_test.py       |   44 +
 .../apache_beam/internal/gcp/json_value.py      |  147 +
 .../apache_beam/internal/gcp/json_value_test.py |   93 +
 sdks/python/apache_beam/internal/module_test.py |   62 +
 sdks/python/apache_beam/internal/pickler.py     |  230 +
 .../python/apache_beam/internal/pickler_test.py |   84 +
 sdks/python/apache_beam/internal/util.py        |  127 +
 sdks/python/apache_beam/internal/util_test.py   |   61 +
 sdks/python/apache_beam/io/__init__.py          |   38 +
 sdks/python/apache_beam/io/avroio.py            |  372 ++
 sdks/python/apache_beam/io/avroio_test.py       |  381 ++
 sdks/python/apache_beam/io/concat_source.py     |  263 ++
 .../python/apache_beam/io/concat_source_test.py |  231 +
 sdks/python/apache_beam/io/filebasedsource.py   |  329 ++
 .../apache_beam/io/filebasedsource_test.py      |  708 +++
 sdks/python/apache_beam/io/fileio.py            |  746 +++
 sdks/python/apache_beam/io/fileio_test.py       |  352 ++
 sdks/python/apache_beam/io/gcp/__init__.py      |   16 +
 sdks/python/apache_beam/io/gcp/bigquery.py      | 1081 +++++
 sdks/python/apache_beam/io/gcp/bigquery_test.py |  828 ++++
 .../apache_beam/io/gcp/datastore/__init__.py    |   16 +
 .../apache_beam/io/gcp/datastore/v1/__init__.py |   16 +
 .../io/gcp/datastore/v1/datastoreio.py          |  397 ++
 .../io/gcp/datastore/v1/datastoreio_test.py     |  245 +
 .../io/gcp/datastore/v1/fake_datastore.py       |   98 +
 .../apache_beam/io/gcp/datastore/v1/helper.py   |  274 ++
 .../io/gcp/datastore/v1/helper_test.py          |  265 ++
 .../io/gcp/datastore/v1/query_splitter.py       |  275 ++
 .../io/gcp/datastore/v1/query_splitter_test.py  |  208 +
 sdks/python/apache_beam/io/gcp/gcsio.py         |  871 ++++
 sdks/python/apache_beam/io/gcp/gcsio_test.py    |  796 ++++
 .../apache_beam/io/gcp/internal/__init__.py     |   16 +
 .../io/gcp/internal/clients/__init__.py         |   16 +
 .../gcp/internal/clients/bigquery/__init__.py   |   33 +
 .../clients/bigquery/bigquery_v2_client.py      |  660 +++
 .../clients/bigquery/bigquery_v2_messages.py    | 1910 ++++++++
 .../io/gcp/internal/clients/storage/__init__.py |   33 +
 .../clients/storage/storage_v1_client.py        | 1039 +++++
 .../clients/storage/storage_v1_messages.py      | 1920 ++++++++
 sdks/python/apache_beam/io/gcp/pubsub.py        |   91 +
 sdks/python/apache_beam/io/gcp/pubsub_test.py   |   63 +
 .../python/apache_beam/io/gcp/tests/__init__.py |   16 +
 .../io/gcp/tests/bigquery_matcher.py            |  108 +
 .../io/gcp/tests/bigquery_matcher_test.py       |  108 +
 sdks/python/apache_beam/io/iobase.py            |  987 ++++
 sdks/python/apache_beam/io/range_trackers.py    |  532 +++
 .../apache_beam/io/range_trackers_test.py       |  590 +++
 sdks/python/apache_beam/io/source_test_utils.py |  642 +++
 .../apache_beam/io/source_test_utils_test.py    |  122 +
 sdks/python/apache_beam/io/sources_test.py      |  111 +
 sdks/python/apache_beam/io/textio.py            |  448 ++
 sdks/python/apache_beam/io/textio_test.py       |  718 +++
 sdks/python/apache_beam/io/tfrecordio.py        |  271 ++
 sdks/python/apache_beam/io/tfrecordio_test.py   |  389 ++
 sdks/python/apache_beam/metrics/__init__.py     |   17 +
 sdks/python/apache_beam/metrics/cells.py        |  315 ++
 sdks/python/apache_beam/metrics/cells_test.py   |  143 +
 sdks/python/apache_beam/metrics/execution.pxd   |   31 +
 sdks/python/apache_beam/metrics/execution.py    |  229 +
 .../apache_beam/metrics/execution_test.py       |  131 +
 sdks/python/apache_beam/metrics/metric.py       |  202 +
 sdks/python/apache_beam/metrics/metric_test.py  |  128 +
 sdks/python/apache_beam/metrics/metricbase.py   |   82 +
 sdks/python/apache_beam/pipeline.py             |  442 ++
 sdks/python/apache_beam/pipeline_test.py        |  444 ++
 sdks/python/apache_beam/pvalue.py               |  468 ++
 sdks/python/apache_beam/pvalue_test.py          |   68 +
 sdks/python/apache_beam/runners/__init__.py     |   30 +
 sdks/python/apache_beam/runners/api/__init__.py |   16 +
 .../runners/api/beam_runner_api_pb2.py          | 2772 +++++++++++
 sdks/python/apache_beam/runners/common.pxd      |   77 +
 sdks/python/apache_beam/runners/common.py       |  436 ++
 .../apache_beam/runners/dataflow/__init__.py    |   16 +
 .../runners/dataflow/dataflow_metrics.py        |  111 +
 .../runners/dataflow/dataflow_metrics_test.py   |  148 +
 .../runners/dataflow/dataflow_runner.py         |  729 +++
 .../runners/dataflow/dataflow_runner_test.py    |  181 +
 .../runners/dataflow/internal/__init__.py       |   16 +
 .../runners/dataflow/internal/apiclient.py      |  739 +++
 .../runners/dataflow/internal/apiclient_test.py |   96 +
 .../dataflow/internal/clients/__init__.py       |   16 +
 .../internal/clients/dataflow/__init__.py       |   33 +
 .../clients/dataflow/dataflow_v1b3_client.py    |  694 +++
 .../clients/dataflow/dataflow_v1b3_messages.py  | 4392 ++++++++++++++++++
 .../clients/dataflow/message_matchers.py        |  124 +
 .../clients/dataflow/message_matchers_test.py   |   77 +
 .../runners/dataflow/internal/dependency.py     |  522 +++
 .../dataflow/internal/dependency_test.py        |  425 ++
 .../runners/dataflow/internal/names.py          |   82 +
 .../runners/dataflow/native_io/__init__.py      |   16 +
 .../runners/dataflow/native_io/iobase.py        |  318 ++
 .../runners/dataflow/template_runner_test.py    |   97 +
 .../runners/dataflow/test_dataflow_runner.py    |   40 +
 .../apache_beam/runners/direct/__init__.py      |   19 +
 .../runners/direct/bundle_factory.py            |  201 +
 sdks/python/apache_beam/runners/direct/clock.py |   50 +
 .../consumer_tracking_pipeline_visitor.py       |   59 +
 .../consumer_tracking_pipeline_visitor_test.py  |  127 +
 .../runners/direct/direct_metrics.py            |  112 +
 .../runners/direct/direct_metrics_test.py       |  211 +
 .../apache_beam/runners/direct/direct_runner.py |  173 +
 .../runners/direct/evaluation_context.py        |  283 ++
 .../apache_beam/runners/direct/executor.py      |  578 +++
 .../runners/direct/helper_transforms.py         |   99 +
 .../runners/direct/transform_evaluator.py       |  558 +++
 .../runners/direct/transform_result.py          |   64 +
 .../runners/direct/watermark_manager.py         |  224 +
 .../apache_beam/runners/pipeline_context.py     |   88 +
 .../runners/pipeline_context_test.py            |   49 +
 sdks/python/apache_beam/runners/runner.py       |  368 ++
 sdks/python/apache_beam/runners/runner_test.py  |  123 +
 .../python/apache_beam/runners/test/__init__.py |   30 +
 sdks/python/apache_beam/test_pipeline.py        |  163 +
 sdks/python/apache_beam/test_pipeline_test.py   |  112 +
 sdks/python/apache_beam/tests/__init__.py       |   16 +
 sdks/python/apache_beam/tests/data/README.md    |   20 +
 .../apache_beam/tests/data/privatekey.p12       |  Bin 0 -> 2452 bytes
 .../apache_beam/tests/data/standard_coders.yaml |  196 +
 .../apache_beam/tests/pipeline_verifiers.py     |  119 +
 .../tests/pipeline_verifiers_test.py            |  123 +
 sdks/python/apache_beam/tests/test_utils.py     |   69 +
 sdks/python/apache_beam/transforms/__init__.py  |   25 +
 sdks/python/apache_beam/transforms/combiners.py |  595 +++
 .../apache_beam/transforms/combiners_test.py    |  324 ++
 sdks/python/apache_beam/transforms/core.py      | 1389 ++++++
 .../apache_beam/transforms/cy_combiners.pxd     |   92 +
 .../apache_beam/transforms/cy_combiners.py      |  306 ++
 sdks/python/apache_beam/transforms/display.py   |  331 ++
 .../apache_beam/transforms/display_test.py      |  216 +
 .../python/apache_beam/transforms/ptransform.py |  671 +++
 .../apache_beam/transforms/ptransform_test.py   | 1941 ++++++++
 .../python/apache_beam/transforms/sideinputs.py |  214 +
 .../apache_beam/transforms/sideinputs_test.py   |  337 ++
 sdks/python/apache_beam/transforms/timeutil.py  |  133 +
 sdks/python/apache_beam/transforms/trigger.py   | 1109 +++++
 .../apache_beam/transforms/trigger_test.py      |  601 +++
 .../transforms/trigger_transcripts.yaml         |  224 +
 sdks/python/apache_beam/transforms/util.py      |  235 +
 sdks/python/apache_beam/transforms/window.py    |  475 ++
 .../apache_beam/transforms/window_test.py       |  261 ++
 .../transforms/write_ptransform_test.py         |  126 +
 sdks/python/apache_beam/typehints/__init__.py   |   22 +
 sdks/python/apache_beam/typehints/decorators.py |  532 +++
 sdks/python/apache_beam/typehints/opcodes.py    |  334 ++
 .../apache_beam/typehints/trivial_inference.py  |  417 ++
 .../typehints/trivial_inference_test.py         |  151 +
 sdks/python/apache_beam/typehints/typecheck.py  |  178 +
 .../typehints/typed_pipeline_test.py            |  251 +
 sdks/python/apache_beam/typehints/typehints.py  | 1062 +++++
 .../apache_beam/typehints/typehints_test.py     | 1062 +++++
 sdks/python/apache_beam/utils/__init__.py       |   22 +
 sdks/python/apache_beam/utils/annotations.py    |  103 +
 .../apache_beam/utils/annotations_test.py       |  126 +
 sdks/python/apache_beam/utils/counters.pxd      |   30 +
 sdks/python/apache_beam/utils/counters.py       |  183 +
 sdks/python/apache_beam/utils/path.py           |   47 +
 sdks/python/apache_beam/utils/path_test.py      |   70 +
 .../apache_beam/utils/pipeline_options.py       |  557 +++
 .../apache_beam/utils/pipeline_options_test.py  |  192 +
 .../utils/pipeline_options_validator.py         |  199 +
 .../utils/pipeline_options_validator_test.py    |  342 ++
 sdks/python/apache_beam/utils/processes.py      |   52 +
 sdks/python/apache_beam/utils/processes_test.py |  106 +
 sdks/python/apache_beam/utils/profiler.py       |  148 +
 sdks/python/apache_beam/utils/proto_utils.py    |   54 +
 sdks/python/apache_beam/utils/retry.py          |  207 +
 sdks/python/apache_beam/utils/retry_test.py     |  221 +
 sdks/python/apache_beam/utils/timestamp.py      |  213 +
 sdks/python/apache_beam/utils/timestamp_test.py |  168 +
 sdks/python/apache_beam/utils/urns.py           |   24 +
 .../python/apache_beam/utils/windowed_value.pxd |   38 +
 sdks/python/apache_beam/utils/windowed_value.py |  122 +
 .../apache_beam/utils/windowed_value_test.py    |   71 +
 sdks/python/apache_beam/version.py              |   57 +
 sdks/python/generate_pydoc.sh                   |   80 +
 sdks/python/pom.xml                             |  191 +
 sdks/python/run_postcommit.sh                   |  102 +
 sdks/python/run_pylint.sh                       |   52 +
 sdks/python/setup.cfg                           |   27 +
 sdks/python/setup.py                            |  155 +
 sdks/python/test_config.py                      |   44 +
 sdks/python/tox.ini                             |   89 +
 1310 files changed, 137874 insertions(+), 30108 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/3f917987/.jenkins/job_beam_PostCommit_Java_RunnableOnService_Gearpump.groovy
----------------------------------------------------------------------
diff --cc .jenkins/job_beam_PostCommit_Java_RunnableOnService_Gearpump.groovy
index 847ade3,8f41c3e..c33c9b0
--- a/.jenkins/job_beam_PostCommit_Java_RunnableOnService_Gearpump.groovy
+++ b/.jenkins/job_beam_PostCommit_Java_RunnableOnService_Gearpump.groovy
@@@ -33,9 -35,15 +35,15 @@@ mavenJob('beam_PostCommit_Java_Runnable
  
    // Sets that this is a PostCommit job.
    // 0 5 31 2 * will run on Feb 31 (i.e. never) according to job properties.
-   // This job triggers only on SCM.
+   // In post-commit this job triggers only on SCM changes.
    common_job_properties.setPostCommit(delegate, '0 5 31 2 *')
  
+   // Allows triggering this build against pull requests.
+   common_job_properties.enablePhraseTriggeringFromPullRequest(
+     delegate,
+     'Apache Gearpump Runner RunnableOnService Tests',
+     'Run Gearpump RunnableOnService')
+ 
    // Maven goals for this job.
 -  goals('-B -e clean verify -am -pl runners/gearpump -DforkCount=0 -DrunnableOnServicePipelineOptions=\'[ "--runner=TestGearpumpRunner", "--streaming=false" ]\'')
 +  goals('-B -e clean verify -am -pl runners/gearpump  -Plocal-runnable-on-service-tests -Prunnable-on-service-tests')
  }

http://git-wip-us.apache.org/repos/asf/beam/blob/3f917987/.travis.yml
----------------------------------------------------------------------
diff --cc .travis.yml
index 7dcd5d1,c896431..87ade2f
--- a/.travis.yml
+++ b/.travis.yml
@@@ -55,14 -67,21 +67,22 @@@ before_install
    - if [ "$TRAVIS_OS_NAME" == "osx" ]; then export JAVA_HOME=$(/usr/libexec/java_home); fi
    - if [ "$TRAVIS_OS_NAME" == "linux" ]; then jdk_switcher use "$CUSTOM_JDK"; fi
    - export BEAM_SUREFIRE_ARGLINE="-Xmx512m"
+   # Python SDK environment settings.
+   - export TOX_ENV=py27
+   - if [ "$TRAVIS_OS_NAME" == "osx" ]; then export TOX_HOME=$HOME/Library/Python/2.7/bin; fi
+   - if [ "$TRAVIS_OS_NAME" == "linux" ]; then export TOX_HOME=$HOME/.local/bin; fi
  
  install:
+   - if [ ! "$TEST_PYTHON" ]; then travis_retry mvn -B install clean -U -DskipTests=true; fi
+   - if [ "$TEST_PYTHON" ] && pip list | grep tox; then TOX_FILE=`which tox` ; export TOX_HOME=`dirname $TOX_FILE`; fi
+   - if [ "$TEST_PYTHON" ] && ! pip list | grep tox; then travis_retry pip install tox --user; fi
    # Removing this here protects from inadvertent caching
    - rm -rf "$HOME/.m2/repository/org/apache/beam"
 +  - rm -rf "$HOME/.m2/repository/org/apache/gearpump"
  
  script:
-   - travis_retry mvn --batch-mode --update-snapshots --no-snapshot-updates $MAVEN_OVERRIDE install && travis_retry bash -ex .travis/test_wordcount.sh
+   - if [ "$TEST_PYTHON" ]; then travis_retry $TOX_HOME/tox -e $TOX_ENV -c sdks/python/tox.ini; fi
+   - if [ ! "$TEST_PYTHON" ]; then travis_retry mvn --batch-mode --update-snapshots --no-snapshot-updates --threads 1C $MAVEN_OVERRIDE install && travis_retry bash -ex .travis/test_wordcount.sh; fi
  
  cache:
    directories:

http://git-wip-us.apache.org/repos/asf/beam/blob/3f917987/runners/pom.xml
----------------------------------------------------------------------


[48/50] [abbrv] beam git commit: [BEAM-79] Fix gearpump-runner merge conflicts and test failure

Posted by ke...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/WindowBoundTranslator.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/WindowBoundTranslator.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/WindowBoundTranslator.java
deleted file mode 100644
index 81970e2..0000000
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/WindowBoundTranslator.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.gearpump.translators;
-
-import com.google.common.collect.Iterables;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.transforms.windowing.WindowFn;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.gearpump.streaming.dsl.javaapi.JavaStream;
-import org.apache.gearpump.streaming.dsl.javaapi.functions.FlatMapFunction;
-import org.joda.time.Instant;
-
-/**
- * {@link Window.Bound} is translated to Gearpump flatMap function.
- */
-@SuppressWarnings("unchecked")
-public class WindowBoundTranslator<T> implements  TransformTranslator<Window.Bound<T>> {
-
-  private static final long serialVersionUID = -964887482120489061L;
-
-  @Override
-  public void translate(Window.Bound<T> transform, TranslationContext context) {
-    PCollection<T> input = context.getInput(transform);
-    JavaStream<WindowedValue<T>> inputStream = context.getInputStream(input);
-    WindowingStrategy<?, ?> outputStrategy =
-        transform.getOutputStrategyInternal(input.getWindowingStrategy());
-    WindowFn<T, BoundedWindow> windowFn = (WindowFn<T, BoundedWindow>) outputStrategy.getWindowFn();
-    JavaStream<WindowedValue<T>> outputStream =
-        inputStream
-            .flatMap(new AssignWindows(windowFn), "assign_windows");
-
-    context.setOutputStream(context.getOutput(transform), outputStream);
-  }
-
-  private static class AssignWindows<T> extends
-      FlatMapFunction<WindowedValue<T>, WindowedValue<T>> {
-
-    private static final long serialVersionUID = 7284565861938681360L;
-    private final WindowFn<T, BoundedWindow> windowFn;
-
-    AssignWindows(WindowFn<T, BoundedWindow> windowFn) {
-      this.windowFn = windowFn;
-    }
-
-    @Override
-    public Iterator<WindowedValue<T>> flatMap(final WindowedValue<T> value) {
-      try {
-        Collection<BoundedWindow> windows = windowFn.assignWindows(windowFn.new AssignContext() {
-          @Override
-          public T element() {
-            return value.getValue();
-          }
-
-          @Override
-          public Instant timestamp() {
-            return value.getTimestamp();
-          }
-
-          @Override
-          public BoundedWindow window() {
-            return Iterables.getOnlyElement(value.getWindows());
-          }
-        });
-        List<WindowedValue<T>> values = new ArrayList<>(windows.size());
-        for (BoundedWindow win: windows) {
-          values.add(
-              WindowedValue.of(value.getValue(), value.getTimestamp(), win, value.getPane()));
-        }
-        return values.iterator();
-      } catch (Exception e) {
-        throw new RuntimeException(e);
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/functions/DoFnFunction.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/functions/DoFnFunction.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/functions/DoFnFunction.java
index b2c68d6..9941e71 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/functions/DoFnFunction.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/functions/DoFnFunction.java
@@ -33,6 +33,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.beam.runners.core.DoFnRunners;
+import org.apache.beam.runners.core.InMemoryStateInternals;
 import org.apache.beam.runners.core.PushbackSideInputDoFnRunner;
 import org.apache.beam.runners.core.SideInputHandler;
 import org.apache.beam.runners.gearpump.GearpumpPipelineOptions;
@@ -48,7 +49,6 @@ import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.util.WindowingStrategy;
 
-import org.apache.beam.sdk.util.state.InMemoryStateInternals;
 import org.apache.beam.sdk.values.PCollectionView;
 import org.apache.beam.sdk.values.TupleTag;
 import org.apache.gearpump.streaming.dsl.javaapi.functions.FlatMapFunction;
@@ -134,12 +134,16 @@ public class DoFnFunction<InputT, OutputT> extends
       } else {
         // side input
         PCollectionView<?> sideInput = tagsToSideInputs.get(unionValue.getUnionTag());
-        WindowedValue<Iterable<?>> sideInputValue =
-            (WindowedValue<Iterable<?>>) unionValue.getValue();
+        WindowedValue<?> sideInputValue =
+            (WindowedValue<?>) unionValue.getValue();
+        Object value = sideInputValue.getValue();
+        if (!(value instanceof Iterable)) {
+          sideInputValue = sideInputValue.withValue(Lists.newArrayList(value));
+        }
         if (!sideInputValues.containsKey(sideInput)) {
           sideInputValues.put(sideInput, new LinkedList<WindowedValue<Iterable<?>>>());
         }
-        sideInputValues.get(sideInput).add(sideInputValue);
+        sideInputValues.get(sideInput).add((WindowedValue<Iterable<?>>) sideInputValue);
       }
     }
 

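The DoFnFunction hunk above normalizes side-input values: a raw (non-Iterable) value is wrapped in a singleton list before being cached, so the rest of the function can treat every side input as an Iterable. A minimal, self-contained sketch of that normalization (illustrative only; the committed code applies it to WindowedValue payloads, and the class and method names below are made up):

import com.google.common.collect.Lists;

public class SideInputNormalizationSketch {

  // Pass Iterables through unchanged; wrap anything else in a one-element
  // list, mirroring the instanceof guard added above in DoFnFunction.
  static Iterable<?> normalize(Object value) {
    return (value instanceof Iterable) ? (Iterable<?>) value : Lists.newArrayList(value);
  }

  public static void main(String[] args) {
    System.out.println(normalize("singleton"));                  // prints [singleton]
    System.out.println(normalize(Lists.newArrayList(1, 2, 3)));  // prints [1, 2, 3]
  }
}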
http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/io/UnboundedSourceWrapper.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/io/UnboundedSourceWrapper.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/io/UnboundedSourceWrapper.java
index dfdecb2..cb912c1 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/io/UnboundedSourceWrapper.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/io/UnboundedSourceWrapper.java
@@ -30,6 +30,7 @@ import org.apache.beam.sdk.options.PipelineOptions;
 public class UnboundedSourceWrapper<OutputT, CheckpointMarkT extends UnboundedSource.CheckpointMark>
     extends GearpumpSource<OutputT> {
 
+  private static final long serialVersionUID = -2453956849834747150L;
   private final UnboundedSource<OutputT, CheckpointMarkT> source;
 
   public UnboundedSourceWrapper(UnboundedSource<OutputT, CheckpointMarkT> source,

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/DoFnRunnerFactory.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/DoFnRunnerFactory.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/DoFnRunnerFactory.java
index 5db8320..bdfc336 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/DoFnRunnerFactory.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/DoFnRunnerFactory.java
@@ -25,12 +25,12 @@ import java.util.List;
 import org.apache.beam.runners.core.AggregatorFactory;
 import org.apache.beam.runners.core.DoFnRunner;
 import org.apache.beam.runners.core.DoFnRunners;
+import org.apache.beam.runners.core.ExecutionContext;
 import org.apache.beam.runners.core.PushbackSideInputDoFnRunner;
 import org.apache.beam.runners.core.SimpleDoFnRunner;
 import org.apache.beam.runners.gearpump.GearpumpPipelineOptions;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.util.ExecutionContext;
 import org.apache.beam.sdk.util.ReadyCheckingSideInputReader;
 import org.apache.beam.sdk.util.WindowingStrategy;
 import org.apache.beam.sdk.values.PCollectionView;
@@ -75,7 +75,7 @@ public class DoFnRunnerFactory<InputT, OutputT> implements Serializable {
 
   public PushbackSideInputDoFnRunner<InputT, OutputT> createRunner(
       ReadyCheckingSideInputReader sideInputReader) {
-    DoFnRunner<InputT, OutputT> underlying = DoFnRunners.createDefault(
+    DoFnRunner<InputT, OutputT> underlying = DoFnRunners.simpleRunner(
         options, fn, sideInputReader, outputManager, mainOutputTag,
         sideOutputTags, stepContext, aggregatorFactory, windowingStrategy);
     return PushbackSideInputDoFnRunner.create(underlying, sideInputs, sideInputReader);

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/NoOpAggregatorFactory.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/NoOpAggregatorFactory.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/NoOpAggregatorFactory.java
index 22ffc4d..3436930 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/NoOpAggregatorFactory.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/NoOpAggregatorFactory.java
@@ -21,9 +21,9 @@ package org.apache.beam.runners.gearpump.translators.utils;
 import java.io.Serializable;
 
 import org.apache.beam.runners.core.AggregatorFactory;
+import org.apache.beam.runners.core.ExecutionContext;
 import org.apache.beam.sdk.transforms.Aggregator;
 import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.util.ExecutionContext;
 
 /**
  * no-op aggregator factory.

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/NoOpStepContext.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/NoOpStepContext.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/NoOpStepContext.java
index 45f146b..140df2a 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/NoOpStepContext.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/NoOpStepContext.java
@@ -21,12 +21,12 @@ package org.apache.beam.runners.gearpump.translators.utils;
 import java.io.IOException;
 import java.io.Serializable;
 
+import org.apache.beam.runners.core.ExecutionContext;
+import org.apache.beam.runners.core.StateInternals;
+import org.apache.beam.runners.core.TimerInternals;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.util.ExecutionContext;
-import org.apache.beam.sdk.util.TimerInternals;
 import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.state.StateInternals;
 import org.apache.beam.sdk.values.TupleTag;
 
 /**

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/TranslatorUtils.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/TranslatorUtils.java b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/TranslatorUtils.java
index b8a5233..b8f0ccb 100644
--- a/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/TranslatorUtils.java
+++ b/runners/gearpump/src/main/java/org/apache/beam/runners/gearpump/translators/utils/TranslatorUtils.java
@@ -38,8 +38,6 @@ import org.apache.gearpump.streaming.dsl.api.functions.MapFunction;
 import org.apache.gearpump.streaming.dsl.javaapi.JavaStream;
 import org.apache.gearpump.streaming.dsl.window.impl.Window;
 
-
-
 /**
  * Utility methods for translators.
  */

http://git-wip-us.apache.org/repos/asf/beam/blob/3eab6a64/runners/gearpump/src/test/java/org/apache/beam/runners/gearpump/translators/utils/TranslatorUtilsTest.java
----------------------------------------------------------------------
diff --git a/runners/gearpump/src/test/java/org/apache/beam/runners/gearpump/translators/utils/TranslatorUtilsTest.java b/runners/gearpump/src/test/java/org/apache/beam/runners/gearpump/translators/utils/TranslatorUtilsTest.java
index 10976e8..524887d 100644
--- a/runners/gearpump/src/test/java/org/apache/beam/runners/gearpump/translators/utils/TranslatorUtilsTest.java
+++ b/runners/gearpump/src/test/java/org/apache/beam/runners/gearpump/translators/utils/TranslatorUtilsTest.java
@@ -34,7 +34,6 @@ import org.apache.beam.sdk.values.KV;
 import org.apache.gearpump.streaming.dsl.window.impl.Window;
 import org.junit.Test;
 
-
 /**
  * Tests for {@link TranslatorUtils}.
  */


[44/50] [abbrv] beam git commit: This closes #2171

Posted by ke...@apache.org.
This closes #2171


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b6ca062f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b6ca062f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b6ca062f

Branch: refs/heads/gearpump-runner
Commit: b6ca062fcfa31884baf08b804d04c12dee10b62e
Parents: d167153 874c8d0
Author: Stas Levin <st...@apache.org>
Authored: Sun Mar 12 10:02:30 2017 +0200
Committer: Stas Levin <st...@apache.org>
Committed: Sun Mar 12 10:02:30 2017 +0200

----------------------------------------------------------------------
 .../spark/SparkNativePipelineVisitor.java       |  4 --
 .../beam/runners/spark/SparkPipelineResult.java |  8 +--
 .../apache/beam/runners/spark/SparkRunner.java  | 65 ++++++++++----------
 .../beam/runners/spark/SparkRunnerDebugger.java | 30 ++++++---
 .../beam/runners/spark/TestSparkRunner.java     |  4 +-
 .../aggregators/AggregatorsAccumulator.java     | 44 +++++++++----
 .../spark/aggregators/SparkAggregators.java     | 40 ++----------
 .../spark/metrics/AggregatorMetricSource.java   | 11 ++--
 .../spark/metrics/MetricsAccumulator.java       | 38 ++++++++----
 .../spark/metrics/SparkBeamMetricSource.java    | 11 ++--
 .../spark/metrics/SparkMetricsContainer.java    | 17 ++---
 .../spark/translation/TransformTranslator.java  | 13 ++--
 .../SparkRunnerStreamingContextFactory.java     |  3 +
 .../streaming/StreamingTransformTranslator.java | 10 +--
 .../metrics/sink/NamedAggregatorsTest.java      | 15 +----
 .../ResumeFromCheckpointStreamingTest.java      |  4 +-
 16 files changed, 156 insertions(+), 161 deletions(-)
----------------------------------------------------------------------



[07/50] [abbrv] beam git commit: Auto-generated runner api proto bindings.

Posted by ke...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/3bb125e1/sdks/python/run_pylint.sh
----------------------------------------------------------------------
diff --git a/sdks/python/run_pylint.sh b/sdks/python/run_pylint.sh
index afc5fb4..5e63856 100755
--- a/sdks/python/run_pylint.sh
+++ b/sdks/python/run_pylint.sh
@@ -34,7 +34,8 @@ EXCLUDED_GENERATED_FILES=(
 "apache_beam/runners/dataflow/internal/clients/dataflow/dataflow_v1b3_messages.py"
 "apache_beam/io/gcp/internal/clients/storage/storage_v1_client.py"
 "apache_beam/io/gcp/internal/clients/storage/storage_v1_messages.py"
-"apache_beam/coders/proto2_coder_test_messages_pb2.py")
+"apache_beam/coders/proto2_coder_test_messages_pb2.py"
+"apache_beam/runners/api/beam_runner_api_pb2.py")
 
 FILES_TO_IGNORE=""
 for file in "${EXCLUDED_GENERATED_FILES[@]}"; do


[31/50] [abbrv] beam git commit: Added assertion failure tests for `PAssert#thatSingleton`

Posted by ke...@apache.org.
Added assertion failure tests for `PAssert#thatSingleton`


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/59fd45b8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/59fd45b8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/59fd45b8

Branch: refs/heads/gearpump-runner
Commit: 59fd45b8104c7c19c65aa0df31fb9312ff82c650
Parents: 2485a4c
Author: Aviem Zur <av...@gmail.com>
Authored: Wed Mar 1 08:31:57 2017 +0200
Committer: Aviem Zur <av...@gmail.com>
Committed: Fri Mar 10 23:13:38 2017 +0200

----------------------------------------------------------------------
 .../apache/beam/sdk/testing/PAssertTest.java    | 36 ++++++++++++++++++++
 1 file changed, 36 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/59fd45b8/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
index 9bdb1b5..1603db5 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/PAssertTest.java
@@ -297,6 +297,42 @@ public class PAssertTest implements Serializable {
   }
 
   /**
+   * Test that we throw an error for false assertion on singleton.
+   */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testPAssertEqualsSingletonFalse() throws Exception {
+    PCollection<Integer> pcollection = pipeline.apply(Create.of(42));
+    PAssert.thatSingleton("The value was not equal to 44", pcollection).isEqualTo(44);
+
+    Throwable thrown = runExpectingAssertionFailure(pipeline);
+
+    String message = thrown.getMessage();
+
+    assertThat(message, containsString("The value was not equal to 44"));
+    assertThat(message, containsString("Expected: <44>"));
+    assertThat(message, containsString("but: was <42>"));
+  }
+
+  /**
+   * Test that we throw an error for false assertion on singleton.
+   */
+  @Test
+  @Category(RunnableOnService.class)
+  public void testPAssertEqualsSingletonFalseDefaultReasonString() throws Exception {
+    PCollection<Integer> pcollection = pipeline.apply(Create.of(42));
+    PAssert.thatSingleton(pcollection).isEqualTo(44);
+
+    Throwable thrown = runExpectingAssertionFailure(pipeline);
+
+    String message = thrown.getMessage();
+
+    assertThat(message, containsString("Create.Values/Read(CreateSource).out"));
+    assertThat(message, containsString("Expected: <44>"));
+    assertThat(message, containsString("but: was <42>"));
+  }
+
+  /**
    * Tests that {@code containsInAnyOrder} is actually order-independent.
    */
   @Test

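Both new tests rely on a runExpectingAssertionFailure helper that lives elsewhere in PAssertTest and is not part of this hunk. A hypothetical sketch of such a helper, assuming PAssert failures surface as exceptions when the pipeline runs (the committed implementation may differ):

// Hypothetical helper, to sit inside PAssertTest: run the pipeline, return
// the assertion failure PAssert is expected to raise, and fail the test if
// the pipeline unexpectedly succeeds.
private static Throwable runExpectingAssertionFailure(Pipeline pipeline) {
  try {
    pipeline.run().waitUntilFinish();
  } catch (Throwable t) {
    return t;
  }
  throw new AssertionError("Pipeline succeeded but an assertion failure was expected");
}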

[29/50] [abbrv] beam git commit: Javadoc changes

Posted by ke...@apache.org.
Javadoc changes


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/0d08d2a4
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/0d08d2a4
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/0d08d2a4

Branch: refs/heads/gearpump-runner
Commit: 0d08d2a4561b286711ea4322f8d53ce5ad11e89f
Parents: e3cafb4
Author: Aviem Zur <av...@gmail.com>
Authored: Wed Mar 1 07:51:05 2017 +0200
Committer: Aviem Zur <av...@gmail.com>
Committed: Fri Mar 10 23:13:38 2017 +0200

----------------------------------------------------------------------
 .../java/org/apache/beam/sdk/testing/PAssert.java  | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/0d08d2a4/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
index 1faa024..d88c4d6 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
@@ -283,7 +283,8 @@ public class PAssert {
   }
 
   /**
-   * Constructs an {@link IterableAssert} for the elements of the provided {@link PCollection}.
+   * Constructs an {@link IterableAssert} for the elements of the provided {@link PCollection}
+   * with the specified reason.
    */
   public static <T> IterableAssert<T> that(String reason, PCollection<T> actual) {
     return new PCollectionContentsAssert<>(reason, actual);
@@ -299,8 +300,8 @@ public class PAssert {
   }
 
   /**
-   * Constructs an {@link IterableAssert} for the value of the provided {@link PCollection} which
-   * must contain a single {@code Iterable<T>} value.
+   * Constructs an {@link IterableAssert} for the value of the provided {@link PCollection} with
+   * the specified reason. The provided PCollection must contain a single {@code Iterable<T>} value.
    */
   public static <T> IterableAssert<T> thatSingletonIterable(
       String reason, PCollection<? extends Iterable<T>> actual) {
@@ -329,7 +330,8 @@ public class PAssert {
 
   /**
    * Constructs a {@link SingletonAssert} for the value of the provided
-   * {@code PCollection PCollection<T>}, which must be a singleton.
+   * {@code PCollection PCollection<T>} with the specified reason. The provided PCollection must be
+   * a singleton.
    */
   public static <T> SingletonAssert<T> thatSingleton(String reason, PCollection<T> actual) {
     return new PCollectionViewAssert<>(actual, View.<T>asSingleton(), actual.getCoder(), reason);
@@ -347,7 +349,8 @@ public class PAssert {
   }
 
   /**
-   * Constructs a {@link SingletonAssert} for the value of the provided {@link PCollection}.
+   * Constructs a {@link SingletonAssert} for the value of the provided {@link PCollection} with the
+   * specified reason.
    *
    * <p>Note that the actual value must be coded by a {@link KvCoder}, not just any
    * {@code Coder<K, V>}.
@@ -375,8 +378,8 @@ public class PAssert {
   }
 
   /**
-   * Constructs a {@link SingletonAssert} for the value of the provided {@link PCollection}, which
-   * must have at most one value per key.
+   * Constructs a {@link SingletonAssert} for the value of the provided {@link PCollection} with
+   * the specified reason. The {@link PCollection} must have at most one value per key.
    *
    * <p>Note that the actual value must be coded by a {@link KvCoder}, not just any
    * {@code Coder<K, V>}.


[19/50] [abbrv] beam git commit: [BEAM-1661] Shade guava in the JdbcIO

Posted by ke...@apache.org.
[BEAM-1661] Shade guava in the JdbcIO


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/818e521f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/818e521f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/818e521f

Branch: refs/heads/gearpump-runner
Commit: 818e521f17870bda94309ce1c4686b17b0048970
Parents: c12d432
Author: mingmxu <mi...@ebay.com>
Authored: Wed Mar 8 15:57:32 2017 -0800
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Mar 10 15:23:37 2017 +0100

----------------------------------------------------------------------
 sdks/java/io/jdbc/pom.xml | 46 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/818e521f/sdks/java/io/jdbc/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/pom.xml b/sdks/java/io/jdbc/pom.xml
index 3efeb80..ed73abd 100644
--- a/sdks/java/io/jdbc/pom.xml
+++ b/sdks/java/io/jdbc/pom.xml
@@ -166,4 +166,50 @@
     </dependency>
   </dependencies>
 
+  <build>
+    <pluginManagement>
+      <plugins>
+        <!-- BEAM-1661 -->
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-shade-plugin</artifactId>
+          <executions>
+            <execution>
+              <phase>package</phase>
+              <goals>
+                <goal>shade</goal>
+              </goals>
+              <configuration>
+                <artifactSet>
+                  <includes>
+                    <include>com.google.guava:guava</include>
+                  </includes>
+                </artifactSet>
+                <relocations>
+                  <relocation>
+                    <pattern>com.google.common</pattern>
+                    <shadedPattern>org.apache.beam.sdk.io.jdbc.repackaged.com.google.common</shadedPattern>
+                  </relocation>
+                  <relocation>
+                    <pattern>com.google.thirdparty</pattern>
+                    <shadedPattern>org.apache.beam.sdk.io.jdbc.repackaged.com.google.thirdparty</shadedPattern>
+                  </relocation>
+                </relocations>
+                <transformers>
+                  <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+                </transformers>
+              </configuration>
+            </execution>
+          </executions>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
 </project>


[17/50] [abbrv] beam git commit: [BEAM-797] A PipelineVisitor that creates a Spark-native pipeline.

Posted by ke...@apache.org.
[BEAM-797] A PipelineVisitor that creates a Spark-native pipeline.

[BEAM-797] Remove unnecessary temp dir from test


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/94bef14e
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/94bef14e
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/94bef14e

Branch: refs/heads/gearpump-runner
Commit: 94bef14e50c66bd1c87bf33937f76d16ba798ee0
Parents: 2c2424c
Author: Aviem Zur <av...@gmail.com>
Authored: Sat Mar 4 23:16:24 2017 +0200
Committer: Amit Sela <am...@gmail.com>
Committed: Fri Mar 10 15:14:25 2017 +0200

----------------------------------------------------------------------
 runners/spark/pom.xml                           |   5 +
 .../spark/SparkNativePipelineVisitor.java       | 202 +++++++++++++++++++
 .../apache/beam/runners/spark/SparkRunner.java  |  16 +-
 .../beam/runners/spark/SparkRunnerDebugger.java | 121 +++++++++++
 .../spark/translation/TransformEvaluator.java   |   1 +
 .../spark/translation/TransformTranslator.java  | 105 ++++++++++
 .../streaming/StreamingTransformTranslator.java |  53 ++++-
 .../runners/spark/SparkRunnerDebuggerTest.java  | 180 +++++++++++++++++
 8 files changed, 673 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/94bef14e/runners/spark/pom.xml
----------------------------------------------------------------------
diff --git a/runners/spark/pom.xml b/runners/spark/pom.xml
index ebd987d..a330820 100644
--- a/runners/spark/pom.xml
+++ b/runners/spark/pom.xml
@@ -196,6 +196,11 @@
       <scope>provided</scope>
     </dependency>
     <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+      <version>2.6</version>
+    </dependency>
+    <dependency>
       <groupId>commons-io</groupId>
       <artifactId>commons-io</artifactId>
       <version>2.4</version>

http://git-wip-us.apache.org/repos/asf/beam/blob/94bef14e/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java
new file mode 100644
index 0000000..056da97
--- /dev/null
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkNativePipelineVisitor.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.spark;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Optional;
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.runners.spark.metrics.MetricsAccumulator;
+import org.apache.beam.runners.spark.translation.EvaluationContext;
+import org.apache.beam.runners.spark.translation.SparkPipelineTranslator;
+import org.apache.beam.runners.spark.translation.TransformEvaluator;
+import org.apache.beam.runners.spark.translation.streaming.Checkpoint;
+import org.apache.beam.sdk.io.Read;
+import org.apache.beam.sdk.runners.TransformHierarchy;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.PInput;
+import org.apache.beam.sdk.values.POutput;
+import org.apache.commons.lang.WordUtils;
+
+
+/**
+ * Pipeline visitor for translating a Beam pipeline into equivalent Spark operations.
+ * Used for debugging purposes using {@link SparkRunnerDebugger}.
+ */
+public class SparkNativePipelineVisitor extends SparkRunner.Evaluator {
+  private final List<NativeTransform> transforms;
+  private final List<String> knownCompositesPackages =
+      Lists.newArrayList(
+          "org.apache.beam.sdk.transforms",
+          "org.apache.beam.runners.spark.examples");
+
+  SparkNativePipelineVisitor(SparkPipelineTranslator translator, EvaluationContext ctxt) {
+    super(translator, ctxt);
+    this.transforms = new ArrayList<>();
+    MetricsAccumulator.init(ctxt.getSparkContext(), Optional.<Checkpoint.CheckpointDir>absent());
+  }
+
+  @Override
+  public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
+    CompositeBehavior compositeBehavior = super.enterCompositeTransform(node);
+    PTransform<?, ?> transform = node.getTransform();
+    if (transform != null) {
+      @SuppressWarnings("unchecked")
+      final Class<PTransform<?, ?>> transformClass = (Class<PTransform<?, ?>>) transform.getClass();
+      if (compositeBehavior == CompositeBehavior.ENTER_TRANSFORM
+          && !knownComposite(transformClass)
+          && shouldDebug(node)) {
+        transforms.add(new NativeTransform(node, null, transform, true));
+      }
+    }
+    return compositeBehavior;
+  }
+
+  private boolean knownComposite(Class<PTransform<?, ?>> transform) {
+    String transformPackage = transform.getPackage().getName();
+    for (String knownCompositePackage : knownCompositesPackages) {
+      if (transformPackage.startsWith(knownCompositePackage)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  private boolean shouldDebug(final TransformHierarchy.Node node) {
+    return node == null || !Iterables.any(transforms, new Predicate<NativeTransform>() {
+      @Override
+      public boolean apply(NativeTransform debugTransform) {
+        return debugTransform.getNode().equals(node) && debugTransform.isComposite();
+      }
+    }) && shouldDebug(node.getEnclosingNode());
+  }
+
+  @Override
+  <TransformT extends PTransform<? super PInput, POutput>> void
+  doVisitTransform(TransformHierarchy.Node node) {
+    super.doVisitTransform(node);
+    @SuppressWarnings("unchecked")
+    TransformT transform = (TransformT) node.getTransform();
+    @SuppressWarnings("unchecked")
+    Class<TransformT> transformClass = (Class<TransformT>) transform.getClass();
+    @SuppressWarnings("unchecked")
+    TransformEvaluator<TransformT> evaluator = translate(node, transform, transformClass);
+    if (shouldDebug(node)) {
+      transforms.add(new NativeTransform(node, evaluator, transform, false));
+    }
+  }
+
+  String getDebugString() {
+    return Joiner.on("\n").join(transforms);
+  }
+
+  private static class NativeTransform {
+    private final TransformHierarchy.Node node;
+    private final TransformEvaluator<?> transformEvaluator;
+    private final PTransform<?, ?> transform;
+    private final boolean composite;
+
+    NativeTransform(
+        TransformHierarchy.Node node,
+        TransformEvaluator<?> transformEvaluator,
+        PTransform<?, ?> transform,
+        boolean composite) {
+      this.node = node;
+      this.transformEvaluator = transformEvaluator;
+      this.transform = transform;
+      this.composite = composite;
+    }
+
+    TransformHierarchy.Node getNode() {
+      return node;
+    }
+
+    boolean isComposite() {
+      return composite;
+    }
+
+    @Override
+    public String toString() {
+      try {
+        Class<? extends PTransform> transformClass = transform.getClass();
+        if (node.getFullName().equals("KafkaIO.Read")) {
+          return "KafkaUtils.createDirectStream(...)";
+        }
+        if (composite) {
+          return "_.<" + transformClass.getName() + ">";
+        }
+        String transformString = transformEvaluator.toNativeString();
+        if (transformString.contains("<fn>")) {
+          transformString = replaceFnString(transformClass, transformString, "fn");
+        } else if (transformString.contains("<windowFn>")) {
+          transformString = replaceFnString(transformClass, transformString, "windowFn");
+        } else if (transformString.contains("<source>")) {
+          String sourceName = "...";
+          if (transform instanceof Read.Bounded) {
+            sourceName = ((Read.Bounded<?>) transform).getSource().getClass().getName();
+          } else if (transform instanceof Read.Unbounded) {
+            sourceName = ((Read.Unbounded<?>) transform).getSource().getClass().getName();
+          }
+          transformString = transformString.replace("<source>", sourceName);
+        }
+        if (transformString.startsWith("sparkContext")
+            || transformString.startsWith("streamingContext")) {
+          return transformString;
+        }
+        return "_." + transformString;
+      } catch (
+          NoSuchMethodException
+              | InvocationTargetException
+              | IllegalAccessException
+              | NoSuchFieldException e) {
+        return "<FailedTranslation>";
+      }
+    }
+
+    private String replaceFnString(
+        Class<? extends PTransform> transformClass,
+        String transformString,
+        String fnFieldName)
+        throws IllegalAccessException, InvocationTargetException, NoSuchMethodException,
+        NoSuchFieldException {
+      Object fn =
+          transformClass.getMethod("get" + WordUtils.capitalize(fnFieldName)).invoke(transform);
+      Class<?> fnClass = fn.getClass();
+      String doFnName;
+      Class<?> enclosingClass = fnClass.getEnclosingClass();
+      if (enclosingClass != null && enclosingClass.equals(MapElements.class)) {
+        Field parent = fnClass.getDeclaredField("this$0");
+        parent.setAccessible(true);
+        Field fnField = enclosingClass.getDeclaredField(fnFieldName);
+        fnField.setAccessible(true);
+        doFnName = fnField.get(parent.get(fn)).getClass().getName();
+      } else {
+        doFnName = fnClass.getName();
+      }
+      transformString = transformString.replace("<" + fnFieldName + ">", doFnName);
+      return transformString;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/94bef14e/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
index 3f002da..a706f00 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunner.java
@@ -297,11 +297,12 @@ public final class SparkRunner extends PipelineRunner<SparkPipelineResult> {
   /**
    * Evaluator on the pipeline.
    */
+  @SuppressWarnings("WeakerAccess")
   public static class Evaluator extends Pipeline.PipelineVisitor.Defaults {
     private static final Logger LOG = LoggerFactory.getLogger(Evaluator.class);
 
-    private final EvaluationContext ctxt;
-    private final SparkPipelineTranslator translator;
+    protected final EvaluationContext ctxt;
+    protected final SparkPipelineTranslator translator;
 
     public Evaluator(SparkPipelineTranslator translator, EvaluationContext ctxt) {
       this.translator = translator;
@@ -324,7 +325,7 @@ public final class SparkRunner extends PipelineRunner<SparkPipelineResult> {
       return CompositeBehavior.ENTER_TRANSFORM;
     }
 
-    private boolean shouldDefer(TransformHierarchy.Node node) {
+    protected boolean shouldDefer(TransformHierarchy.Node node) {
       // if the input is not a PCollection, or it is but with non merging windows, don't defer.
       if (node.getInputs().size() != 1) {
         return false;
@@ -361,7 +362,7 @@ public final class SparkRunner extends PipelineRunner<SparkPipelineResult> {
     }
 
     <TransformT extends PTransform<? super PInput, POutput>> void
-        doVisitTransform(TransformHierarchy.Node node) {
+    doVisitTransform(TransformHierarchy.Node node) {
       @SuppressWarnings("unchecked")
       TransformT transform = (TransformT) node.getTransform();
       @SuppressWarnings("unchecked")
@@ -379,8 +380,8 @@ public final class SparkRunner extends PipelineRunner<SparkPipelineResult> {
      * Determine if this Node belongs to a Bounded branch of the pipeline, or Unbounded, and
      * translate with the proper translator.
      */
-    private <TransformT extends PTransform<? super PInput, POutput>>
-        TransformEvaluator<TransformT> translate(
+    protected <TransformT extends PTransform<? super PInput, POutput>>
+    TransformEvaluator<TransformT> translate(
             TransformHierarchy.Node node, TransformT transform, Class<TransformT> transformClass) {
       //--- determine if node is bounded/unbounded.
       // usually, the input determines if the PCollection to apply the next transformation to
@@ -400,7 +401,7 @@ public final class SparkRunner extends PipelineRunner<SparkPipelineResult> {
               : translator.translateUnbounded(transformClass);
     }
 
-    private PCollection.IsBounded isBoundedCollection(Collection<TaggedPValue> pValues) {
+    protected PCollection.IsBounded isBoundedCollection(Collection<TaggedPValue> pValues) {
       // anything that is not a PCollection, is BOUNDED.
       // For PCollections:
       // BOUNDED behaves as the Identity Element, BOUNDED + BOUNDED = BOUNDED
@@ -417,4 +418,3 @@ public final class SparkRunner extends PipelineRunner<SparkPipelineResult> {
     }
   }
 }
-

http://git-wip-us.apache.org/repos/asf/beam/blob/94bef14e/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunnerDebugger.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunnerDebugger.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunnerDebugger.java
new file mode 100644
index 0000000..395acff
--- /dev/null
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRunnerDebugger.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.spark;
+
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeoutException;
+import org.apache.beam.runners.spark.translation.EvaluationContext;
+import org.apache.beam.runners.spark.translation.SparkPipelineTranslator;
+import org.apache.beam.runners.spark.translation.TransformTranslator;
+import org.apache.beam.runners.spark.translation.streaming.StreamingTransformTranslator;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.runners.PipelineRunner;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+import org.joda.time.Duration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Pipeline runner which translates a Beam pipeline into equivalent Spark operations, without
+ * running them. Used for debugging purposes.
+ *
+ * <p>Example:</p>
+ *
+ * <pre>{@code
+ * SparkPipelineOptions options = PipelineOptionsFactory.as(SparkPipelineOptions.class);
+ * options.setRunner(SparkRunnerDebugger.class);
+ * Pipeline pipeline = Pipeline.create(options);
+ * SparkRunnerDebugger.DebugSparkPipelineResult result =
+ *     (SparkRunnerDebugger.DebugSparkPipelineResult) pipeline.run();
+ * String sparkPipeline = result.getDebugString();
+ * }</pre>
+ */
+public final class SparkRunnerDebugger extends PipelineRunner<SparkPipelineResult> {
+
+  private static final Logger LOG = LoggerFactory.getLogger(SparkRunnerDebugger.class);
+
+  private SparkRunnerDebugger() {}
+
+  @SuppressWarnings("unused")
+  public static SparkRunnerDebugger fromOptions(PipelineOptions options) {
+    return new SparkRunnerDebugger();
+  }
+
+  @Override
+  public SparkPipelineResult run(Pipeline pipeline) {
+    SparkPipelineResult result;
+
+    SparkPipelineOptions options = (SparkPipelineOptions) pipeline.getOptions();
+
+    JavaSparkContext jsc = new JavaSparkContext("local[1]", "Debug_Pipeline");
+    JavaStreamingContext jssc =
+        new JavaStreamingContext(jsc, new org.apache.spark.streaming.Duration(1000));
+    TransformTranslator.Translator translator = new TransformTranslator.Translator();
+    SparkNativePipelineVisitor visitor;
+    if (options.isStreaming()
+        || options instanceof TestSparkPipelineOptions
+        && ((TestSparkPipelineOptions) options).isForceStreaming()) {
+      SparkPipelineTranslator streamingTranslator =
+          new StreamingTransformTranslator.Translator(translator);
+      EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, jssc);
+      visitor = new SparkNativePipelineVisitor(streamingTranslator, ctxt);
+    } else {
+      EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, jssc);
+      visitor = new SparkNativePipelineVisitor(translator, ctxt);
+    }
+    pipeline.traverseTopologically(visitor);
+    jsc.stop();
+    String debugString = visitor.getDebugString();
+    LOG.info("Translated Native Spark pipeline:\n" + debugString);
+    return new DebugSparkPipelineResult(debugString);
+  }
+
+  /**
+   * PipelineResult of running a {@link Pipeline} using {@link SparkRunnerDebugger}.
+   * Use {@link #getDebugString} to get a {@link String} representation of the {@link Pipeline}
+   * translated into Spark native operations.
+   */
+  public static class DebugSparkPipelineResult extends SparkPipelineResult {
+    private final String debugString;
+
+    DebugSparkPipelineResult(String debugString) {
+      super(null, null);
+      this.debugString = debugString;
+    }
+
+    /**
+     * Returns Beam pipeline translated into Spark native operations.
+     */
+    String getDebugString() {
+      return debugString;
+    }
+
+    @Override protected void stop() {
+      // Empty implementation
+    }
+
+    @Override protected State awaitTermination(Duration duration)
+        throws TimeoutException, ExecutionException, InterruptedException {
+      return State.DONE;
+    }
+  }
+}

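For reference, end-to-end usage follows the class javadoc above: build a pipeline with the debugger as its runner, run it, and read back the translation (the transforms applied to the pipeline are elided here):

// Usage per the SparkRunnerDebugger javadoc above.
SparkPipelineOptions options = PipelineOptionsFactory.as(SparkPipelineOptions.class);
options.setRunner(SparkRunnerDebugger.class);
Pipeline pipeline = Pipeline.create(options);
// ... pipeline.apply(...) ...
SparkRunnerDebugger.DebugSparkPipelineResult result =
    (SparkRunnerDebugger.DebugSparkPipelineResult) pipeline.run();
String sparkPipeline = result.getDebugString();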
http://git-wip-us.apache.org/repos/asf/beam/blob/94bef14e/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformEvaluator.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformEvaluator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformEvaluator.java
index fbfa84d..585b933 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformEvaluator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformEvaluator.java
@@ -26,4 +26,5 @@ import org.apache.beam.sdk.transforms.PTransform;
  */
 public interface TransformEvaluator<TransformT extends PTransform<?, ?>> extends Serializable {
   void evaluate(TransformT transform, EvaluationContext context);
+  String toNativeString();
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/94bef14e/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
index 725d157..44b4039 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/TransformTranslator.java
@@ -121,6 +121,11 @@ public final class TransformTranslator {
         }
         context.putDataset(transform, new BoundedDataset<>(unionRDD));
       }
+
+      @Override
+      public String toNativeString() {
+        return "sparkContext.union(...)";
+      }
     };
   }
 
@@ -162,6 +167,11 @@ public final class TransformTranslator {
 
         context.putDataset(transform, new BoundedDataset<>(groupedAlsoByWindow));
       }
+
+      @Override
+      public String toNativeString() {
+        return "groupByKey()";
+      }
     };
   }
 
@@ -201,6 +211,11 @@ public final class TransformTranslator {
                        });
                context.putDataset(transform, new BoundedDataset<>(outRDD));
             }
+
+            @Override
+            public String toNativeString() {
+              return "map(new <fn>())";
+            }
           };
   }
 
@@ -267,6 +282,11 @@ public final class TransformTranslator {
             }
             context.putDataset(transform, new BoundedDataset<>(outRdd));
           }
+
+          @Override
+          public String toNativeString() {
+            return "aggregate(..., new <fn>(), ...)";
+          }
         };
   }
 
@@ -321,6 +341,11 @@ public final class TransformTranslator {
 
         context.putDataset(transform, new BoundedDataset<>(outRdd));
       }
+
+      @Override
+      public String toNativeString() {
+        return "combineByKey(..., new <fn>(), ...)";
+      }
     };
   }
 
@@ -348,6 +373,11 @@ public final class TransformTranslator {
             new BoundedDataset<>(inRDD.mapPartitions(new DoFnFunction<>(aggAccum, metricsAccum,
                 stepName, doFn, context.getRuntimeContext(), sideInputs, windowingStrategy))));
       }
+
+      @Override
+      public String toNativeString() {
+        return "mapPartitions(new <fn>())";
+      }
     };
   }
 
@@ -388,6 +418,11 @@ public final class TransformTranslator {
           context.putDataset(e.getValue(), new BoundedDataset<>(values));
         }
       }
+
+      @Override
+      public String toNativeString() {
+        return "mapPartitions(new <fn>())";
+      }
     };
   }
 
@@ -401,6 +436,11 @@ public final class TransformTranslator {
             .map(WindowingHelpers.<String>windowFunction());
         context.putDataset(transform, new BoundedDataset<>(rdd));
       }
+
+      @Override
+      public String toNativeString() {
+        return "sparkContext.textFile(...)";
+      }
     };
   }
 
@@ -426,6 +466,11 @@ public final class TransformTranslator {
         writeHadoopFile(last, new Configuration(), shardTemplateInfo, Text.class,
             NullWritable.class, TemplatedTextOutputFormat.class);
       }
+
+      @Override
+      public String toNativeString() {
+        return "saveAsNewAPIHadoopFile(...)";
+      }
     };
   }
 
@@ -450,6 +495,11 @@ public final class TransformTranslator {
             }).map(WindowingHelpers.<T>windowFunction());
         context.putDataset(transform, new BoundedDataset<>(rdd));
       }
+
+      @Override
+      public String toNativeString() {
+        return "sparkContext.newAPIHadoopFile(...)";
+      }
     };
   }
 
@@ -481,6 +531,11 @@ public final class TransformTranslator {
         writeHadoopFile(last, job.getConfiguration(), shardTemplateInfo,
             AvroKey.class, NullWritable.class, TemplatedAvroKeyOutputFormat.class);
       }
+
+      @Override
+      public String toNativeString() {
+        return "mapToPair(<objectToAvroKeyFn>).saveAsNewAPIHadoopFile(...)";
+      }
     };
   }
 
@@ -496,6 +551,11 @@ public final class TransformTranslator {
         // cache to avoid re-evaluation of the source by Spark's lazy DAG evaluation.
         context.putDataset(transform, new BoundedDataset<>(input.cache()));
       }
+
+      @Override
+      public String toNativeString() {
+        return "sparkContext.<readFrom(<source>)>()";
+      }
     };
   }
 
@@ -519,6 +579,11 @@ public final class TransformTranslator {
         }).map(WindowingHelpers.<KV<K, V>>windowFunction());
         context.putDataset(transform, new BoundedDataset<>(rdd));
       }
+
+      @Override
+      public String toNativeString() {
+        return "sparkContext.newAPIHadoopFile(...)";
+      }
     };
   }
 
@@ -547,6 +612,11 @@ public final class TransformTranslator {
         writeHadoopFile(last, conf, shardTemplateInfo,
             transform.getKeyClass(), transform.getValueClass(), transform.getFormatClass());
       }
+
+      @Override
+      public String toNativeString() {
+        return "saveAsNewAPIHadoopFile(...)";
+      }
     };
   }
 
@@ -619,6 +689,11 @@ public final class TransformTranslator {
               inRDD.map(new SparkAssignWindowFn<>(transform.getWindowFn()))));
         }
       }
+
+      @Override
+      public String toNativeString() {
+        return "map(new <windowFn>())";
+      }
     };
   }
 
@@ -632,6 +707,11 @@ public final class TransformTranslator {
         Coder<T> coder = context.getOutput(transform).getCoder();
         context.putBoundedDatasetFromValues(transform, elems, coder);
       }
+
+      @Override
+      public String toNativeString() {
+        return "sparkContext.parallelize(Arrays.asList(...))";
+      }
     };
   }
 
@@ -649,6 +729,11 @@ public final class TransformTranslator {
 
         context.putPView(output, iterCast, coderInternal);
       }
+
+      @Override
+      public String toNativeString() {
+        return "collect()";
+      }
     };
   }
 
@@ -666,6 +751,11 @@ public final class TransformTranslator {
 
         context.putPView(output, iterCast, coderInternal);
       }
+
+      @Override
+      public String toNativeString() {
+        return "collect()";
+      }
     };
   }
 
@@ -685,6 +775,11 @@ public final class TransformTranslator {
 
         context.putPView(output, iterCast, coderInternal);
       }
+
+      @Override
+      public String toNativeString() {
+        return "<createPCollectionView>";
+      }
     };
   }
 
@@ -706,6 +801,11 @@ public final class TransformTranslator {
 
         context.putDataset(transform, new BoundedDataset<String>(output));
       }
+
+      @Override
+      public String toNativeString() {
+        return "sparkContext.parallelize(rdd.getStorageLevel().description())";
+      }
     };
   }
 
@@ -732,6 +832,11 @@ public final class TransformTranslator {
 
         context.putDataset(transform, new BoundedDataset<>(reshuffled));
       }
+
+      @Override
+      public String toNativeString() {
+        return "repartition(...)";
+      }
     };
   }
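
Taken together, these per-evaluator strings are what SparkRunnerDebugger stitches into a
pseudo-Spark program, one operation per line, with downstream operations prefixed by "_."
(the exact shape is asserted in SparkRunnerDebuggerTest below), for example:

    sparkContext.parallelize(Arrays.asList(...))
    _.mapPartitions(new <fn>())
    _.combineByKey(..., new <fn>(), ...)
    _.groupByKey()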
 

http://git-wip-us.apache.org/repos/asf/beam/blob/94bef14e/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java
index ccf84b2..8a05fbb 100644
--- a/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java
+++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/translation/streaming/StreamingTransformTranslator.java
@@ -96,7 +96,7 @@ import org.apache.spark.streaming.api.java.JavaStreamingContext;
 /**
  * Supports translation between a Beam transform, and Spark's operations on DStreams.
  */
-final class StreamingTransformTranslator {
+public final class StreamingTransformTranslator {
 
   private StreamingTransformTranslator() {
   }
@@ -110,6 +110,11 @@ final class StreamingTransformTranslator {
             ((UnboundedDataset<T>) (context).borrowDataset(transform)).getDStream();
         dstream.map(WindowingHelpers.<T>unwindowFunction()).print(transform.getNum());
       }
+
+      @Override
+      public String toNativeString() {
+        return ".print(...)";
+      }
     };
   }
 
@@ -124,6 +129,11 @@ final class StreamingTransformTranslator {
                 context.getRuntimeContext(),
                 transform.getSource()));
       }
+
+      @Override
+      public String toNativeString() {
+        return "streamingContext.<readFrom(<source>)>()";
+      }
     };
   }
 
@@ -168,6 +178,11 @@ final class StreamingTransformTranslator {
             ImmutableMap.of(unboundedDataset.getStreamSources().get(0), times));
         context.putDataset(transform, unboundedDataset);
       }
+
+      @Override
+      public String toNativeString() {
+        return "streamingContext.queueStream(...)";
+      }
     };
   }
 
@@ -208,6 +223,11 @@ final class StreamingTransformTranslator {
             context.getStreamingContext().union(dStreams.remove(0), dStreams);
         context.putDataset(transform, new UnboundedDataset<>(unifiedStreams, streamingSources));
       }
+
+      @Override
+      public String toNativeString() {
+        return "streamingContext.union(...)";
+      }
     };
   }
 
@@ -235,6 +255,11 @@ final class StreamingTransformTranslator {
         context.putDataset(transform,
             new UnboundedDataset<>(outputStream, unboundedDataset.getStreamSources()));
       }
+
+      @Override
+      public String toNativeString() {
+        return "map(new <windowFn>())";
+      }
     };
   }
 
@@ -283,6 +308,11 @@ final class StreamingTransformTranslator {
 
         context.putDataset(transform, new UnboundedDataset<>(outStream, streamSources));
       }
+
+      @Override
+      public String toNativeString() {
+        return "groupByKey()";
+      }
     };
   }
 
@@ -329,6 +359,11 @@ final class StreamingTransformTranslator {
         context.putDataset(transform,
             new UnboundedDataset<>(outStream, unboundedDataset.getStreamSources()));
       }
+
+      @Override
+      public String toNativeString() {
+        return "map(new <fn>())";
+      }
     };
   }
 
@@ -375,6 +410,11 @@ final class StreamingTransformTranslator {
         context.putDataset(transform,
             new UnboundedDataset<>(outStream, unboundedDataset.getStreamSources()));
       }
+
+      @Override
+      public String toNativeString() {
+        return "mapPartitions(new <fn>())";
+      }
     };
   }
 
@@ -431,6 +471,11 @@ final class StreamingTransformTranslator {
               new UnboundedDataset<>(values, unboundedDataset.getStreamSources()));
         }
       }
+
+      @Override
+      public String toNativeString() {
+        return "mapPartitions(new <fn>())";
+      }
     };
   }
 
@@ -465,6 +510,10 @@ final class StreamingTransformTranslator {
 
         context.putDataset(transform, new UnboundedDataset<>(reshuffledStream, streamSources));
       }
+
+      @Override
+      public String toNativeString() {
+        return "repartition(...)";
+      }
     };
   }
 
@@ -491,7 +540,7 @@ final class StreamingTransformTranslator {
 
     private final SparkPipelineTranslator batchTranslator;
 
-    Translator(SparkPipelineTranslator batchTranslator) {
+    public Translator(SparkPipelineTranslator batchTranslator) {
       this.batchTranslator = batchTranslator;
     }
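
Widening the Translator constructor to public lets code outside this package chain streaming
translation on top of batch translation. A minimal sketch, assuming the batch-side
TransformTranslator.Translator seen elsewhere in this runner:

    SparkPipelineTranslator batchTranslator = new TransformTranslator.Translator();
    SparkPipelineTranslator streamingTranslator =
        new StreamingTransformTranslator.Translator(batchTranslator);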
 

http://git-wip-us.apache.org/repos/asf/beam/blob/94bef14e/runners/spark/src/test/java/org/apache/beam/runners/spark/SparkRunnerDebuggerTest.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/SparkRunnerDebuggerTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/SparkRunnerDebuggerTest.java
new file mode 100644
index 0000000..905b30e
--- /dev/null
+++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/SparkRunnerDebuggerTest.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.spark;
+
+import static org.junit.Assert.assertThat;
+
+import java.util.Collections;
+import org.apache.beam.runners.spark.examples.WordCount;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.io.kafka.KafkaIO;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.Distinct;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.Flatten;
+import org.apache.beam.sdk.transforms.GroupByKey;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.SimpleFunction;
+import org.apache.beam.sdk.transforms.Sum;
+import org.apache.beam.sdk.transforms.WithKeys;
+import org.apache.beam.sdk.transforms.windowing.FixedWindows;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionList;
+import org.hamcrest.Matchers;
+import org.joda.time.Duration;
+import org.junit.Rule;
+import org.junit.Test;
+
+
+/**
+ * Test {@link SparkRunnerDebugger} with different pipelines.
+ */
+public class SparkRunnerDebuggerTest {
+
+  @Rule
+  public final PipelineRule batchPipelineRule = PipelineRule.batch();
+
+  @Rule
+  public final PipelineRule streamingPipelineRule = PipelineRule.streaming();
+
+  @Test
+  public void debugBatchPipeline() {
+    TestSparkPipelineOptions options = batchPipelineRule.getOptions();
+    options.setRunner(SparkRunnerDebugger.class);
+
+    Pipeline pipeline = Pipeline.create(options);
+
+    PCollection<String> lines = pipeline
+        .apply(Create.of(Collections.<String>emptyList()).withCoder(StringUtf8Coder.of()));
+
+    PCollection<KV<String, Long>> wordCounts = lines
+        .apply(new WordCount.CountWords());
+
+    wordCounts
+        .apply(GroupByKey.<String, Long>create())
+        .apply(Combine.<String, Long, Long>groupedValues(Sum.ofLongs()));
+
+    PCollection<KV<String, Long>> wordCountsPlusOne = wordCounts
+        .apply(MapElements.via(new PlusOne()));
+
+    PCollectionList.of(wordCounts).and(wordCountsPlusOne)
+        .apply(Flatten.<KV<String, Long>>pCollections());
+
+    wordCounts
+        .apply(MapElements.via(new WordCount.FormatAsTextFn()))
+        .apply(TextIO.Write.to("!!PLACEHOLDER-OUTPUT-DIR!!").withNumShards(3).withSuffix(".txt"));
+
+    final String expectedPipeline = "sparkContext.parallelize(Arrays.asList(...))\n"
+        + "_.mapPartitions(new org.apache.beam.runners.spark.examples.WordCount$ExtractWordsFn())\n"
+        + "_.mapPartitions(new org.apache.beam.sdk.transforms.Count$PerElement$1())\n"
+        + "_.combineByKey(..., new org.apache.beam.sdk.transforms"
+        + ".Combine$CombineFn$KeyIgnoringCombineFn(), ...)\n"
+        + "_.groupByKey()\n"
+        + "_.map(new org.apache.beam.sdk.transforms.Combine$CombineFn$KeyIgnoringCombineFn())\n"
+        + "_.mapPartitions(new org.apache.beam.runners.spark"
+        + ".SparkRunnerDebuggerTest$PlusOne())\n"
+        + "sparkContext.union(...)\n"
+        + "_.mapPartitions(new org.apache.beam.runners.spark.examples.WordCount$FormatAsTextFn())\n"
+        + "_.<org.apache.beam.sdk.io.TextIO$Write$Bound>";
+
+    SparkRunnerDebugger.DebugSparkPipelineResult result =
+        (SparkRunnerDebugger.DebugSparkPipelineResult) pipeline.run();
+
+    assertThat("Debug pipeline did not equal expected", result.getDebugString(),
+        Matchers.equalTo(expectedPipeline));
+  }
+
+  @Test
+  public void debugStreamingPipeline() {
+    TestSparkPipelineOptions options = streamingPipelineRule.getOptions();
+    options.setRunner(SparkRunnerDebugger.class);
+
+    Pipeline pipeline = Pipeline.create(options);
+
+    KafkaIO.Read<String, String> read = KafkaIO.<String, String>read()
+        .withBootstrapServers("mykafka:9092")
+        .withTopics(Collections.singletonList("my_input_topic"))
+        .withKeyCoder(StringUtf8Coder.of())
+        .withValueCoder(StringUtf8Coder.of());
+
+    KafkaIO.Write<String, String> write = KafkaIO.<String, String>write()
+        .withBootstrapServers("myotherkafka:9092")
+        .withTopic("my_output_topic")
+        .withKeyCoder(StringUtf8Coder.of())
+        .withValueCoder(StringUtf8Coder.of());
+
+    KvCoder<String, String> stringKvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
+
+    pipeline
+        .apply(read.withoutMetadata()).setCoder(stringKvCoder)
+        .apply(Window.<KV<String, String>>into(FixedWindows.of(Duration.standardSeconds(5))))
+        .apply(ParDo.of(new SparkRunnerDebuggerTest.FormatKVFn()))
+        .apply(Distinct.<String>create())
+        .apply(WithKeys.of(new SparkRunnerDebuggerTest.ArbitraryKeyFunction()))
+        .apply(write);
+
+    final String expectedPipeline = "KafkaUtils.createDirectStream(...)\n"
+        + "_.map(new org.apache.beam.sdk.transforms.windowing.FixedWindows())\n"
+        + "_.mapPartitions(new org.apache.beam.runners.spark."
+        + "SparkRunnerDebuggerTest$FormatKVFn())\n"
+        + "_.mapPartitions(new org.apache.beam.sdk.transforms.Distinct$2())\n"
+        + "_.groupByKey()\n"
+        + "_.map(new org.apache.beam.sdk.transforms.Combine$CombineFn$KeyIgnoringCombineFn())\n"
+        + "_.mapPartitions(new org.apache.beam.sdk.transforms.Keys$1())\n"
+        + "_.mapPartitions(new org.apache.beam.sdk.transforms.WithKeys$2())\n"
+        + "_.<org.apache.beam.sdk.io.kafka.AutoValue_KafkaIO_Write>";
+
+    SparkRunnerDebugger.DebugSparkPipelineResult result =
+        (SparkRunnerDebugger.DebugSparkPipelineResult) pipeline.run();
+
+    assertThat("Debug pipeline did not equal expected",
+        result.getDebugString(),
+        Matchers.equalTo(expectedPipeline));
+  }
+
+  private static class FormatKVFn extends DoFn<KV<String, String>, String> {
+    @SuppressWarnings("unused")
+    @ProcessElement
+    public void processElement(ProcessContext c) {
+      c.output(c.element().getKey() + "," + c.element().getValue());
+    }
+  }
+
+  private static class ArbitraryKeyFunction implements SerializableFunction<String, String> {
+    @Override
+    public String apply(String input) {
+      return "someKey";
+    }
+  }
+
+  private static class PlusOne extends SimpleFunction<KV<String, Long>, KV<String, Long>> {
+    @Override
+    public KV<String, Long> apply(KV<String, Long> input) {
+      return KV.of(input.getKey(), input.getValue() + 1);
+    }
+  }
+}


[36/50] [abbrv] beam git commit: Add README to python tarball.

Posted by ke...@apache.org.
Add README to python tarball.

Also delete test-created files, to avoid them being included in the tarball.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/ec6da893
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/ec6da893
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/ec6da893

Branch: refs/heads/gearpump-runner
Commit: ec6da893bf36e7780728d0c08d47e1e4824a66c7
Parents: 9299e26
Author: Ahmet Altay <al...@google.com>
Authored: Fri Mar 10 13:42:17 2017 -0800
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Mar 10 14:22:37 2017 -0800

----------------------------------------------------------------------
 sdks/python/MANIFEST.in | 2 ++
 sdks/python/tox.ini     | 2 ++
 2 files changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/ec6da893/sdks/python/MANIFEST.in
----------------------------------------------------------------------
diff --git a/sdks/python/MANIFEST.in b/sdks/python/MANIFEST.in
index baa2fda..57f684e 100644
--- a/sdks/python/MANIFEST.in
+++ b/sdks/python/MANIFEST.in
@@ -17,3 +17,5 @@
 
 # This file is used from Python to sync versions
 include pom.xml
+
+include README.md

http://git-wip-us.apache.org/repos/asf/beam/blob/ec6da893/sdks/python/tox.ini
----------------------------------------------------------------------
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 8d8acfa..807fe3f 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -54,6 +54,8 @@ commands =
   # Clean up all cython generated files.
   find apache_beam -type f -name '*.c' -delete
   find apache_beam -type f -name '*.so' -delete
+  find target/build -type f -name '*.c' -delete
+  find target/build -type f -name '*.so' -delete
 passenv = TRAVIS*
 
 [testenv:py27gcp]


[06/50] [abbrv] beam git commit: Runner API context helper classes.

Posted by ke...@apache.org.
Runner API context helper classes.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bc76a186
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bc76a186
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bc76a186

Branch: refs/heads/gearpump-runner
Commit: bc76a186099568ef292ceb007388ae7174150bc2
Parents: 3bb125e
Author: Robert Bradshaw <ro...@gmail.com>
Authored: Tue Mar 7 12:04:27 2017 -0800
Committer: Robert Bradshaw <ro...@gmail.com>
Committed: Thu Mar 9 20:29:00 2017 -0800

----------------------------------------------------------------------
 sdks/python/apache_beam/pipeline.py | 62 ++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/bc76a186/sdks/python/apache_beam/pipeline.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py
index 7db39a9..4ec2e47 100644
--- a/sdks/python/apache_beam/pipeline.py
+++ b/sdks/python/apache_beam/pipeline.py
@@ -52,11 +52,14 @@ import os
 import shutil
 import tempfile
 
+from apache_beam import coders
 from apache_beam import pvalue
 from apache_beam import typehints
 from apache_beam.internal import pickler
 from apache_beam.runners import create_runner
 from apache_beam.runners import PipelineRunner
+from apache_beam.runners.api import beam_runner_api_pb2
+from apache_beam.transforms import core
 from apache_beam.transforms import ptransform
 from apache_beam.typehints import TypeCheckError
 from apache_beam.utils.pipeline_options import PipelineOptions
@@ -440,3 +443,62 @@ class AppliedPTransform(object):
         if v not in visited:
           visited.add(v)
           visitor.visit_value(v, self)
+
+
+class PipelineContextMap(object):
+  """This is a bi-directional map between objects and ids.
+
+  Under the hood it encodes and decodes these objects into runner API
+  representations.
+  """
+  def __init__(self, context, obj_type, proto_map=None):
+    self._pipeline_context = context
+    self._obj_type = obj_type
+    self._obj_to_id = {}
+    self._id_to_obj = {}
+    self._id_to_proto = proto_map if proto_map else {}
+    self._counter = 0
+
+  def _unique_ref(self):
+    self._counter += 1
+    return "ref_%s_%s" % (self._obj_type.__name__, self._counter)
+
+  def populate_map(self, proto_map):
+    for id in self._id_to_obj:
+      proto_map[id].CopyFrom(self._id_to_proto[id])
+
+  def get_id(self, obj):
+    if obj not in self._obj_to_id:
+      id = self._unique_ref()
+      self._id_to_obj[id] = obj
+      self._obj_to_id[obj] = id
+      self._id_to_proto[id] = obj.to_runner_api(self._pipeline_context)
+    return self._obj_to_id[obj]
+
+  def get_by_id(self, id):
+    if id not in self._id_to_obj:
+      self._id_to_obj[id] = self._obj_type.from_runner_api(
+        self._id_to_proto[id], self._pipeline_context)
+    return self._id_to_obj[id]
+
+
+class PipelineContext(object):
+
+  _COMPONENT_TYPES = {
+    'transforms': AppliedPTransform,
+    'pcollections': pvalue.PCollection,
+    'coders': coders.Coder,
+    'windowing_strategies': core.Windowing,
+    # TODO: environment
+  }
+
+  def __init__(self, context_proto=None):
+    for name, cls in self._COMPONENT_TYPES.items():
+      setattr(self, name,
+              PipelineContextMap(self, cls, getattr(context_proto, name, None)))
+
+  def to_runner_api(self):
+    context_proto = beam_runner_api_pb2.Components()
+    for name in self._COMPONENT_TYPES:
+      getattr(self, name).populate_map(getattr(context_proto, name))
+    return context_proto


[02/50] [abbrv] beam git commit: HadoopInputFormatIO with junits

Posted by ke...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/d01620c0/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/HadoopInputFormatIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/HadoopInputFormatIOTest.java b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/HadoopInputFormatIOTest.java
new file mode 100644
index 0000000..c25cf51
--- /dev/null
+++ b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/HadoopInputFormatIOTest.java
@@ -0,0 +1,844 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.beam.sdk.io.hadoop.inputformat;
+
+import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.apache.beam.sdk.Pipeline.PipelineExecutionException;
+import org.apache.beam.sdk.coders.AvroCoder;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.io.BoundedSource.BoundedReader;
+import org.apache.beam.sdk.io.hadoop.WritableCoder;
+import org.apache.beam.sdk.io.hadoop.inputformat.EmployeeInputFormat.EmployeeRecordReader;
+import org.apache.beam.sdk.io.hadoop.inputformat.EmployeeInputFormat.NewObjectsEmployeeInputSplit;
+import org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIO.HadoopInputFormatBoundedSource;
+import org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIO.SerializableConfiguration;
+import org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIO.SerializableSplit;
+import org.apache.beam.sdk.testing.PAssert;
+import org.apache.beam.sdk.testing.SourceTestUtils;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.SimpleFunction;
+import org.apache.beam.sdk.transforms.display.DisplayData;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PBegin;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.MapWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.Mockito;
+
+/**
+ * Unit tests for {@link HadoopInputFormatIO}.
+ */
+@RunWith(JUnit4.class)
+public class HadoopInputFormatIOTest {
+  static SerializableConfiguration serConf;
+  static SimpleFunction<Text, String> myKeyTranslate;
+  static SimpleFunction<Employee, String> myValueTranslate;
+
+  @Rule public final transient TestPipeline p = TestPipeline.create();
+  @Rule public ExpectedException thrown = ExpectedException.none();
+
+  private PBegin input = PBegin.in(p);
+
+  @BeforeClass
+  public static void setUp() throws IOException, InterruptedException {
+    serConf = loadTestConfiguration(
+                  EmployeeInputFormat.class,
+                  Text.class,
+                  Employee.class);
+    myKeyTranslate = new SimpleFunction<Text, String>() {
+      @Override
+      public String apply(Text input) {
+        return input.toString();
+      }
+    };
+    myValueTranslate = new SimpleFunction<Employee, String>() {
+      @Override
+      public String apply(Employee input) {
+        return input.getEmpName() + "_" + input.getEmpAddress();
+      }
+    };
+  }
+
+  @Test
+  public void testReadBuildsCorrectly() {
+    HadoopInputFormatIO.Read<String, String> read = HadoopInputFormatIO.<String, String>read()
+        .withConfiguration(serConf.getHadoopConfiguration())
+        .withKeyTranslation(myKeyTranslate)
+        .withValueTranslation(myValueTranslate);
+    assertEquals(serConf.getHadoopConfiguration(),
+        read.getConfiguration().getHadoopConfiguration());
+    assertEquals(myKeyTranslate, read.getKeyTranslationFunction());
+    assertEquals(myValueTranslate, read.getValueTranslationFunction());
+    assertEquals(myValueTranslate.getOutputTypeDescriptor(), read.getValueTypeDescriptor());
+    assertEquals(myKeyTranslate.getOutputTypeDescriptor(), read.getKeyTypeDescriptor());
+  }
+
+  /**
+   * This test validates that {@link HadoopInputFormatIO.Read Read} builds correctly when
+   * configuration, key translation, and value translation are supplied in a different order. It
+   * also validates that the output PCollection's key/value classes are set correctly even if the
+   * Hadoop configuration is set after the key/value translations.
+   */
+  @Test
+  public void testReadBuildsCorrectlyInDifferentOrder() {
+    HadoopInputFormatIO.Read<String, String> read =
+        HadoopInputFormatIO.<String, String>read()
+            .withValueTranslation(myValueTranslate)
+            .withConfiguration(serConf.getHadoopConfiguration())
+            .withKeyTranslation(myKeyTranslate);
+    assertEquals(serConf.getHadoopConfiguration(),
+        read.getConfiguration().getHadoopConfiguration());
+    assertEquals(myKeyTranslate, read.getKeyTranslationFunction());
+    assertEquals(myValueTranslate, read.getValueTranslationFunction());
+    assertEquals(myKeyTranslate.getOutputTypeDescriptor(), read.getKeyTypeDescriptor());
+    assertEquals(myValueTranslate.getOutputTypeDescriptor(), read.getValueTypeDescriptor());
+  }
+
+  /**
+   * This test validates {@link HadoopInputFormatIO.Read Read} object creation if
+   * {@link HadoopInputFormatIO.Read#withConfiguration() withConfiguration()} is called more than
+   * once.
+   * @throws InterruptedException
+   * @throws IOException
+   */
+  @Test
+  public void testReadBuildsCorrectlyIfWithConfigurationIsCalledMoreThanOneTime()
+      throws IOException, InterruptedException {
+    SerializableConfiguration diffConf =
+        loadTestConfiguration(
+            EmployeeInputFormat.class,
+            Employee.class,
+            Text.class);
+    HadoopInputFormatIO.Read<String, String> read = HadoopInputFormatIO.<String, String>read()
+        .withConfiguration(serConf.getHadoopConfiguration())
+        .withKeyTranslation(myKeyTranslate)
+        .withConfiguration(diffConf.getHadoopConfiguration());
+    assertEquals(diffConf.getHadoopConfiguration(),
+        read.getConfiguration().getHadoopConfiguration());
+    assertEquals(myKeyTranslate, read.getKeyTranslationFunction());
+    assertEquals(null, read.getValueTranslationFunction());
+    assertEquals(myKeyTranslate.getOutputTypeDescriptor(), read.getKeyTypeDescriptor());
+    assertEquals(diffConf.getHadoopConfiguration().getClass("value.class", Object.class), read
+        .getValueTypeDescriptor().getRawType());
+  }
+
+  /**
+   * This test validates functionality of {@link HadoopInputFormatIO.Read#populateDisplayData()
+   * populateDisplayData()}.
+   */
+  @Test
+  public void testReadDisplayData() {
+    HadoopInputFormatIO.Read<String, String> read = HadoopInputFormatIO.<String, String>read()
+        .withConfiguration(serConf.getHadoopConfiguration())
+        .withKeyTranslation(myKeyTranslate)
+        .withValueTranslation(myValueTranslate);
+    DisplayData displayData = DisplayData.from(read);
+    Iterator<Entry<String, String>> propertyElement = serConf.getHadoopConfiguration().iterator();
+    while (propertyElement.hasNext()) {
+      Entry<String, String> element = propertyElement.next();
+      assertThat(displayData, hasDisplayItem(element.getKey(), element.getValue()));
+    }
+  }
+
+  /**
+   * This test validates {@link HadoopInputFormatIO.Read Read} transform object creation fails with
+   * null configuration. {@link HadoopInputFormatIO.Read#withConfiguration() withConfiguration()}
+   * method checks whether the configuration is null and throws an exception if it is.
+   */
+  @Test
+  public void testReadObjectCreationFailsIfConfigurationIsNull() {
+    thrown.expect(NullPointerException.class);
+    HadoopInputFormatIO.<Text, Employee>read()
+          .withConfiguration(null);
+  }
+
+  /**
+   * This test validates {@link HadoopInputFormatIO.Read Read} transform object creation with only
+   * configuration.
+   */
+  @Test
+  public void testReadObjectCreationWithConfiguration() {
+    HadoopInputFormatIO.Read<Text, Employee> read = HadoopInputFormatIO.<Text, Employee>read()
+        .withConfiguration(serConf.getHadoopConfiguration());
+    assertEquals(serConf.getHadoopConfiguration(),
+        read.getConfiguration().getHadoopConfiguration());
+    assertEquals(null, read.getKeyTranslationFunction());
+    assertEquals(null, read.getValueTranslationFunction());
+    assertEquals(serConf.getHadoopConfiguration().getClass("key.class", Object.class), read
+        .getKeyTypeDescriptor().getRawType());
+    assertEquals(serConf.getHadoopConfiguration().getClass("value.class", Object.class), read
+        .getValueTypeDescriptor().getRawType());
+  }
+
+  /**
+   * This test validates that {@link HadoopInputFormatIO.Read Read} transform object creation
+   * fails with a configuration and a null key translation.
+   * {@link HadoopInputFormatIO.Read#withKeyTranslation() withKeyTranslation()} checks whether
+   * keyTranslation is null and throws an exception if a null value is passed.
+   */
+  @Test
+  public void testReadObjectCreationFailsIfKeyTranslationFunctionIsNull() {
+    thrown.expect(NullPointerException.class);
+    HadoopInputFormatIO.<String, Employee>read()
+        .withConfiguration(serConf.getHadoopConfiguration())
+        .withKeyTranslation(null);
+  }
+
+  /**
+   * This test validates {@link HadoopInputFormatIO.Read Read} transform object creation with
+   * configuration and key translation.
+   */
+  @Test
+  public void testReadObjectCreationWithConfigurationKeyTranslation() {
+    HadoopInputFormatIO.Read<String, Employee> read = HadoopInputFormatIO.<String, Employee>read()
+        .withConfiguration(serConf.getHadoopConfiguration())
+        .withKeyTranslation(myKeyTranslate);
+    assertEquals(serConf.getHadoopConfiguration(),
+        read.getConfiguration().getHadoopConfiguration());
+    assertEquals(myKeyTranslate, read.getKeyTranslationFunction());
+    assertEquals(null, read.getValueTranslationFunction());
+    assertEquals(myKeyTranslate.getOutputTypeDescriptor().getRawType(),
+        read.getKeyTypeDescriptor().getRawType());
+    assertEquals(serConf.getHadoopConfiguration().getClass("value.class", Object.class),
+        read.getValueTypeDescriptor().getRawType());
+  }
+
+  /**
+   * This test validates {@link HadoopInputFormatIO.Read Read} transform object creation fails with
+   * configuration and null value translation.
+   * {@link HadoopInputFormatIO.Read#withValueTranslation() withValueTranslation()} checks whether
+   * valueTranslation is null and throws an exception if a null value is passed.
+   */
+  @Test
+  public void testReadObjectCreationFailsIfValueTranslationFunctionIsNull() {
+    thrown.expect(NullPointerException.class);
+    HadoopInputFormatIO.<Text, String>read()
+        .withConfiguration(serConf.getHadoopConfiguration())
+        .withValueTranslation(null);
+  }
+
+  /**
+   * This test validates {@link HadoopInputFormatIO.Read Read} transform object creation with
+   * configuration and value translation.
+   */
+  @Test
+  public void testReadObjectCreationWithConfigurationValueTranslation() {
+    HadoopInputFormatIO.Read<Text, String> read = HadoopInputFormatIO.<Text, String>read()
+        .withConfiguration(serConf.getHadoopConfiguration())
+        .withValueTranslation(myValueTranslate);
+    assertEquals(serConf.getHadoopConfiguration(),
+        read.getConfiguration().getHadoopConfiguration());
+    assertEquals(null, read.getKeyTranslationFunction());
+    assertEquals(myValueTranslate, read.getValueTranslationFunction());
+    assertEquals(serConf.getHadoopConfiguration().getClass("key.class", Object.class),
+        read.getKeyTypeDescriptor().getRawType());
+    assertEquals(myValueTranslate.getOutputTypeDescriptor().getRawType(),
+        read.getValueTypeDescriptor().getRawType());
+  }
+
+  /**
+   * This test validates {@link HadoopInputFormatIO.Read Read} transform object creation with
+   * configuration, key translation and value translation.
+   */
+  @Test
+  public void testReadObjectCreationWithConfigurationKeyTranslationValueTranslation() {
+    HadoopInputFormatIO.Read<String, String> read = HadoopInputFormatIO.<String, String>read()
+        .withConfiguration(serConf.getHadoopConfiguration())
+        .withKeyTranslation(myKeyTranslate)
+        .withValueTranslation(myValueTranslate);
+    assertEquals(serConf.getHadoopConfiguration(),
+        read.getConfiguration().getHadoopConfiguration());
+    assertEquals(myKeyTranslate, read.getKeyTranslationFunction());
+    assertEquals(myValueTranslate, read.getValueTranslationFunction());
+    assertEquals(myKeyTranslate.getOutputTypeDescriptor().getRawType(),
+        read.getKeyTypeDescriptor().getRawType());
+    assertEquals(myValueTranslate.getOutputTypeDescriptor().getRawType(),
+        read.getValueTypeDescriptor().getRawType());
+  }
+
+  /**
+   * This test validates functionality of {@link HadoopInputFormatIO.Read#validate()
+   * Read.validate()} function when Read transform is created without calling
+   * {@link HadoopInputFormatIO.Read#withConfiguration() withConfiguration()}.
+   */
+  @Test
+  public void testReadValidationFailsMissingConfiguration() {
+    HadoopInputFormatIO.Read<String, String> read = HadoopInputFormatIO.<String, String>read();
+    thrown.expect(NullPointerException.class);
+    read.validate(input);
+  }
+
+  /**
+   * This test validates functionality of {@link HadoopInputFormatIO.Read#withConfiguration()
+   * withConfiguration()} function when Hadoop InputFormat class is not provided by the user in
+   * configuration.
+   */
+  @Test
+  public void testReadValidationFailsMissingInputFormatInConf() {
+    Configuration configuration = new Configuration();
+    configuration.setClass("key.class", Text.class, Object.class);
+    configuration.setClass("value.class", Employee.class, Object.class);
+    thrown.expect(NullPointerException.class);
+    HadoopInputFormatIO.<Text, Employee>read()
+        .withConfiguration(configuration);
+  }
+
+  /**
+   * This test validates functionality of {@link HadoopInputFormatIO.Read#withConfiguration()
+   * withConfiguration()} function when key class is not provided by the user in configuration.
+   */
+  @Test
+  public void testReadValidationFailsMissingKeyClassInConf() {
+    Configuration configuration = new Configuration();
+    configuration.setClass("mapreduce.job.inputformat.class", EmployeeInputFormat.class,
+        InputFormat.class);
+    configuration.setClass("value.class", Employee.class, Object.class);
+    thrown.expect(NullPointerException.class);
+    HadoopInputFormatIO.<Text, Employee>read()
+        .withConfiguration(configuration);
+  }
+
+  /**
+   * This test validates functionality of {@link HadoopInputFormatIO.Read#withConfiguration()
+   * withConfiguration()} function when value class is not provided by the user in configuration.
+   */
+  @Test
+  public void testReadValidationFailsMissingValueClassInConf() {
+    Configuration configuration = new Configuration();
+    configuration.setClass("mapreduce.job.inputformat.class", EmployeeInputFormat.class,
+        InputFormat.class);
+    configuration.setClass("key.class", Text.class, Object.class);
+    thrown.expect(NullPointerException.class);
+    HadoopInputFormatIO.<Text, Employee>read().withConfiguration(configuration);
+  }
+
+  /**
+   * This test validates functionality of {@link HadoopInputFormatIO.Read#validate()
+   * Read.validate()} when the input type of myKeyTranslate (the simple function provided by the
+   * user for key translation) is not the same as the Hadoop InputFormat's key class (the property
+   * set in the configuration as "key.class").
+   */
+  @Test
+  public void testReadValidationFailsWithWrongInputTypeKeyTranslationFunction() {
+    SimpleFunction<LongWritable, String> myKeyTranslateWithWrongInputType =
+        new SimpleFunction<LongWritable, String>() {
+          @Override
+          public String apply(LongWritable input) {
+            return input.toString();
+          }
+        };
+    HadoopInputFormatIO.Read<String, Employee> read = HadoopInputFormatIO.<String, Employee>read()
+        .withConfiguration(serConf.getHadoopConfiguration())
+        .withKeyTranslation(myKeyTranslateWithWrongInputType);
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage(String.format(
+        "Key translation's input type is not same as hadoop InputFormat : %s key " + "class : %s",
+        serConf.getHadoopConfiguration().getClass("mapreduce.job.inputformat.class",
+            InputFormat.class), serConf.getHadoopConfiguration()
+            .getClass("key.class", Object.class)));
+    read.validate(input);
+  }
+
+  /**
+   * This test validates functionality of {@link HadoopInputFormatIO.Read#validate()
+   * Read.validate()} when the input type of myValueTranslate (the simple function provided by the
+   * user for value translation) is not the same as the Hadoop InputFormat's value class (the
+   * property set in the configuration as "value.class").
+   */
+  @Test
+  public void testReadValidationFailsWithWrongInputTypeValueTranslationFunction() {
+    SimpleFunction<LongWritable, String> myValueTranslateWithWrongInputType =
+        new SimpleFunction<LongWritable, String>() {
+          @Override
+          public String apply(LongWritable input) {
+            return input.toString();
+          }
+        };
+    HadoopInputFormatIO.Read<Text, String> read =
+        HadoopInputFormatIO.<Text, String>read()
+            .withConfiguration(serConf.getHadoopConfiguration())
+            .withValueTranslation(myValueTranslateWithWrongInputType);
+    String expectedMessage =
+        String.format(
+            "Value translation's input type is not same as hadoop InputFormat :  "
+                + "%s value class : %s",
+            serConf.getHadoopConfiguration().getClass("mapreduce.job.inputformat.class",
+                InputFormat.class),
+            serConf.getHadoopConfiguration().getClass("value.class", Object.class));
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage(expectedMessage);
+    read.validate(input);
+  }
+
+  /**
+   * This test validates reading from Hadoop InputFormat if wrong key class is set in
+   * configuration.
+   */
+  @Test
+  public void testReadFailsWithWrongKeyClass() {
+    SerializableConfiguration wrongConf = loadTestConfiguration(
+       EmployeeInputFormat.class,
+       MapWritable.class, // Actual key class is Text.class.
+       Employee.class);
+    HadoopInputFormatIO.Read<Text, String> read = HadoopInputFormatIO.<Text, String>read()
+        .withConfiguration(wrongConf.getHadoopConfiguration());
+    String expectedMessage =
+        String.format("java.lang.IllegalArgumentException: " + "Wrong InputFormat key class in "
+            + "configuration : Expected key.class is %s but was %s.", Text.class.getName(),
+            MapWritable.class.getName());
+    thrown.expect(PipelineExecutionException.class);
+    thrown.expectMessage(expectedMessage);
+    p.apply("ReadTest", read);
+    p.run();
+  }
+
+  /**
+   * This test validates reading from Hadoop InputFormat if wrong value class is set in
+   * configuration.
+   */
+  @Test
+  public void testReadFailsWithWrongValueClass() {
+    SerializableConfiguration wrongConf = loadTestConfiguration(
+       EmployeeInputFormat.class,
+       Text.class,
+       MapWritable.class); // Actual value class is Employee.class.
+    HadoopInputFormatIO.Read<Text, MapWritable> read = HadoopInputFormatIO.<Text, MapWritable>read()
+        .withConfiguration(wrongConf.getHadoopConfiguration());
+    String expectedMessage =
+        String.format("java.lang.IllegalArgumentException: "
+            + "Wrong InputFormat value class in configuration : "
+            + "Expected value.class is %s but was %s.", Employee.class.getName(),
+            MapWritable.class.getName());
+    thrown.expect(PipelineExecutionException.class);
+    thrown.expectMessage(expectedMessage);
+    p.apply("ReadTest", read);
+    p.run();
+  }
+
+  @Test
+  public void testReadingData() throws Exception {
+    HadoopInputFormatIO.Read<Text, Employee> read = HadoopInputFormatIO.<Text, Employee>read()
+        .withConfiguration(serConf.getHadoopConfiguration());
+    List<KV<Text, Employee>> expected = TestEmployeeDataSet.getEmployeeData();
+    PCollection<KV<Text, Employee>> actual = p.apply("ReadTest", read);
+    PAssert.that(actual).containsInAnyOrder(expected);
+    p.run();
+  }
+
+  /**
+   * This test validates behavior of {@link HadoopInputFormatBoundedSource} if RecordReader object
+   * creation fails.
+   */
+  @Test
+  public void testReadIfCreateRecordReaderFails() throws Exception {
+    thrown.expect(Exception.class);
+    thrown.expectMessage("Exception in creating RecordReader");
+    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
+    Mockito.when(
+        mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
+            Mockito.any(TaskAttemptContext.class))).thenThrow(
+        new IOException("Exception in creating RecordReader"));
+    HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
+        new HadoopInputFormatBoundedSource<Text, Employee>(
+            serConf,
+            WritableCoder.of(Text.class),
+            AvroCoder.of(Employee.class),
+            null, // No key translation required.
+            null, // No value translation required.
+            new SerializableSplit());
+    boundedSource.setInputFormatObj(mockInputFormat);
+    SourceTestUtils.readFromSource(boundedSource, p.getOptions());
+  }
+
+  /**
+   * This test validates behavior of {@link HadoopInputFormatBoundedSource} if
+   * {@link InputFormat#createRecordReader() createRecordReader()} of InputFormat returns null.
+   */
+  @Test
+  public void testReadWithNullCreateRecordReader() throws Exception {
+    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
+    thrown.expect(IOException.class);
+    thrown.expectMessage(String.format("Null RecordReader object returned by %s",
+            mockInputFormat.getClass()));
+    Mockito.when(
+        mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
+            Mockito.any(TaskAttemptContext.class))).thenReturn(null);
+    HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
+        new HadoopInputFormatBoundedSource<Text, Employee>(
+            serConf,
+            WritableCoder.of(Text.class),
+            AvroCoder.of(Employee.class),
+            null, // No key translation required.
+            null, // No value translation required.
+            new SerializableSplit());
+    boundedSource.setInputFormatObj(mockInputFormat);
+    SourceTestUtils.readFromSource(boundedSource, p.getOptions());
+  }
+
+  /**
+   * This test validates behavior of
+   * {@link HadoopInputFormatBoundedSource.HadoopInputFormatReader#start() start()} method if
+   * InputFormat's {@link InputFormat#getSplits() getSplits()} returns an InputSplit list having
+   * zero records.
+   */
+  @Test
+  public void testReadersStartWhenZeroRecords() throws Exception {
+    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
+    EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class);
+    Mockito.when(
+        mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
+            Mockito.any(TaskAttemptContext.class))).thenReturn(mockReader);
+    Mockito.when(mockReader.nextKeyValue()).thenReturn(false);
+    InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
+    HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
+        new HadoopInputFormatBoundedSource<Text, Employee>(
+            serConf,
+            WritableCoder.of(Text.class),
+            AvroCoder.of(Employee.class),
+            null, // No key translation required.
+            null, // No value translation required.
+            new SerializableSplit(mockInputSplit));
+    BoundedReader<KV<Text, Employee>> boundedReader = boundedSource.createReader(p.getOptions());
+    assertEquals(false, boundedReader.start());
+    assertEquals(Double.valueOf(1), boundedReader.getFractionConsumed());
+  }
+
+  /**
+   * This test validates the method getFractionConsumed(), which indicates the progress of the
+   * read in the range of 0 to 1.
+   */
+  @Test
+  public void testReadersGetFractionConsumed() throws Exception {
+    List<KV<Text, Employee>> referenceRecords = TestEmployeeDataSet.getEmployeeData();
+    HadoopInputFormatBoundedSource<Text, Employee> hifSource = getTestHIFSource(
+        EmployeeInputFormat.class,
+        Text.class,
+        Employee.class,
+        WritableCoder.of(Text.class),
+        AvroCoder.of(Employee.class));
+    long estimatedSize = hifSource.getEstimatedSizeBytes(p.getOptions());
+    // Validate that the estimated size equals the number of records.
+    assertEquals(referenceRecords.size(), estimatedSize);
+    List<BoundedSource<KV<Text, Employee>>> boundedSourceList =
+        hifSource.splitIntoBundles(0, p.getOptions());
+    // Validate if splitIntoBundles() has split correctly.
+    assertEquals(TestEmployeeDataSet.NUMBER_OF_SPLITS, boundedSourceList.size());
+    List<KV<Text, Employee>> bundleRecords = new ArrayList<>();
+    for (BoundedSource<KV<Text, Employee>> source : boundedSourceList) {
+      List<KV<Text, Employee>> elements = new ArrayList<KV<Text, Employee>>();
+      BoundedReader<KV<Text, Employee>> reader = source.createReader(p.getOptions());
+      float recordsRead = 0;
+      // When start is not called, getFractionConsumed() should return 0.
+      assertEquals(Double.valueOf(0), reader.getFractionConsumed());
+      boolean start = reader.start();
+      assertEquals(true, start);
+      if (start) {
+        elements.add(reader.getCurrent());
+        boolean advance = reader.advance();
+        // Validate if getFractionConsumed() returns the correct fraction based on
+        // the number of records read in the split.
+        assertEquals(
+            Double.valueOf(++recordsRead / TestEmployeeDataSet.NUMBER_OF_RECORDS_IN_EACH_SPLIT),
+            reader.getFractionConsumed());
+        assertEquals(true, advance);
+        while (advance) {
+          elements.add(reader.getCurrent());
+          advance = reader.advance();
+          assertEquals(
+              Double.valueOf(++recordsRead / TestEmployeeDataSet.NUMBER_OF_RECORDS_IN_EACH_SPLIT),
+              reader.getFractionConsumed());
+        }
+        bundleRecords.addAll(elements);
+      }
+      // Validate if getFractionConsumed() returns 1 after reading is complete.
+      assertEquals(Double.valueOf(1), reader.getFractionConsumed());
+      reader.close();
+    }
+    assertThat(bundleRecords, containsInAnyOrder(referenceRecords.toArray()));
+  }
+
+  /**
+   * This test validates that reader and its parent source reads the same records.
+   */
+  @Test
+  public void testReaderAndParentSourceReadsSameData() throws Exception {
+    InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
+    HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
+        new HadoopInputFormatBoundedSource<Text, Employee>(
+            serConf,
+            WritableCoder.of(Text.class),
+            AvroCoder.of(Employee.class),
+            null, // No key translation required.
+            null, // No value translation required.
+            new SerializableSplit(mockInputSplit));
+    BoundedReader<KV<Text, Employee>> reader = boundedSource
+        .createReader(p.getOptions());
+    SourceTestUtils.assertUnstartedReaderReadsSameAsItsSource(reader, p.getOptions());
+  }
+
+  /**
+   * This test verifies that the method
+   * {@link HadoopInputFormatBoundedSource.HadoopInputFormatReader#getCurrentSource()
+   * getCurrentSource()} returns the correct source object.
+   */
+  @Test
+  public void testGetCurrentSourceFunction() throws Exception {
+    SerializableSplit split = new SerializableSplit();
+    BoundedSource<KV<Text, Employee>> source =
+        new HadoopInputFormatBoundedSource<Text, Employee>(
+            serConf,
+            WritableCoder.of(Text.class),
+            AvroCoder.of(Employee.class),
+            null, // No key translation required.
+            null, // No value translation required.
+            split);
+    BoundedReader<KV<Text, Employee>> hifReader = source.createReader(p.getOptions());
+    BoundedSource<KV<Text, Employee>> hifSource = hifReader.getCurrentSource();
+    assertEquals(hifSource, source);
+  }
+
+  /**
+   * This test validates behavior of {@link HadoopInputFormatBoundedSource#createReader()
+   * createReader()} method when {@link HadoopInputFormatBoundedSource#splitIntoBundles()
+   * splitIntoBundles()} is not called.
+   */
+  @Test
+  public void testCreateReaderIfSplitIntoBundlesNotCalled() throws Exception {
+    HadoopInputFormatBoundedSource<Text, Employee> hifSource = getTestHIFSource(
+        EmployeeInputFormat.class,
+        Text.class,
+        Employee.class,
+        WritableCoder.of(Text.class),
+        AvroCoder.of(Employee.class));
+    thrown.expect(IOException.class);
+    thrown.expectMessage("Cannot create reader as source is not split yet.");
+    hifSource.createReader(p.getOptions());
+  }
+
+  /**
+   * This test validates behavior of
+   * {@link HadoopInputFormatBoundedSource#computeSplitsIfNecessary() computeSplits()} when Hadoop
+   * InputFormat's {@link InputFormat#getSplits() getSplits()} returns empty list.
+   */
+  @Test
+  public void testComputeSplitsIfGetSplitsReturnsEmptyList() throws Exception {
+    InputFormat<?, ?> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
+    SerializableSplit mockInputSplit = Mockito.mock(SerializableSplit.class);
+    Mockito.when(mockInputFormat.getSplits(Mockito.any(JobContext.class))).thenReturn(
+        new ArrayList<InputSplit>());
+    HadoopInputFormatBoundedSource<Text, Employee> hifSource =
+        new HadoopInputFormatBoundedSource<Text, Employee>(
+            serConf,
+            WritableCoder.of(Text.class),
+            AvroCoder.of(Employee.class),
+            null, // No key translation required.
+            null, // No value translation required.
+            mockInputSplit);
+    thrown.expect(IOException.class);
+    thrown.expectMessage("Error in computing splits, getSplits() returns a empty list");
+    hifSource.setInputFormatObj(mockInputFormat);
+    hifSource.computeSplitsIfNecessary();
+  }
+
+  /**
+   * This test validates behavior of
+   * {@link HadoopInputFormatBoundedSource#computeSplitsIfNecessary() computeSplits()} when Hadoop
+   * InputFormat's {@link InputFormat#getSplits() getSplits()} returns NULL value.
+   */
+  @Test
+  public void testComputeSplitsIfGetSplitsReturnsNullValue() throws Exception {
+    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
+    SerializableSplit mockInputSplit = Mockito.mock(SerializableSplit.class);
+    Mockito.when(mockInputFormat.getSplits(Mockito.any(JobContext.class))).thenReturn(null);
+    HadoopInputFormatBoundedSource<Text, Employee> hifSource =
+        new HadoopInputFormatBoundedSource<Text, Employee>(
+            serConf,
+            WritableCoder.of(Text.class),
+            AvroCoder.of(Employee.class),
+            null, // No key translation required.
+            null, // No value translation required.
+            mockInputSplit);
+    thrown.expect(IOException.class);
+    thrown.expectMessage("Error in computing splits, getSplits() returns null.");
+    hifSource.setInputFormatObj(mockInputFormat);
+    hifSource.computeSplitsIfNecessary();
+  }
+
+  /**
+   * This test validates behavior of
+   * {@link HadoopInputFormatBoundedSource#computeSplitsIfNecessary() computeSplits()} if Hadoop
+   * InputFormat's {@link InputFormat#getSplits() getSplits()} returns an InputSplit list
+   * containing some null values.
+   */
+  @Test
+  public void testComputeSplitsIfGetSplitsReturnsListHavingNullValues() throws Exception {
+    // InputSplit list having null value.
+    InputSplit mockInputSplit =
+        Mockito.mock(InputSplit.class, Mockito.withSettings().extraInterfaces(Writable.class));
+    List<InputSplit> inputSplitList = new ArrayList<InputSplit>();
+    inputSplitList.add(mockInputSplit);
+    inputSplitList.add(null);
+    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
+    Mockito.when(mockInputFormat.getSplits(Mockito.any(JobContext.class))).thenReturn(
+        inputSplitList);
+    HadoopInputFormatBoundedSource<Text, Employee> hifSource =
+        new HadoopInputFormatBoundedSource<Text, Employee>(
+            serConf,
+            WritableCoder.of(Text.class),
+            AvroCoder.of(Employee.class),
+            null, // No key translation required.
+            null, // No value translation required.
+            new SerializableSplit());
+    thrown.expect(IOException.class);
+    thrown.expectMessage("Error in computing splits, split is null in InputSplits list populated "
+        + "by getSplits() : ");
+    hifSource.setInputFormatObj(mockInputFormat);
+    hifSource.computeSplitsIfNecessary();
+  }
+
+  /**
+   * This test validates that records emitted in a PCollection are immutable if the InputFormat's
+   * RecordReader returns the same objects (i.e. the same locations in memory) with updated values
+   * for each record.
+   */
+  @Test
+  public void testImmutabilityOfOutputOfReadIfRecordReaderObjectsAreMutable() throws Exception {
+    List<BoundedSource<KV<Text, Employee>>> boundedSourceList = getBoundedSourceList(
+       ReuseObjectsEmployeeInputFormat.class,
+       Text.class,
+       Employee.class,
+       WritableCoder.of(Text.class),
+       AvroCoder.of(Employee.class));
+    List<KV<Text, Employee>> bundleRecords = new ArrayList<>();
+    for (BoundedSource<KV<Text, Employee>> source : boundedSourceList) {
+      List<KV<Text, Employee>> elems = SourceTestUtils.readFromSource(source, p.getOptions());
+      bundleRecords.addAll(elems);
+    }
+    List<KV<Text, Employee>> referenceRecords = TestEmployeeDataSet.getEmployeeData();
+    assertThat(bundleRecords, containsInAnyOrder(referenceRecords.toArray()));
+  }
+
+  /**
+   * Test reading if InputFormat implements {@link org.apache.hadoop.conf.Configurable
+   * Configurable}.
+   */
+  @Test
+  public void testReadingWithConfigurableInputFormat() throws Exception {
+    List<BoundedSource<KV<Text, Employee>>> boundedSourceList = getBoundedSourceList(
+        ConfigurableEmployeeInputFormat.class,
+        Text.class,
+        Employee.class,
+        WritableCoder.of(Text.class),
+        AvroCoder.of(Employee.class));
+    for (BoundedSource<KV<Text, Employee>> source : boundedSourceList) {
+      // Cast to HadoopInputFormatBoundedSource to access getInputFormat().
+      @SuppressWarnings("unchecked")
+      HadoopInputFormatBoundedSource<Text, Employee> hifSource =
+          (HadoopInputFormatBoundedSource<Text, Employee>) source;
+      hifSource.createInputFormatInstance();
+      ConfigurableEmployeeInputFormat inputFormatObj =
+          (ConfigurableEmployeeInputFormat) hifSource.getInputFormat();
+      assertEquals(true, inputFormatObj.isConfSet);
+    }
+  }
+
+  /**
+   * This test validates that records emitted in a PCollection are immutable if the InputFormat's
+   * {@link org.apache.hadoop.mapreduce.RecordReader RecordReader} returns different objects (i.e.
+   * different locations in memory).
+   */
+  @Test
+  public void testImmutabilityOfOutputOfReadIfRecordReaderObjectsAreImmutable() throws Exception {
+    List<BoundedSource<KV<Text, Employee>>> boundedSourceList = getBoundedSourceList(
+       EmployeeInputFormat.class,
+       Text.class,
+       Employee.class,
+       WritableCoder.of(Text.class),
+       AvroCoder.of(Employee.class));
+    List<KV<Text, Employee>> bundleRecords = new ArrayList<>();
+    for (BoundedSource<KV<Text, Employee>> source : boundedSourceList) {
+      List<KV<Text, Employee>> elems = SourceTestUtils.readFromSource(source, p.getOptions());
+      bundleRecords.addAll(elems);
+    }
+    List<KV<Text, Employee>> referenceRecords = TestEmployeeDataSet.getEmployeeData();
+    assertThat(bundleRecords, containsInAnyOrder(referenceRecords.toArray()));
+  }
+
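+  /**
+   * Builds a minimal Hadoop configuration with the three mandatory properties (InputFormat class,
+   * key class and value class) set, wrapped in a SerializableConfiguration.
+   */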
+  private static SerializableConfiguration loadTestConfiguration(Class<?> inputFormatClassName,
+      Class<?> keyClass, Class<?> valueClass) {
+    Configuration conf = new Configuration();
+    conf.setClass("mapreduce.job.inputformat.class", inputFormatClassName, InputFormat.class);
+    conf.setClass("key.class", keyClass, Object.class);
+    conf.setClass("value.class", valueClass, Object.class);
+    return new SerializableConfiguration(conf);
+  }
+
+  private <K, V> HadoopInputFormatBoundedSource<K, V> getTestHIFSource(
+      Class<?> inputFormatClass,
+      Class<K> inputFormatKeyClass,
+      Class<V> inputFormatValueClass,
+      Coder<K> keyCoder,
+      Coder<V> valueCoder) {
+    SerializableConfiguration serConf =
+        loadTestConfiguration(
+            inputFormatClass,
+            inputFormatKeyClass,
+            inputFormatValueClass);
+    return new HadoopInputFormatBoundedSource<K, V>(
+            serConf,
+            keyCoder,
+            valueCoder,
+            null, // No key translation required.
+            null); // No value translation required.
+  }
+
+  private <K, V> List<BoundedSource<KV<K, V>>> getBoundedSourceList(
+      Class<?> inputFormatClass,
+      Class<K> inputFormatKeyClass,
+      Class<V> inputFormatValueClass,
+      Coder<K> keyCoder,
+      Coder<V> valueCoder) throws Exception {
+    HadoopInputFormatBoundedSource<K, V> boundedSource = getTestHIFSource(
+        inputFormatClass,
+        inputFormatKeyClass,
+        inputFormatValueClass,
+        keyCoder,
+        valueCoder);
+    return boundedSource.splitIntoBundles(0, p.getOptions());
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/d01620c0/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/ReuseObjectsEmployeeInputFormat.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/ReuseObjectsEmployeeInputFormat.java b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/ReuseObjectsEmployeeInputFormat.java
new file mode 100644
index 0000000..fbe74ec
--- /dev/null
+++ b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/ReuseObjectsEmployeeInputFormat.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.beam.sdk.io.hadoop.inputformat;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * This is a valid InputFormat for reading employee data, which is available in the form of
+ * {@code List<KV>} as {@linkplain ReuseObjectsEmployeeRecordReader#employeeDataList
+ * employeeDataList}. {@linkplain ReuseObjectsEmployeeRecordReader#employeeDataList
+ * employeeDataList} is populated using {@linkplain TestEmployeeDataSet#populateEmployeeData()}.
+ *
+ * <p>{@linkplain ReuseObjectsEmployeeInputFormat} splits data into
+ * {@value TestEmployeeDataSet#NUMBER_OF_SPLITS} splits, each split containing
+ * {@value TestEmployeeDataSet#NUMBER_OF_RECORDS_IN_EACH_SPLIT} records.
+ * {@linkplain ReuseObjectsEmployeeInputFormat} reads data from
+ * {@linkplain ReuseObjectsEmployeeRecordReader#employeeDataList employeeDataList} and produces a
+ * key (employee id) of type Text and value of type {@linkplain Employee Employee}.
+ *
+ * <p>{@linkplain ReuseObjectsEmployeeInputFormat} is also used to test whether the
+ * {@linkplain HadoopInputFormatIO} source returns immutable records when the RecordReader
+ * returns the same key and value objects, updating their values each time it reads data.
+ */
+public class ReuseObjectsEmployeeInputFormat extends InputFormat<Text, Employee> {
+
+  public ReuseObjectsEmployeeInputFormat() {}
+
+  @Override
+  public RecordReader<Text, Employee> createRecordReader(InputSplit split,
+      TaskAttemptContext context) throws IOException, InterruptedException {
+    return new ReuseObjectsEmployeeRecordReader();
+  }
+
+  @Override
+  public List<InputSplit> getSplits(JobContext arg0) throws IOException, InterruptedException {
+    List<InputSplit> inputSplitList = new ArrayList<InputSplit>();
+    for (int i = 1; i <= TestEmployeeDataSet.NUMBER_OF_SPLITS; i++) {
+      InputSplit inputSplitObj = new ReuseEmployeeInputSplit(
+          ((i - 1) * TestEmployeeDataSet.NUMBER_OF_RECORDS_IN_EACH_SPLIT),
+          (i * TestEmployeeDataSet.NUMBER_OF_RECORDS_IN_EACH_SPLIT - 1));
+      inputSplitList.add(inputSplitObj);
+    }
+    return inputSplitList;
+  }
+
+  /**
+   * InputSplit implementation for ReuseObjectsEmployeeInputFormat.
+   */
+  public class ReuseEmployeeInputSplit extends InputSplit implements Writable {
+    // Start and end index of this split's records within employeeData.
+    private long startIndex;
+    private long endIndex;
+
+    public ReuseEmployeeInputSplit() {}
+
+    public ReuseEmployeeInputSplit(long startIndex, long endIndex) {
+      this.startIndex = startIndex;
+      this.endIndex = endIndex;
+    }
+
+    /** Returns number of records in each split. */
+    @Override
+    public long getLength() throws IOException, InterruptedException {
+      return this.endIndex - this.startIndex + 1;
+    }
+
+    @Override
+    public String[] getLocations() throws IOException, InterruptedException {
+      return new String[]{}; // No locality information for in-memory test data.
+    }
+
+    public long getStartIndex() {
+      return startIndex;
+    }
+
+    public long getEndIndex() {
+      return endIndex;
+    }
+
+    @Override
+    public void readFields(DataInput dataIn) throws IOException {
+      startIndex = dataIn.readLong();
+      endIndex = dataIn.readLong();
+    }
+
+    @Override
+    public void write(DataOutput dataOut) throws IOException {
+      dataOut.writeLong(startIndex);
+      dataOut.writeLong(endIndex);
+    }
+  }
+
+  /**
+   * RecordReader for ReuseObjectsEmployeeInputFormat.
+   */
+  public class ReuseObjectsEmployeeRecordReader extends RecordReader<Text, Employee> {
+
+    private ReuseEmployeeInputSplit split;
+    private Text currentKey = new Text();
+    private Employee currentValue = new Employee();
+    private long employeeListIndex = 0L;
+    private long recordsRead = 0L;
+    private List<KV<String, String>> employeeDataList;
+
+    public ReuseObjectsEmployeeRecordReader() {}
+
+    @Override
+    public void close() throws IOException {}
+
+    @Override
+    public Text getCurrentKey() throws IOException, InterruptedException {
+      return currentKey;
+    }
+
+    @Override
+    public Employee getCurrentValue() throws IOException, InterruptedException {
+      return currentValue;
+    }
+
+    @Override
+    public float getProgress() throws IOException, InterruptedException {
+      return (float) recordsRead / split.getLength();
+    }
+
+    @Override
+    public void initialize(InputSplit split, TaskAttemptContext arg1)
+        throws IOException, InterruptedException {
+      this.split = (ReuseEmployeeInputSplit) split;
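+      // Position the index just before the split's first record; nextKeyValue() pre-increments it.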
+      employeeListIndex = this.split.getStartIndex() - 1;
+      recordsRead = 0;
+      employeeDataList = TestEmployeeDataSet.populateEmployeeData();
+    }
+
+    @Override
+    public boolean nextKeyValue() throws IOException, InterruptedException {
+      if ((recordsRead++) >= split.getLength()) {
+        return false;
+      }
+      employeeListIndex++;
+      KV<String, String> employeeDetails = employeeDataList.get((int) employeeListIndex);
+      String[] empData = employeeDetails.getValue().split("_");
+      // Updating the same key and value objects with new employee data.
+      currentKey.set(employeeDetails.getKey());
+      currentValue.setEmpName(empData[0]);
+      currentValue.setEmpAddress(empData[1]);
+      return true;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/d01620c0/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/TestEmployeeDataSet.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/TestEmployeeDataSet.java b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/TestEmployeeDataSet.java
new file mode 100644
index 0000000..4a8fe95
--- /dev/null
+++ b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/TestEmployeeDataSet.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.beam.sdk.io.hadoop.inputformat;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.io.Text;
+/**
+ * Test Utils used in {@link EmployeeInputFormat} and {@link ReuseObjectsEmployeeInputFormat} for
+ * computing splits.
+ */
+public class TestEmployeeDataSet {
+  public static final long NUMBER_OF_RECORDS_IN_EACH_SPLIT = 5L;
+  public static final long NUMBER_OF_SPLITS = 3L;
+  private static final List<KV<String, String>> data = new ArrayList<KV<String, String>>();
+
+  /**
+   * Returns List of employee details. Employee details are available in the form of {@link KV} in
+   * which, key indicates employee id and value indicates employee details such as name and address
+   * separated by '_'. This is data input to {@link EmployeeInputFormat} and
+   * {@link ReuseObjectsEmployeeInputFormat}.
+   */
+  public static List<KV<String, String>> populateEmployeeData() {
+    if (!data.isEmpty()) {
+      return data;
+    }
+    data.add(KV.of("0", "Alex_US"));
+    data.add(KV.of("1", "John_UK"));
+    data.add(KV.of("2", "Tom_UK"));
+    data.add(KV.of("3", "Nick_UAE"));
+    data.add(KV.of("4", "Smith_IND"));
+    data.add(KV.of("5", "Taylor_US"));
+    data.add(KV.of("6", "Gray_UK"));
+    data.add(KV.of("7", "James_UAE"));
+    data.add(KV.of("8", "Jordan_IND"));
+    data.add(KV.of("9", "Leena_UK"));
+    data.add(KV.of("10", "Zara_UAE"));
+    data.add(KV.of("11", "Talia_IND"));
+    data.add(KV.of("12", "Rose_UK"));
+    data.add(KV.of("13", "Kelvin_UAE"));
+    data.add(KV.of("14", "Goerge_IND"));
+    return data;
+  }
+
+  /**
+   * This is a helper function used in unit tests for validating data against data read using
+   * {@link EmployeeInputFormat} and {@link ReuseObjectsEmployeeInputFormat}.
+   */
+  public static List<KV<Text, Employee>> getEmployeeData() {
+    return Lists.transform((data.isEmpty() ? populateEmployeeData() : data),
+        new Function<KV<String, String>, KV<Text, Employee>>() {
+          @Override
+          public KV<Text, Employee> apply(KV<String, String> input) {
+            String[] empData = input.getValue().split("_");
+            return KV.of(new Text(input.getKey()), new Employee(empData[0], empData[1]));
+          }
+        });
+  }
+}


[03/50] [abbrv] beam git commit: HadoopInputFormatIO with junits

Posted by ke...@apache.org.
HadoopInputFormatIO with junits


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d01620c0
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d01620c0
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d01620c0

Branch: refs/heads/gearpump-runner
Commit: d01620c05edd947bd1c55570f683101323cdec5d
Parents: ef256df
Author: Radhika S Kulkarni <ra...@persistent.co.in>
Authored: Mon Mar 6 19:41:45 2017 +0530
Committer: Davor Bonaci <da...@google.com>
Committed: Thu Mar 9 15:20:31 2017 -0800

----------------------------------------------------------------------
 sdks/java/io/hadoop-input-format/README.md      | 167 ++++
 sdks/java/io/hadoop-input-format/pom.xml        | 136 +++
 .../hadoop/inputformat/HadoopInputFormatIO.java | 941 +++++++++++++++++++
 .../sdk/io/hadoop/inputformat/package-info.java |  23 +
 .../ConfigurableEmployeeInputFormat.java        | 131 +++
 .../sdk/io/hadoop/inputformat/Employee.java     |  85 ++
 .../hadoop/inputformat/EmployeeInputFormat.java | 172 ++++
 .../inputformat/HadoopInputFormatIOTest.java    | 844 +++++++++++++++++
 .../ReuseObjectsEmployeeInputFormat.java        | 176 ++++
 .../hadoop/inputformat/TestEmployeeDataSet.java |  76 ++
 10 files changed, 2751 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/d01620c0/sdks/java/io/hadoop-input-format/README.md
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-input-format/README.md b/sdks/java/io/hadoop-input-format/README.md
new file mode 100644
index 0000000..d91f019
--- /dev/null
+++ b/sdks/java/io/hadoop-input-format/README.md
@@ -0,0 +1,167 @@
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+-->
+
+# Hadoop InputFormat IO
+
+A HadoopInputFormatIO is a Transform for reading data from any source that
+implements Hadoop InputFormat: for example, Cassandra, Elasticsearch, HBase, Redis, Postgres, etc.
+
+HadoopInputFormatIO has to make several performance trade-offs in connecting to an InputFormat, so if there is another Beam IO Transform specifically for connecting to your data source of choice, we recommend using that one. This IO Transform nevertheless allows you to connect to many data sources that do not yet have a Beam IO Transform.
+
+You will need to pass a Hadoop Configuration with parameters specifying how the read will occur. Many properties of the Configuration are optional, and some are required for certain InputFormat classes, but the following properties must be set for all InputFormats:
+
+- `mapreduce.job.inputformat.class`: The InputFormat class used to connect to your data source of choice.
+- `key.class`: The key class returned by the InputFormat in `mapreduce.job.inputformat.class`.
+- `value.class`: The value class returned by the InputFormat in `mapreduce.job.inputformat.class`.
+
+For example:
+```java
+Configuration myHadoopConfiguration = new Configuration(false);
+// Set Hadoop InputFormat, key and value class in configuration
+myHadoopConfiguration.setClass("mapreduce.job.inputformat.class", InputFormatClass,
+  InputFormat.class);
+myHadoopConfiguration.setClass("key.class", InputFormatKeyClass, Object.class);
+myHadoopConfiguration.setClass("value.class", InputFormatValueClass, Object.class);
+```
+
+You will need to check whether the key and value classes output by the InputFormat have a Beam Coder available. If not, you can use withKeyTranslation/withValueTranslation to specify a method transforming instances of those classes into another class that is supported by a Beam Coder. These settings are optional, and you don't need to specify translation for both key and value.
+
+For example:
+```java
+SimpleFunction<InputFormatKeyClass, MyKeyClass> myOutputKeyType =
+new SimpleFunction<InputFormatKeyClass, MyKeyClass>() {
+  public MyKeyClass apply(InputFormatKeyClass input) {
+  // ...logic to transform InputFormatKeyClass to MyKeyClass
+  }
+};
+SimpleFunction<InputFormatValueClass, MyValueClass> myOutputValueType =
+new SimpleFunction<InputFormatValueClass, MyValueClass>() {
+  public MyValueClass apply(InputFormatValueClass input) {
+  // ...logic to transform InputFormatValueClass to MyValueClass
+  }
+};
+```
+
+### Reading using Hadoop InputFormat IO
+
+Read data using only a Hadoop configuration:
+
+```java
+Pipeline p = ...; // Create pipeline.
+p.apply("read",
+  HadoopInputFormatIO.<InputFormatKeyClass, InputFormatValueClass>read()
+  .withConfiguration(myHadoopConfiguration));
+```
+
+Read data with configuration and key translation (example scenario: a Beam Coder is not
+available for the key class, hence key translation is required):
+
+```java
+p.apply("read",
+  HadoopInputFormatIO.<MyKeyClass, InputFormatValueClass>read()
+  .withConfiguration(myHadoopConfiguration)
+  .withKeyTranslation(myOutputKeyType));
+```
+
+Read data with configuration and value translation (example scenario: a Beam Coder is not
+available for the value class, hence value translation is required):
+
+```java
+p.apply("read",
+  HadoopInputFormatIO.<InputFormatKeyClass, MyValueClass>read()
+  .withConfiguration(myHadoopConfiguration)
+  .withValueTranslation(myOutputValueType));
+```
+
+Read data with configuration, key translation and value translation (example scenario: Beam Coders are not available for either the key class or the value class of the InputFormat, hence both key and value translation are required):
+
+```java
+p.apply("read",
+  HadoopInputFormatIO.<MyKeyClass, MyValueClass>read()
+  .withConfiguration(myHadoopConfiguration)
+  .withKeyTranslation(myOutputKeyType)
+  .withValueTranslation(myOutputValueType));
+```
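+
+As a minimal end-to-end sketch (assuming a hypothetical MyInputFormat that emits Hadoop Text keys and LongWritable values, both of which already have Beam Coders, so no translation is needed):
+
+```java
+Configuration myConf = new Configuration(false);
+myConf.setClass("mapreduce.job.inputformat.class", MyInputFormat.class, InputFormat.class);
+myConf.setClass("key.class", Text.class, Object.class);
+myConf.setClass("value.class", LongWritable.class, Object.class);
+
+PipelineOptions options = PipelineOptionsFactory.create();
+Pipeline p = Pipeline.create(options);
+PCollection<KV<Text, LongWritable>> data = p.apply("read",
+  HadoopInputFormatIO.<Text, LongWritable>read()
+  .withConfiguration(myConf));
+p.run();
+```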
+
+# Examples for specific InputFormats
+
+### Cassandra - CqlInputFormat
+
+To read data from Cassandra, org.apache.cassandra.hadoop.cql3.CqlInputFormat
+can be used, which needs the following properties to be set.
+
+Create Cassandra Hadoop configuration as follows:
+
+```java
+Configuration cassandraConf = new Configuration();
+cassandraConf.set("cassandra.input.thrift.port", "9160");
+cassandraConf.set("cassandra.input.thrift.address", CassandraHostIp);
+cassandraConf.set("cassandra.input.partitioner.class", "Murmur3Partitioner");
+cassandraConf.set("cassandra.input.keyspace", "myKeySpace");
+cassandraConf.set("cassandra.input.columnfamily", "myColumnFamily");
+cassandraConf.setClass("key.class", java.lang.Long Long.class, Object.class);
+cassandraConf.setClass("value.class", com.datastax.driver.core.Row Row.class, Object.class);
+cassandraConf.setClass("mapreduce.job.inputformat.class", org.apache.cassandra.hadoop.cql3.CqlInputFormat CqlInputFormat.class, InputFormat.class);
+```
+
+Call Read transform as follows:
+
+```java
+PCollection<KV<Long, String>> cassandraData =
+  p.apply("read",
+  HadoopInputFormatIO.<Long, String>read()
+  .withConfiguration(cassandraConf)
+  .withValueTranslation(cassandraOutputValueType));
+```
+
+The CqlInputFormat key class is java.lang.Long, which has a Beam Coder. The CqlInputFormat value class is com.datastax.driver.core.Row, which does not have a Beam Coder. Rather than write a new coder, you can provide your own translation method, as follows:
+
+```java
+SimpleFunction<Row, String> cassandraOutputValueType =
+  new SimpleFunction<Row, String>() {
+    public String apply(Row row) {
+      return row.getString("myColName");
+    }
+  };
+```
+
+### Elasticsearch - EsInputFormat
+
+To read data from Elasticsearch, org.elasticsearch.hadoop.mr.EsInputFormat can be used, which needs the following properties to be set.
+
+Create Elasticsearch Hadoop configuration as follows:
+
+```java
+Configuration elasticSearchConf = new Configuration();
+elasticSearchConf.set("es.nodes", ElasticsearchHostIp);
+elasticSearchConf.set("es.port", "9200");
+elasticSearchConf.set("es.resource", "ElasticIndexName/ElasticTypeName");
+elasticSearchConf.setClass("key.class", org.apache.hadoop.io.Text Text.class, Object.class);
+elasticSearchConf.setClass("value.class", org.elasticsearch.hadoop.mr.LinkedMapWritable LinkedMapWritable.class, Object.class);
+elasticSearchConf.setClass("mapreduce.job.inputformat.class", org.elasticsearch.hadoop.mr.EsInputFormat EsInputFormat.class, InputFormat.class);
+```
+
+Call Read transform as follows:
+
+```java
+PCollection<KV<Text, LinkedMapWritable>> elasticData = p.apply("read",
+  HadoopInputFormatIO.<Text, LinkedMapWritable>read().withConfiguration(elasticSearchConf));
+```
+
+The org.elasticsearch.hadoop.mr.EsInputFormat key class is
+org.apache.hadoop.io.Text and its value class is org.elasticsearch.hadoop.mr.LinkedMapWritable. Both key and value classes have Beam Coders.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/d01620c0/sdks/java/io/hadoop-input-format/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-input-format/pom.xml b/sdks/java/io/hadoop-input-format/pom.xml
new file mode 100644
index 0000000..6680087
--- /dev/null
+++ b/sdks/java/io/hadoop-input-format/pom.xml
@@ -0,0 +1,136 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+  xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.beam</groupId>
+    <artifactId>beam-sdks-java-io-parent</artifactId>
+    <version>0.7.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>beam-sdks-java-io-hadoop-input-format</artifactId>
+  <name>Apache Beam :: SDKs :: Java :: IO :: Hadoop Input Format</name>
+  <description>IO library to read data from data sources which implement Hadoop Input Format from Beam.</description>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <properties>
+    <log4j.core.version>2.6.2</log4j.core.version>
+    <hadoop.common.version>2.7.0</hadoop.common.version>
+    <findbugs.jsr305.version>3.0.1</findbugs.jsr305.version>
+    <slf4j.api.version>1.7.14</slf4j.api.version>
+    <guava.version>19.0</guava.version>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>beam-sdks-java-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>${guava.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>${slf4j.api.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.code.findbugs</groupId>
+      <artifactId>jsr305</artifactId>
+      <version>${findbugs.jsr305.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>beam-sdks-java-io-hadoop-common</artifactId>
+    </dependency>
+
+    <!-- provided dependencies -->
+    <dependency>
+      <groupId>com.google.auto.value</groupId>
+      <artifactId>auto-value</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <version>${hadoop.common.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-core</artifactId>
+      <version>${hadoop.common.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- test dependencies -->
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>beam-sdks-java-core</artifactId>
+      <classifier>tests</classifier>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>beam-runners-direct-java</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-core</artifactId>
+      <version>${log4j.core.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.hamcrest</groupId>
+      <artifactId>hamcrest-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>

http://git-wip-us.apache.org/repos/asf/beam/blob/d01620c0/sdks/java/io/hadoop-input-format/src/main/java/org/apache/beam/sdk/io/hadoop/inputformat/HadoopInputFormatIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-input-format/src/main/java/org/apache/beam/sdk/io/hadoop/inputformat/HadoopInputFormatIO.java b/sdks/java/io/hadoop-input-format/src/main/java/org/apache/beam/sdk/io/hadoop/inputformat/HadoopInputFormatIO.java
new file mode 100644
index 0000000..3b786fb
--- /dev/null
+++ b/sdks/java/io/hadoop-input-format/src/main/java/org/apache/beam/sdk/io/hadoop/inputformat/HadoopInputFormatIO.java
@@ -0,0 +1,941 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.beam.sdk.io.hadoop.inputformat;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import com.google.auto.value.AutoValue;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.util.concurrent.AtomicDouble;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutput;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import javax.annotation.Nullable;
+
+import org.apache.beam.sdk.coders.CannotProvideCoderException;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.coders.CoderRegistry;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.io.hadoop.WritableCoder;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.SimpleFunction;
+import org.apache.beam.sdk.transforms.display.DisplayData;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PBegin;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.ObjectWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A {@link HadoopInputFormatIO} is a Transform for reading data from any source which
+ * implements Hadoop {@link InputFormat}, for example Cassandra, Elasticsearch, HBase, Redis,
+ * Postgres, etc. {@link HadoopInputFormatIO} has to make several performance trade-offs in
+ * connecting to {@link InputFormat}, so if there is another Beam IO Transform specifically for
+ * connecting to your data source of choice, we would recommend using that one, but this IO
+ * Transform allows you to connect to many data sources that do not yet have a Beam IO Transform.
+ *
+ * <p>You will need to pass a Hadoop {@link Configuration} with parameters specifying how the read
+ * will occur. Many properties of the Configuration are optional, and some are required for certain
+ * {@link InputFormat} classes, but the following properties must be set for all InputFormats:
+ * <ul>
+ * <li>{@code mapreduce.job.inputformat.class}: The {@link InputFormat} class used to connect to
+ * your data source of choice.</li>
+ * <li>{@code key.class}: The key class returned by the {@link InputFormat} in
+ * {@code mapreduce.job.inputformat.class}.</li>
+ * <li>{@code value.class}: The value class returned by the {@link InputFormat} in
+ * {@code mapreduce.job.inputformat.class}.</li>
+ * </ul>
+ * For example:
+ *
+ * <pre>
+ * {
+ *   Configuration myHadoopConfiguration = new Configuration(false);
+ *   // Set Hadoop InputFormat, key and value class in configuration
+ *   myHadoopConfiguration.setClass(&quot;mapreduce.job.inputformat.class&quot;,
+ *      MyDbInputFormatClass, InputFormat.class);
+ *   myHadoopConfiguration.setClass(&quot;key.class&quot;, MyDbInputFormatKeyClass, Object.class);
+ *   myHadoopConfiguration.setClass(&quot;value.class&quot;,
+ *      MyDbInputFormatValueClass, Object.class);
+ * }
+ * </pre>
+ *
+ * <p>You will need to check to see if the key and value classes output by the {@link InputFormat}
+ * have a Beam {@link Coder} available. If not, you can use withKeyTranslation/withValueTranslation
+ * to specify a method transforming instances of those classes into another class that is supported
+ * by a Beam {@link Coder}. These settings are optional and you don't need to specify translation
+ * for both key and value. If you specify a translation, you will need to make sure the K or V of
+ * the read transform match the output type of the translation.
+ *
+ * <h3>Reading using {@link HadoopInputFormatIO}</h3>
+ *
+ * <pre>
+ * {@code
+ * Pipeline p = ...; // Create pipeline.
+ * // Read data only with Hadoop configuration.
+ * p.apply("read",
 *     HadoopInputFormatIO.<InputFormatKeyClass, InputFormatValueClass>read()
+ *              .withConfiguration(myHadoopConfiguration));
+ * }
+ * // Read data with configuration and key translation (Example scenario: Beam Coder is not
+ * available for key class hence key translation is required.).
+ * SimpleFunction&lt;InputFormatKeyClass, MyKeyClass&gt; myOutputKeyType =
+ *       new SimpleFunction&lt;InputFormatKeyClass, MyKeyClass&gt;() {
+ *         public MyKeyClass apply(InputFormatKeyClass input) {
+ *           // ...logic to transform InputFormatKeyClass to MyKeyClass
+ *         }
+ * };
+ * </pre>
+ *
+ * <pre>
+ * {@code
+ * p.apply("read",
+ *     HadoopInputFormatIO.<MyKeyClass, InputFormatValueClass>read()
+ *              .withConfiguration(myHadoopConfiguration)
+ *              .withKeyTranslation(myOutputKeyType));
+ * }
+ * </pre>
+ *
+ * <p>// Read data with configuration and value translation (Example scenario: Beam Coder is not
+ * available for value class hence value translation is required.).
+ *
+ * <pre>
+ * {@code
+ * SimpleFunction&lt;InputFormatValueClass, MyValueClass&gt; myOutputValueType =
+ *      new SimpleFunction&lt;InputFormatValueClass, MyValueClass&gt;() {
+ *          public MyValueClass apply(InputFormatValueClass input) {
+ *            // ...logic to transform InputFormatValueClass to MyValueClass
+ *          }
+ *  };
+ * }
+ * </pre>
+ *
+ * <pre>
+ * {@code
+ * p.apply("read",
+ *     HadoopInputFormatIO.<InputFormatKeyClass, MyValueClass>read()
+ *              .withConfiguration(myHadoopConfiguration)
+ *              .withValueTranslation(myOutputValueType));
+ * }
+ * </pre>
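+ *
+ * <p>// Read data with configuration, key translation and value translation (Example scenario:
+ * Beam Coders are not available for either the key class or the value class of the InputFormat,
+ * hence both key and value translation are required.).
+ *
+ * <pre>
+ * {@code
+ * p.apply("read",
+ *     HadoopInputFormatIO.<MyKeyClass, MyValueClass>read()
+ *              .withConfiguration(myHadoopConfiguration)
+ *              .withKeyTranslation(myOutputKeyType)
+ *              .withValueTranslation(myOutputValueType));
+ * }
+ * </pre>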
+ */
+public class HadoopInputFormatIO {
+  private static final Logger LOG = LoggerFactory.getLogger(HadoopInputFormatIO.class);
+
+  /**
+   * Creates an uninitialized {@link HadoopInputFormatIO.Read}. Before use, the {@code Read} must
+   * be initialized with a {@link HadoopInputFormatIO.Read#withConfiguration} call that
+   * specifies the source. A key/value translation may also optionally be specified using
+   * {@link HadoopInputFormatIO.Read#withKeyTranslation}/
+   * {@link HadoopInputFormatIO.Read#withValueTranslation}.
+   */
+  public static <K, V> Read<K, V> read() {
+    return new AutoValue_HadoopInputFormatIO_Read.Builder<K, V>().build();
+  }
+
+  /**
+   * A {@link PTransform} that reads from any data source which implements Hadoop InputFormat,
+   * e.g. Cassandra, Elasticsearch, HBase, Redis, Postgres, etc. See the class-level Javadoc on
+   * {@link HadoopInputFormatIO} for more information.
+   * @param <K> Type of keys to be read.
+   * @param <V> Type of values to be read.
+   * @see HadoopInputFormatIO
+   */
+  @AutoValue
+  public abstract static class Read<K, V> extends PTransform<PBegin, PCollection<KV<K, V>>> {
+
+    // Returns the Hadoop Configuration which contains specification of source.
+    @Nullable
+    public abstract SerializableConfiguration getConfiguration();
+
+    @Nullable public abstract SimpleFunction<?, K> getKeyTranslationFunction();
+    @Nullable public abstract SimpleFunction<?, V> getValueTranslationFunction();
+    @Nullable public abstract TypeDescriptor<K> getKeyTypeDescriptor();
+    @Nullable public abstract TypeDescriptor<V> getValueTypeDescriptor();
+    @Nullable public abstract TypeDescriptor<?> getinputFormatClass();
+    @Nullable public abstract TypeDescriptor<?> getinputFormatKeyClass();
+    @Nullable public abstract TypeDescriptor<?> getinputFormatValueClass();
+
+    abstract Builder<K, V> toBuilder();
+
+    @AutoValue.Builder
+    abstract static class Builder<K, V> {
+      abstract Builder<K, V> setConfiguration(SerializableConfiguration configuration);
+      abstract Builder<K, V> setKeyTranslationFunction(SimpleFunction<?, K> function);
+      abstract Builder<K, V> setValueTranslationFunction(SimpleFunction<?, V> function);
+      abstract Builder<K, V> setKeyTypeDescriptor(TypeDescriptor<K> keyTypeDescriptor);
+      abstract Builder<K, V> setValueTypeDescriptor(TypeDescriptor<V> valueTypeDescriptor);
+      abstract Builder<K, V> setInputFormatClass(TypeDescriptor<?> inputFormatClass);
+      abstract Builder<K, V> setInputFormatKeyClass(TypeDescriptor<?> inputFormatKeyClass);
+      abstract Builder<K, V> setInputFormatValueClass(TypeDescriptor<?> inputFormatValueClass);
+      abstract Read<K, V> build();
+    }
+
+    /**
+     * Returns a new {@link HadoopInputFormatIO.Read} that will read from the source using the
+     * options provided by the given configuration.
+     *
+     * <p>Does not modify this object.
+     */
+    public Read<K, V> withConfiguration(Configuration configuration) {
+      validateConfiguration(configuration);
+      TypeDescriptor<?> inputFormatClass =
+          TypeDescriptor.of(configuration.getClass("mapreduce.job.inputformat.class", null));
+      TypeDescriptor<?> inputFormatKeyClass =
+          TypeDescriptor.of(configuration.getClass("key.class", null));
+      TypeDescriptor<?> inputFormatValueClass =
+          TypeDescriptor.of(configuration.getClass("value.class", null));
+      Builder<K, V> builder =
+          toBuilder().setConfiguration(new SerializableConfiguration(configuration));
+      builder.setInputFormatClass(inputFormatClass);
+      builder.setInputFormatKeyClass(inputFormatKeyClass);
+      builder.setInputFormatValueClass(inputFormatValueClass);
+      /*
+       * Sets the output key class to InputFormat key class if withKeyTranslation() is not called
+       * yet.
+       */
+      if (getKeyTranslationFunction() == null) {
+        builder.setKeyTypeDescriptor((TypeDescriptor<K>) inputFormatKeyClass);
+      }
+      /*
+       * Sets the output value class to InputFormat value class if withValueTranslation() is not
+       * called yet.
+       */
+      if (getValueTranslationFunction() == null) {
+        builder.setValueTypeDescriptor((TypeDescriptor<V>) inputFormatValueClass);
+      }
+      return builder.build();
+    }
+
+    /**
+     * Returns a new {@link HadoopInputFormatIO.Read} that will transform the keys read from the
+     * source using the given key translation function.
+     *
+     * <p>Does not modify this object.
+     */
+    public Read<K, V> withKeyTranslation(SimpleFunction<?, K> function) {
+      checkNotNull(function, "function");
+      // Sets key class to key translation function's output class type.
+      return toBuilder().setKeyTranslationFunction(function)
+          .setKeyTypeDescriptor((TypeDescriptor<K>) function.getOutputTypeDescriptor()).build();
+    }
+
+    /**
+     * Returns a new {@link HadoopInputFormatIO.Read} that will transform the values read from the
+     * source using the given value translation function.
+     *
+     * <p>Does not modify this object.
+     */
+    public Read<K, V> withValueTranslation(SimpleFunction<?, V> function) {
+      checkNotNull(function, "function");
+      // Sets value class to value translation function's output class type.
+      return toBuilder().setValueTranslationFunction(function)
+          .setValueTypeDescriptor((TypeDescriptor<V>) function.getOutputTypeDescriptor()).build();
+    }
+
+    @Override
+    public PCollection<KV<K, V>> expand(PBegin input) {
+      // Get the key and value coders based on the key and value classes.
+      CoderRegistry coderRegistry = input.getPipeline().getCoderRegistry();
+      Coder<K> keyCoder = getDefaultCoder(getKeyTypeDescriptor(), coderRegistry);
+      Coder<V> valueCoder = getDefaultCoder(getValueTypeDescriptor(), coderRegistry);
+      HadoopInputFormatBoundedSource<K, V> source = new HadoopInputFormatBoundedSource<K, V>(
+          getConfiguration(),
+          keyCoder,
+          valueCoder,
+          getKeyTranslationFunction(),
+          getValueTranslationFunction());
+      return input.getPipeline().apply(org.apache.beam.sdk.io.Read.from(source));
+    }
+
+    /**
+     * Validates that the mandatory configuration properties such as InputFormat class, InputFormat
+     * key and value classes are provided in the Hadoop configuration.
+     */
+    private void validateConfiguration(Configuration configuration) {
+      checkNotNull(configuration, "configuration");
+      checkNotNull(configuration.get("mapreduce.job.inputformat.class"),
+          "configuration.get(\"mapreduce.job.inputformat.class\")");
+      checkNotNull(configuration.get("key.class"), "configuration.get(\"key.class\")");
+      checkNotNull(configuration.get("value.class"),
+          "configuration.get(\"value.class\")");
+    }
+
+    /**
+     * Validates inputs provided by the pipeline user before reading the data.
+     */
+    @Override
+    public void validate(PBegin input) {
+      checkNotNull(getConfiguration(), "getConfiguration()");
+      // Validate that the key translation input type must be same as key class of InputFormat.
+      validateTranslationFunction(getinputFormatKeyClass(), getKeyTranslationFunction(),
+          "Key translation's input type is not same as hadoop InputFormat : %s key class : %s");
+      // Validate that the value translation input type must be same as value class of InputFormat.
+      validateTranslationFunction(getinputFormatValueClass(), getValueTranslationFunction(),
+          "Value translation's input type is not same as hadoop InputFormat :  "
+              + "%s value class : %s");
+    }
+
+    /**
+     * Validates translation function given for key/value translation.
+     */
+    private void validateTranslationFunction(TypeDescriptor<?> inputType,
+        SimpleFunction<?, ?> simpleFunction, String errorMsg) {
+      if (simpleFunction != null) {
+        if (!simpleFunction.getInputTypeDescriptor().equals(inputType)) {
+          throw new IllegalArgumentException(
+              String.format(errorMsg, getinputFormatClass().getRawType(), inputType.getRawType()));
+        }
+      }
+    }
+
+    /**
+     * Returns the default coder for a given type descriptor. The Coder Registry is queried for
+     * the correct coder; if none is found there and the type descriptor is of type Writable, a
+     * WritableCoder is returned, otherwise a "Cannot find coder" exception is thrown.
+     */
+    @VisibleForTesting
+    public <T> Coder<T> getDefaultCoder(TypeDescriptor<?> typeDesc, CoderRegistry coderRegistry) {
+      Class classType = typeDesc.getRawType();
+      try {
+        return (Coder<T>) coderRegistry.getCoder(typeDesc);
+      } catch (CannotProvideCoderException e) {
+        if (Writable.class.isAssignableFrom(classType)) {
+          return (Coder<T>) WritableCoder.of(classType);
+        }
+        throw new IllegalStateException(String.format("Cannot find coder for %s  : ", typeDesc)
+            + e.getMessage(), e);
+      }
+    }
+
+    @Override
+    public void populateDisplayData(DisplayData.Builder builder) {
+      super.populateDisplayData(builder);
+      if (getConfiguration().getHadoopConfiguration() != null) {
+        Iterator<Entry<String, String>> configProperties = getConfiguration()
+            .getHadoopConfiguration().iterator();
+        while (configProperties.hasNext()) {
+          Entry<String, String> property = configProperties.next();
+          builder.addIfNotNull(DisplayData.item(property.getKey(), property.getValue())
+              .withLabel(property.getKey()));
+        }
+      }
+    }
+  }
+
+  /**
+   * Bounded source implementation for {@link HadoopInputFormatIO}.
+   * @param <K> Type of keys to be read.
+   * @param <V> Type of values to be read.
+   */
+  public static class HadoopInputFormatBoundedSource<K, V> extends BoundedSource<KV<K, V>>
+      implements Serializable {
+    private final SerializableConfiguration conf;
+    private final Coder<K> keyCoder;
+    private final Coder<V> valueCoder;
+    @Nullable private final SimpleFunction<?, K> keyTranslationFunction;
+    @Nullable private final SimpleFunction<?, V> valueTranslationFunction;
+    private final SerializableSplit inputSplit;
+    private transient List<SerializableSplit> inputSplits;
+    private long boundedSourceEstimatedSize = 0;
+    private transient InputFormat<?, ?> inputFormatObj;
+    private transient TaskAttemptContext taskAttemptContext;
+
+    HadoopInputFormatBoundedSource(
+        SerializableConfiguration conf,
+        Coder<K> keyCoder,
+        Coder<V> valueCoder,
+        @Nullable SimpleFunction<?, K> keyTranslationFunction,
+        @Nullable SimpleFunction<?, V> valueTranslationFunction) {
+      this(conf,
+          keyCoder,
+          valueCoder,
+          keyTranslationFunction,
+          valueTranslationFunction,
+          null);
+    }
+
+    protected HadoopInputFormatBoundedSource(
+        SerializableConfiguration conf,
+        Coder<K> keyCoder,
+        Coder<V> valueCoder,
+        @Nullable SimpleFunction<?, K> keyTranslationFunction,
+        @Nullable SimpleFunction<?, V> valueTranslationFunction,
+        SerializableSplit inputSplit) {
+      this.conf = conf;
+      this.inputSplit = inputSplit;
+      this.keyCoder = keyCoder;
+      this.valueCoder = valueCoder;
+      this.keyTranslationFunction = keyTranslationFunction;
+      this.valueTranslationFunction = valueTranslationFunction;
+    }
+
+    public SerializableConfiguration getConfiguration() {
+      return conf;
+    }
+
+    @Override
+    public void validate() {
+      checkNotNull(conf, "conf");
+      checkNotNull(keyCoder, "keyCoder");
+      checkNotNull(valueCoder, "valueCoder");
+    }
+
+    @Override
+    public List<BoundedSource<KV<K, V>>> splitIntoBundles(long desiredBundleSizeBytes,
+        PipelineOptions options) throws Exception {
+      // desiredBundleSizeBytes is ignored, because splitting based on this value
+      // is not supported by the InputFormat's getSplits() method.
+      if (inputSplit != null) {
+        LOG.info("Not splitting source {} because source is already split.", this);
+        return ImmutableList.of((BoundedSource<KV<K, V>>) this);
+      }
+      computeSplitsIfNecessary();
+      LOG.info("Generated {} splits. Size of first split is {} ", inputSplits.size(), inputSplits
+          .get(0).getSplit().getLength());
+      return Lists.transform(inputSplits,
+          new Function<SerializableSplit, BoundedSource<KV<K, V>>>() {
+            @Override
+            public BoundedSource<KV<K, V>> apply(SerializableSplit serializableInputSplit) {
+              HadoopInputFormatBoundedSource<K, V> hifBoundedSource =
+                  new HadoopInputFormatBoundedSource<K, V>(conf, keyCoder, valueCoder,
+                      keyTranslationFunction, valueTranslationFunction, serializableInputSplit);
+              return hifBoundedSource;
+            }
+          });
+    }
+
+    @Override
+    public long getEstimatedSizeBytes(PipelineOptions po) throws Exception {
+      if (inputSplit == null) {
+        // If there are no splits computed yet, then retrieve the splits.
+        computeSplitsIfNecessary();
+        return boundedSourceEstimatedSize;
+      }
+      return inputSplit.getSplit().getLength();
+    }
+
+    /**
+     * This is a helper function to compute splits. It also calculates the size of the data
+     * being read. Note: This method is executed exactly once; the splits are retrieved and
+     * cached in this source, and are then used by splitIntoBundles() and getEstimatedSizeBytes().
+     */
+    @VisibleForTesting
+    void computeSplitsIfNecessary() throws IOException, InterruptedException {
+      if (inputSplits != null) {
+        return;
+      }
+      createInputFormatInstance();
+      List<InputSplit> splits =
+          inputFormatObj.getSplits(Job.getInstance(conf.getHadoopConfiguration()));
+      if (splits == null) {
+        throw new IOException("Error in computing splits, getSplits() returns null.");
+      }
+      if (splits.isEmpty()) {
+        throw new IOException("Error in computing splits, getSplits() returns a empty list");
+      }
+      boundedSourceEstimatedSize = 0;
+      inputSplits = new ArrayList<SerializableSplit>();
+      for (InputSplit inputSplit : splits) {
+        if (inputSplit == null) {
+          throw new IOException("Error in computing splits, split is null in InputSplits list "
+              + "populated by getSplits() : ");
+        }
+        boundedSourceEstimatedSize += inputSplit.getLength();
+        inputSplits.add(new SerializableSplit(inputSplit));
+      }
+      validateUserInputForKeyAndValue();
+    }
+
+    /**
+     * Creates instance of InputFormat class. The InputFormat class name is specified in the Hadoop
+     * configuration.
+     */
+    protected void createInputFormatInstance() throws IOException {
+      if (inputFormatObj == null) {
+        try {
+          taskAttemptContext =
+              new TaskAttemptContextImpl(conf.getHadoopConfiguration(), new TaskAttemptID());
+          inputFormatObj =
+              (InputFormat<?, ?>) conf
+                  .getHadoopConfiguration()
+                  .getClassByName(
+                      conf.getHadoopConfiguration().get("mapreduce.job.inputformat.class"))
+                  .newInstance();
+          /*
+           * If InputFormat explicitly implements interface {@link Configurable}, then setConf()
+           * method of {@link Configurable} needs to be explicitly called to set all the
+           * configuration parameters. For example: InputFormat classes which implement Configurable
+           * are {@link org.apache.hadoop.mapreduce.lib.db.DBInputFormat DBInputFormat}, {@link
+           * org.apache.hadoop.hbase.mapreduce.TableInputFormat TableInputFormat}, etc.
+           */
+          if (Configurable.class.isAssignableFrom(inputFormatObj.getClass())) {
+            ((Configurable) inputFormatObj).setConf(conf.getHadoopConfiguration());
+          }
+        } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
+          throw new IOException("Unable to create InputFormat object: ", e);
+        }
+      }
+    }
+
+    /**
+     * Throws an exception if the user sets a key or value class different from the InputFormat's
+     * actual key or value class. Setting incorrect classes may result in errors like
+     * "unexpected extra bytes after decoding" during decoding; hence this validation is
+     * required.
+     */
+    private void validateUserInputForKeyAndValue() throws IOException, InterruptedException {
+      ParameterizedType genericClassType = determineGenericType();
+      RecordReader<?, ?> reader = fetchFirstRecordReader();
+      boolean isCorrectKeyClassSet =
+          validateClass(genericClassType.getActualTypeArguments()[0].getTypeName(), keyCoder,
+              reader.getCurrentKey(), "key.class");
+      boolean isCorrectValueClassSet =
+          validateClass(genericClassType.getActualTypeArguments()[1].getTypeName(), valueCoder,
+              reader.getCurrentValue(), "value.class");
+      if (!isCorrectKeyClassSet) {
+        Class<?> actualClass = conf.getHadoopConfiguration().getClass("key.class", Object.class);
+        throw new IllegalArgumentException(String.format(
+            "Wrong InputFormat key class in configuration : Expected key.class is %s but was %s.",
+            reader.getCurrentKey().getClass().getName(), actualClass.getName()));
+      }
+      if (!isCorrectValueClassSet) {
+        Class<?> actualClass = conf.getHadoopConfiguration().getClass("value.class", Object.class);
+        throw new IllegalArgumentException(String.format("Wrong InputFormat value class in "
+            + "configuration : Expected value.class is %s but was %s.", reader.getCurrentValue()
+            .getClass().getName(), actualClass.getName()));
+      }
+    }
+
+    /**
+     * Returns true if the key/value class set by the user is compatible with the key/value class
+     * of a pair returned by the RecordReader. The user-provided key/value class is validated
+     * against the type arguments of the InputFormat's parameterized type. If the parameterized
+     * type has unresolved type variables such as T, K, V, etc., then validation is done by
+     * encoding and decoding the key/value object of the first pair returned by the RecordReader.
+     */
+    private <T> boolean validateClass(String inputFormatGenericClassName, Coder coder,
+        Object object, String property) {
+      try {
+        Class<?> inputClass = Class.forName(inputFormatGenericClassName);
+        /*
+         * Validates key/value class with InputFormat's parameterized type.
+         */
+        if (property.equals("key.class")) {
+          return (conf.getHadoopConfiguration().getClass("key.class",
+              Object.class)).isAssignableFrom(inputClass);
+        }
+        return (conf.getHadoopConfiguration().getClass("value.class",
+            Object.class)).isAssignableFrom(inputClass);
+      } catch (ClassNotFoundException e) {
+        /*
+         * The given inputFormatGenericClassName is a type variable (e.g. T, K, V). In such cases
+         * class-based validation of the user-provided key/value class will not work, so the
+         * key/value class is instead validated by encoding and decoding the key/value object
+         * with the given coder.
+         */
+        return checkEncodingAndDecoding((Coder<T>) coder, (T) object);
+      }
+    }
+
+    /**
+     * Checks whether the input can be correctly encoded and decoded using the provided coder.
+     */
+    private <T> boolean checkEncodingAndDecoding(Coder<T> coder, T input) {
+      try {
+        CoderUtils.clone(coder, input);
+      } catch (CoderException e) {
+        return false;
+      }
+      return true;
+    }
+
+    /**
+     * Returns parameterized type of the InputFormat class.
+     */
+    private ParameterizedType determineGenericType() {
+      // Every InputFormat class inherits (possibly indirectly) from InputFormat<K, V>, so
+      // walking up the class hierarchy eventually yields a ParameterizedType generic superclass.
+      Class<?> inputFormatClass = inputFormatObj.getClass();
+      Type genericSuperclass = null;
+      for (;;) {
+        genericSuperclass = inputFormatClass.getGenericSuperclass();
+        if (genericSuperclass instanceof ParameterizedType) {
+          break;
+        }
+        inputFormatClass = inputFormatClass.getSuperclass();
+      }
+      return (ParameterizedType) genericSuperclass;
+    }
+
+    /**
+     * Returns a RecordReader for the first split, used to read the first record when validating
+     * the key/value classes.
+     */
+    private RecordReader fetchFirstRecordReader() throws IOException, InterruptedException {
+      RecordReader<?, ?> reader =
+          inputFormatObj.createRecordReader(inputSplits.get(0).getSplit(), taskAttemptContext);
+      if (reader == null) {
+        throw new IOException(String.format("Null RecordReader object returned by %s",
+            inputFormatObj.getClass()));
+      }
+      reader.initialize(inputSplits.get(0).getSplit(), taskAttemptContext);
+      // First record is read to get the InputFormat's key and value classes.
+      reader.nextKeyValue();
+      return reader;
+    }
+
+    @VisibleForTesting
+    InputFormat<?, ?> getInputFormat(){
+      return inputFormatObj;
+    }
+
+    @VisibleForTesting
+    void setInputFormatObj(InputFormat<?, ?> inputFormatObj) {
+      this.inputFormatObj = inputFormatObj;
+    }
+
+    @Override
+    public Coder<KV<K, V>> getDefaultOutputCoder() {
+      return KvCoder.of(keyCoder, valueCoder);
+    }
+
+    @Override
+    public BoundedReader<KV<K, V>> createReader(PipelineOptions options) throws IOException {
+      this.validate();
+      if (inputSplit == null) {
+        throw new IOException("Cannot create reader as source is not split yet.");
+      } else {
+        createInputFormatInstance();
+        return new HadoopInputFormatReader<>(
+            this,
+            keyTranslationFunction,
+            valueTranslationFunction,
+            inputSplit,
+            inputFormatObj,
+            taskAttemptContext);
+      }
+    }
+
+    /**
+     * BoundedReader for Hadoop InputFormat source.
+     *
+     * @param <T1> Type of keys RecordReader emits.
+     * @param <T2> Type of values RecordReader emits.
+     */
+    class HadoopInputFormatReader<T1, T2> extends BoundedSource.BoundedReader<KV<K, V>> {
+
+      private final HadoopInputFormatBoundedSource<K, V> source;
+      @Nullable private final SimpleFunction<T1, K> keyTranslationFunction;
+      @Nullable private final SimpleFunction<T2, V> valueTranslationFunction;
+      private final SerializableSplit split;
+      private RecordReader<T1, T2> recordReader;
+      private volatile boolean doneReading = false;
+      private volatile long recordsReturned = 0L;
+      // Tracks the progress of the RecordReader.
+      private AtomicDouble progressValue = new AtomicDouble();
+      private transient InputFormat<T1, T2> inputFormatObj;
+      private transient TaskAttemptContext taskAttemptContext;
+
+      private HadoopInputFormatReader(HadoopInputFormatBoundedSource<K, V> source,
+          @Nullable SimpleFunction keyTranslationFunction,
+          @Nullable SimpleFunction valueTranslationFunction,
+          SerializableSplit split,
+          InputFormat inputFormatObj,
+          TaskAttemptContext taskAttemptContext) {
+        this.source = source;
+        this.keyTranslationFunction = keyTranslationFunction;
+        this.valueTranslationFunction = valueTranslationFunction;
+        this.split = split;
+        this.inputFormatObj = inputFormatObj;
+        this.taskAttemptContext = taskAttemptContext;
+      }
+
+      @Override
+      public HadoopInputFormatBoundedSource<K, V> getCurrentSource() {
+        return source;
+      }
+
+      @Override
+      public boolean start() throws IOException {
+        try {
+          recordsReturned = 0;
+          recordReader =
+              (RecordReader<T1, T2>) inputFormatObj.createRecordReader(split.getSplit(),
+                  taskAttemptContext);
+          if (recordReader != null) {
+            recordReader.initialize(split.getSplit(), taskAttemptContext);
+            progressValue.set(getProgress());
+            if (recordReader.nextKeyValue()) {
+              recordsReturned++;
+              doneReading = false;
+              return true;
+            }
+          } else {
+            throw new IOException(String.format("Null RecordReader object returned by %s",
+                inputFormatObj.getClass()));
+          }
+          recordReader = null;
+        } catch (InterruptedException e) {
+          throw new IOException(
+              "Could not read: the thread was interrupted while reading records.",
+              e);
+        }
+        doneReading = true;
+        return false;
+      }
+
+      @Override
+      public boolean advance() throws IOException {
+        try {
+          progressValue.set(getProgress());
+          if (recordReader.nextKeyValue()) {
+            recordsReturned++;
+            return true;
+          }
+          doneReading = true;
+        } catch (InterruptedException e) {
+          throw new IOException("Unable to read data: ", e);
+        }
+        return false;
+      }
+
+      @Override
+      public KV<K, V> getCurrent() {
+        K key = null;
+        V value = null;
+        try {
+          // Transform key if translation function is provided.
+          key =
+              transformKeyOrValue((T1) recordReader.getCurrentKey(), keyTranslationFunction,
+                  keyCoder);
+          // Transform value if translation function is provided.
+          value =
+              transformKeyOrValue((T2) recordReader.getCurrentValue(), valueTranslationFunction,
+                  valueCoder);
+        } catch (IOException | InterruptedException e) {
+          LOG.error("Unable to read data: " + "{}", e);
+          throw new IllegalStateException("Unable to read data: " + "{}", e);
+        }
+        return KV.of(key, value);
+      }
+
+      /**
+       * Returns the transformed key or value object, cloned if it is possibly mutable.
+       * @throws ClassCastException
+       * @throws CoderException
+       */
+      private <T, T3> T3 transformKeyOrValue(T input,
+          @Nullable SimpleFunction<T, T3> simpleFunction, Coder<T3> coder) throws CoderException,
+          ClassCastException {
+        T3 output;
+        if (null != simpleFunction) {
+          output = simpleFunction.apply(input);
+        } else {
+          output = (T3) input;
+        }
+        return cloneIfPossiblyMutable((T3) output, coder);
+      }
+
+      /**
+       * Beam expects immutable objects, but the Hadoop InputFormats tend to re-use the same object
+       * when returning them. Hence, mutable objects returned by Hadoop InputFormats are cloned.
+       */
+      private <T> T cloneIfPossiblyMutable(T input, Coder<T> coder) throws CoderException,
+          ClassCastException {
+        // If the input object is not of known immutable type, clone the object.
+        if (!isKnownImmutable(input)) {
+          input = CoderUtils.clone(coder, input);
+        }
+        return input;
+      }
+
+      /**
+       * Utility method to check if the passed object is of a known immutable type.
+       */
+      private boolean isKnownImmutable(Object o) {
+        Set<Class<?>> immutableTypes = new HashSet<Class<?>>(
+            Arrays.asList(
+                String.class,
+                Byte.class,
+                Short.class,
+                Integer.class,
+                Long.class,
+                Float.class,
+                Double.class,
+                Boolean.class,
+                BigInteger.class,
+                BigDecimal.class));
+        return immutableTypes.contains(o.getClass());
+      }
+
+      @Override
+      public void close() throws IOException {
+        LOG.info("Closing reader after reading {} records.", recordsReturned);
+        if (recordReader != null) {
+          recordReader.close();
+          recordReader = null;
+        }
+      }
+
+      @Override
+      public Double getFractionConsumed() {
+        if (doneReading) {
+          progressValue.set(1.0);
+        } else if (recordReader == null || recordsReturned == 0) {
+          progressValue.set(0.0);
+        }
+        return progressValue.doubleValue();
+      }
+
+      /**
+       * Returns RecordReader's progress.
+       * @throws IOException
+       * @throws InterruptedException
+       */
+      private Double getProgress() throws IOException, InterruptedException {
+        try {
+          return (double) recordReader.getProgress();
+        } catch (IOException e) {
+          LOG.error(
+              "Error in computing the fraction consumed: RecordReader.getProgress() threw an "
+              + "exception.", e);
+          throw new IOException(
+              "Error in computing the fraction consumed: RecordReader.getProgress() threw an "
+              + "exception: " + e.getMessage(), e);
+        }
+      }
+
+      @Override
+      public final long getSplitPointsRemaining() {
+        if (doneReading) {
+          return 0;
+        }
+        /*
+         * This source does not currently support dynamic work rebalancing, so remaining
+         * parallelism is always 1.
+         */
+        return 1;
+      }
+    }
+  }
+
+  /**
+   * A wrapper to allow Hadoop {@link org.apache.hadoop.mapreduce.InputSplit} to be serialized using
+   * Java's standard serialization mechanisms.
+   */
+  public static class SerializableSplit implements Serializable {
+
+    InputSplit inputSplit;
+
+    public SerializableSplit() {}
+
+    public SerializableSplit(InputSplit split) {
+      checkArgument(split instanceof Writable,
+          String.format("Split is not of type Writable: %s", split));
+      this.inputSplit = split;
+    }
+
+    public InputSplit getSplit() {
+      return inputSplit;
+    }
+
+    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
+      ObjectWritable ow = new ObjectWritable();
+      ow.setConf(new Configuration(false));
+      ow.readFields(in);
+      this.inputSplit = (InputSplit) ow.get();
+    }
+
+    private void writeObject(ObjectOutputStream out) throws IOException {
+      new ObjectWritable(inputSplit).write(out);
+    }
+  }
+
+  /**
+   * A wrapper to allow Hadoop {@link org.apache.hadoop.conf.Configuration} to be serialized using
+   * Java's standard serialization mechanisms. Note that the org.apache.hadoop.conf.Configuration
+   * is Writable.
+   */
+  public static class SerializableConfiguration implements Externalizable {
+
+    private Configuration conf;
+
+    public SerializableConfiguration() {}
+
+    public SerializableConfiguration(Configuration conf) {
+      this.conf = conf;
+    }
+
+    public Configuration getHadoopConfiguration() {
+      return conf;
+    }
+
+    @Override
+    public void writeExternal(ObjectOutput out) throws IOException {
+      out.writeUTF(conf.getClass().getCanonicalName());
+      ((Writable) conf).write(out);
+    }
+
+    @Override
+    public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+      String className = in.readUTF();
+      try {
+        conf = (Configuration) Class.forName(className).newInstance();
+        conf.readFields(in);
+      } catch (InstantiationException | IllegalAccessException e) {
+        throw new IOException("Unable to create configuration: " + e);
+      }
+    }
+  }
+}
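
For orientation, here is a minimal usage sketch of the source above. This is a sketch only: the
read()/withConfiguration() entry points belong to the Read transform defined earlier in this file
(not shown in this hunk), and EmployeeInputFormat/Employee are the test classes added below.

    Configuration conf = new Configuration();
    // The bounded source reads these three properties to create and validate the InputFormat.
    conf.setClass("mapreduce.job.inputformat.class", EmployeeInputFormat.class, InputFormat.class);
    conf.setClass("key.class", Text.class, Object.class);
    conf.setClass("value.class", Employee.class, Object.class);

    PCollection<KV<Text, Employee>> employees = pipeline.apply(
        HadoopInputFormatIO.<Text, Employee>read().withConfiguration(conf));

Because key.class and value.class are validated against the InputFormat's type arguments during
split computation, a misconfigured class fails fast rather than mid-decode.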

http://git-wip-us.apache.org/repos/asf/beam/blob/d01620c0/sdks/java/io/hadoop-input-format/src/main/java/org/apache/beam/sdk/io/hadoop/inputformat/package-info.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-input-format/src/main/java/org/apache/beam/sdk/io/hadoop/inputformat/package-info.java b/sdks/java/io/hadoop-input-format/src/main/java/org/apache/beam/sdk/io/hadoop/inputformat/package-info.java
new file mode 100644
index 0000000..5488448
--- /dev/null
+++ b/sdks/java/io/hadoop-input-format/src/main/java/org/apache/beam/sdk/io/hadoop/inputformat/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Defines transforms for reading from data sources that implement the Hadoop InputFormat API.
+ *
+ * @see org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIO
+ */
+package org.apache.beam.sdk.io.hadoop.inputformat;

http://git-wip-us.apache.org/repos/asf/beam/blob/d01620c0/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/ConfigurableEmployeeInputFormat.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/ConfigurableEmployeeInputFormat.java b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/ConfigurableEmployeeInputFormat.java
new file mode 100644
index 0000000..40f949b
--- /dev/null
+++ b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/ConfigurableEmployeeInputFormat.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.beam.sdk.io.hadoop.inputformat;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * A dummy InputFormat for testing reads through HadoopInputFormatIO when the InputFormat
+ * implements Configurable. It validates that the setConf() method is called before getSplits().
+ * Known InputFormats which implement Configurable are DBInputFormat, TableInputFormat, etc.
+ */
+public class ConfigurableEmployeeInputFormat extends InputFormat<Text, Employee> implements
+    Configurable {
+  public boolean isConfSet = false;
+
+  public ConfigurableEmployeeInputFormat() {}
+
+  @Override
+  public Configuration getConf() {
+    return null;
+  }
+
+  /**
+   * Marks the configuration as set; {@link #getSplits(JobContext)} fails unless this method has
+   * been called first.
+   */
+  @Override
+  public void setConf(Configuration conf) {
+    isConfSet = true;
+  }
+
+  @Override
+  public RecordReader<Text, Employee> createRecordReader(InputSplit split,
+      TaskAttemptContext context) throws IOException, InterruptedException {
+    return new ConfigurableEmployeeRecordReader();
+  }
+
+  /**
+   * Returns a list of {@link ConfigurableEmployeeInputSplit}. Throws an exception if
+   * {@link #setConf(Configuration)} was not called.
+   */
+  @Override
+  public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
+    if (!isConfSet) {
+      throw new IOException("Configuration is not set.");
+    }
+    List<InputSplit> splits = new ArrayList<InputSplit>();
+    splits.add(new ConfigurableEmployeeInputSplit());
+    return splits;
+  }
+
+  /**
+   * InputSplit implementation for ConfigurableEmployeeInputFormat.
+   */
+  public class ConfigurableEmployeeInputSplit extends InputSplit implements Writable {
+
+    @Override
+    public void readFields(DataInput arg0) throws IOException {}
+
+    @Override
+    public void write(DataOutput arg0) throws IOException {}
+
+    @Override
+    public long getLength() throws IOException, InterruptedException {
+      return 0;
+    }
+
+    @Override
+    public String[] getLocations() throws IOException, InterruptedException {
+      return null;
+    }
+  }
+
+  /**
+   * RecordReader for ConfigurableEmployeeInputFormat.
+   */
+  public class ConfigurableEmployeeRecordReader extends RecordReader<Text, Employee> {
+
+    @Override
+    public void initialize(InputSplit paramInputSplit, TaskAttemptContext paramTaskAttemptContext)
+        throws IOException, InterruptedException {}
+
+    @Override
+    public boolean nextKeyValue() throws IOException, InterruptedException {
+      return false;
+    }
+
+    @Override
+    public Text getCurrentKey() throws IOException, InterruptedException {
+      return null;
+    }
+
+    @Override
+    public Employee getCurrentValue() throws IOException, InterruptedException {
+      return null;
+    }
+
+    @Override
+    public float getProgress() throws IOException, InterruptedException {
+      return 0;
+    }
+
+    @Override
+    public void close() throws IOException {}
+  }
+}
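
The public isConfSet flag above makes the Configurable contract directly testable. A hedged
sketch of such a check (the JUnit-style assertion and the null JobContext are assumptions for
illustration, not part of this commit):

    ConfigurableEmployeeInputFormat format = new ConfigurableEmployeeInputFormat();
    format.setConf(new Configuration());
    // getSplits() throws IOException("Configuration is not set.") unless setConf() ran first,
    // which is exactly the ordering HadoopInputFormatIO must guarantee for Configurable
    // InputFormats such as DBInputFormat and TableInputFormat.
    List<InputSplit> splits = format.getSplits(null);
    assertTrue(format.isConfSet);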

http://git-wip-us.apache.org/repos/asf/beam/blob/d01620c0/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/Employee.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/Employee.java b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/Employee.java
new file mode 100644
index 0000000..9d4f293
--- /dev/null
+++ b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/Employee.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.beam.sdk.io.hadoop.inputformat;
+
+import org.apache.beam.sdk.coders.AvroCoder;
+import org.apache.beam.sdk.coders.DefaultCoder;
+
+/**
+ * An Employee POJO with two properties: employee name and address. Used in various
+ * {@linkplain HadoopInputFormatIO} unit tests.
+ */
+@DefaultCoder(AvroCoder.class)
+public class Employee {
+  private String empAddress;
+  private String empName;
+
+  /**
+   * Empty constructor required for Avro decoding.
+   */
+  public Employee() {}
+
+  public Employee(String empName, String empAddress) {
+    this.empAddress = empAddress;
+    this.empName = empName;
+  }
+
+  public String getEmpName() {
+    return empName;
+  }
+
+  public void setEmpName(String empName) {
+    this.empName = empName;
+  }
+
+  public String getEmpAddress() {
+    return empAddress;
+  }
+
+  public void setEmpAddress(String empAddress) {
+    this.empAddress = empAddress;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+
+    Employee employeePojo = (Employee) o;
+
+    if (empName != null ? !empName.equals(employeePojo.empName) : employeePojo.empName != null) {
+      return false;
+    }
+    if (empAddress != null ? !empAddress.equals(employeePojo.empAddress)
+        : employeePojo.empAddress != null) {
+      return false;
+    }
+    return true;
+  }
+
+  @Override
+  public int hashCode() {
+    // A constant hash code is consistent with equals() and sufficient for these tests.
+    return 0;
+  }
+
+  @Override
+  public String toString() {
+    return "Employee{" + "Name='" + empName + '\'' + ", Address=" + empAddress + '}';
+  }
+}
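
Since Employee is annotated with @DefaultCoder(AvroCoder.class), the empty constructor noted
above is what lets Avro instantiate decoded objects. A small round-trip sketch (hedged: the
JUnit-style assertion is illustrative only):

    Employee emp = new Employee("Alex", "Seattle");
    // CoderUtils.clone encodes and then decodes with the given coder, the same mechanism
    // HadoopInputFormatIO uses to defend against mutable records.
    Employee copy = CoderUtils.clone(AvroCoder.of(Employee.class), emp);
    assertEquals(emp, copy);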

http://git-wip-us.apache.org/repos/asf/beam/blob/d01620c0/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/EmployeeInputFormat.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/EmployeeInputFormat.java b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/EmployeeInputFormat.java
new file mode 100644
index 0000000..206f9ab
--- /dev/null
+++ b/sdks/java/io/hadoop-input-format/src/test/java/org/apache/beam/sdk/io/hadoop/inputformat/EmployeeInputFormat.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.beam.sdk.io.hadoop.inputformat;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.beam.sdk.values.KV;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+/**
+ * A valid InputFormat for reading employee data, available in the form of {@code List<KV>} as
+ * {@linkplain EmployeeRecordReader#employeeDataList employeeDataList}, which is populated using
+ * {@linkplain TestEmployeeDataSet#populateEmployeeData()}. {@linkplain EmployeeInputFormat} is
+ * used to test whether the {@linkplain HadoopInputFormatIO} source returns immutable records
+ * when the RecordReader creates new key and value objects on every read.
+ */
+public class EmployeeInputFormat extends InputFormat<Text, Employee> {
+
+  public EmployeeInputFormat() {}
+
+  @Override
+  public RecordReader<Text, Employee> createRecordReader(InputSplit split,
+      TaskAttemptContext context) throws IOException, InterruptedException {
+    return new EmployeeRecordReader();
+  }
+
+  @Override
+  public List<InputSplit> getSplits(JobContext arg0) throws IOException, InterruptedException {
+    List<InputSplit> inputSplitList = new ArrayList<InputSplit>();
+    for (int i = 1; i <= TestEmployeeDataSet.NUMBER_OF_SPLITS; i++) {
+      InputSplit inputSplitObj =
+          new NewObjectsEmployeeInputSplit(
+              ((i - 1) * TestEmployeeDataSet.NUMBER_OF_RECORDS_IN_EACH_SPLIT), (i
+                  * TestEmployeeDataSet.NUMBER_OF_RECORDS_IN_EACH_SPLIT - 1));
+      inputSplitList.add(inputSplitObj);
+    }
+    return inputSplitList;
+  }
+
+  /**
+   * InputSplit implementation for EmployeeInputFormat.
+   */
+  public static class NewObjectsEmployeeInputSplit extends InputSplit implements Writable {
+    // Start and end map index of each split of employeeData.
+    private long startIndex;
+    private long endIndex;
+
+    public NewObjectsEmployeeInputSplit() {}
+
+    public NewObjectsEmployeeInputSplit(long startIndex, long endIndex) {
+      this.startIndex = startIndex;
+      this.endIndex = endIndex;
+    }
+
+    /**
+     * Returns the number of records in this split.
+     */
+    @Override
+    public long getLength() throws IOException, InterruptedException {
+      return this.endIndex - this.startIndex + 1;
+    }
+
+    @Override
+    public String[] getLocations() throws IOException, InterruptedException {
+      return null;
+    }
+
+    public long getStartIndex() {
+      return startIndex;
+    }
+
+    public long getEndIndex() {
+      return endIndex;
+    }
+
+    @Override
+    public void readFields(DataInput dataIn) throws IOException {
+      startIndex = dataIn.readLong();
+      endIndex = dataIn.readLong();
+    }
+
+    @Override
+    public void write(DataOutput dataOut) throws IOException {
+      dataOut.writeLong(startIndex);
+      dataOut.writeLong(endIndex);
+    }
+  }
+
+  /**
+   * RecordReader for EmployeeInputFormat.
+   */
+  public class EmployeeRecordReader extends RecordReader<Text, Employee> {
+
+    private NewObjectsEmployeeInputSplit split;
+    private Text currentKey;
+    private Employee currentValue;
+    private long employeeListIndex = 0L;
+    private long recordsRead = 0L;
+    private List<KV<String, String>> employeeDataList;
+
+    public EmployeeRecordReader() {}
+
+    @Override
+    public void close() throws IOException {}
+
+    @Override
+    public Text getCurrentKey() throws IOException, InterruptedException {
+      return currentKey;
+    }
+
+    @Override
+    public Employee getCurrentValue() throws IOException, InterruptedException {
+      return currentValue;
+    }
+
+    @Override
+    public float getProgress() throws IOException, InterruptedException {
+      return (float) recordsRead / split.getLength();
+    }
+
+    @Override
+    public void initialize(InputSplit split, TaskAttemptContext arg1) throws IOException,
+        InterruptedException {
+      this.split = (NewObjectsEmployeeInputSplit) split;
+      employeeListIndex = this.split.getStartIndex() - 1;
+      recordsRead = 0;
+      employeeDataList = TestEmployeeDataSet.populateEmployeeData();
+      currentValue = new Employee(null, null);
+    }
+
+    @Override
+    public boolean nextKeyValue() throws IOException, InterruptedException {
+      if ((recordsRead++) >= split.getLength()) {
+        return false;
+      }
+      employeeListIndex++;
+      KV<String, String> employeeDetails = employeeDataList.get((int) employeeListIndex);
+      String[] empData = employeeDetails.getValue().split("_");
+      /*
+       * New objects must be returned every time for key and value in order to test the scenario
+       * discussed in the class javadoc.
+       */
+      currentKey = new Text(employeeDetails.getKey());
+      currentValue = new Employee(empData[0], empData[1]);
+      return true;
+    }
+  }
+}
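
The immutability hazard this InputFormat exercises is easiest to see with Hadoop's mutable Text
type. An illustrative sketch (not part of the commit):

    Text key = new Text("emp1");
    List<Text> collected = new ArrayList<>();
    collected.add(key);   // stores a reference, not a copy
    key.set("emp2");      // mutates the element already inside the list
    // collected.get(0) now reads "emp2". Because EmployeeRecordReader returns new key/value
    // objects on every nextKeyValue(), the source may emit them directly; readers that reuse
    // objects force the source to clone values via CoderUtils.clone().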


[22/50] [abbrv] beam git commit: [BEAM-1686] Use random MQTT clientID when not defined to avoid NPE

Posted by ke...@apache.org.
[BEAM-1686] Use random MQTT clientID when not defined to avoid NPE


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/752ad8a0
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/752ad8a0
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/752ad8a0

Branch: refs/heads/gearpump-runner
Commit: 752ad8a0ec5597ca84c27cb21862e32d05a81420
Parents: fdba784
Author: Borisa Zivkovic <bo...@huawei.com>
Authored: Fri Mar 10 12:20:13 2017 +0000
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Fri Mar 10 17:00:57 2017 +0100

----------------------------------------------------------------------
 .../org/apache/beam/sdk/io/mqtt/MqttIO.java     |  4 ++
 .../org/apache/beam/sdk/io/mqtt/MqttIOTest.java | 61 ++++++++++++++++++++
 2 files changed, 65 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/752ad8a0/sdks/java/io/mqtt/src/main/java/org/apache/beam/sdk/io/mqtt/MqttIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/mqtt/src/main/java/org/apache/beam/sdk/io/mqtt/MqttIO.java b/sdks/java/io/mqtt/src/main/java/org/apache/beam/sdk/io/mqtt/MqttIO.java
index 26234cf..46f2dcc 100644
--- a/sdks/java/io/mqtt/src/main/java/org/apache/beam/sdk/io/mqtt/MqttIO.java
+++ b/sdks/java/io/mqtt/src/main/java/org/apache/beam/sdk/io/mqtt/MqttIO.java
@@ -209,6 +209,10 @@ public class MqttIO {
         String clientId = getClientId() + "-" + UUID.randomUUID().toString();
         LOG.debug("MQTT client id set to {}", clientId);
         client.setClientId(clientId);
+      } else {
+        String clientId = UUID.randomUUID().toString();
+        LOG.debug("MQTT client id set to random value {}", clientId);
+        client.setClientId(clientId);
       }
       return client;
     }
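
With this fallback in place, a connection configuration without a client ID is safe to use. A
hedged usage sketch (the server URI and topic are placeholders; create() and withMaxNumRecords()
appear in the test below):

    MqttIO.Read read = MqttIO.read()
        .withConnectionConfiguration(
            MqttIO.ConnectionConfiguration.create("tcp://localhost:1883", "MY_TOPIC"))
        .withMaxNumRecords(10);
    // No client ID supplied: the client now receives a random UUID instead of triggering a
    // NullPointerException when connecting.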

http://git-wip-us.apache.org/repos/asf/beam/blob/752ad8a0/sdks/java/io/mqtt/src/test/java/org/apache/beam/sdk/io/mqtt/MqttIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/mqtt/src/test/java/org/apache/beam/sdk/io/mqtt/MqttIOTest.java b/sdks/java/io/mqtt/src/test/java/org/apache/beam/sdk/io/mqtt/MqttIOTest.java
index 8a82f40..28ca5f7 100644
--- a/sdks/java/io/mqtt/src/test/java/org/apache/beam/sdk/io/mqtt/MqttIOTest.java
+++ b/sdks/java/io/mqtt/src/test/java/org/apache/beam/sdk/io/mqtt/MqttIOTest.java
@@ -27,6 +27,7 @@ import java.util.Set;
 
 import org.apache.activemq.broker.BrokerService;
 import org.apache.activemq.broker.Connection;
+import org.apache.beam.sdk.io.mqtt.MqttIO.Read;
 import org.apache.beam.sdk.testing.PAssert;
 import org.apache.beam.sdk.testing.RunnableOnService;
 import org.apache.beam.sdk.testing.TestPipeline;
@@ -79,6 +80,66 @@ public class MqttIOTest {
 
   @Test(timeout = 60 * 1000)
   @Category(RunnableOnService.class)
+  public void testReadNoClientId() throws Exception {
+    final String topicName = "READ_TOPIC_NO_CLIENT_ID";
+    Read mqttReader = MqttIO.read()
+        .withConnectionConfiguration(
+            MqttIO.ConnectionConfiguration.create(
+                "tcp://localhost:" + port,
+                topicName))
+        .withMaxNumRecords(10);
+    PCollection<byte[]> output = pipeline.apply(mqttReader);
+    PAssert.that(output).containsInAnyOrder(
+        "This is test 0".getBytes(),
+        "This is test 1".getBytes(),
+        "This is test 2".getBytes(),
+        "This is test 3".getBytes(),
+        "This is test 4".getBytes(),
+        "This is test 5".getBytes(),
+        "This is test 6".getBytes(),
+        "This is test 7".getBytes(),
+        "This is test 8".getBytes(),
+        "This is test 9".getBytes()
+    );
+
+    // Produce messages on the brokerService in another thread.
+    // Running the publisher in its own thread keeps the pipeline from blocking while it waits
+    // for new messages.
+    MQTT client = new MQTT();
+    client.setHost("tcp://localhost:" + port);
+    final BlockingConnection publishConnection = client.blockingConnection();
+    publishConnection.connect();
+    Thread publisherThread = new Thread() {
+      public void run() {
+        try {
+          LOG.info("Waiting pipeline connected to the MQTT broker before sending "
+              + "messages ...");
+          boolean pipelineConnected = false;
+          while (!pipelineConnected) {
+            Thread.sleep(1000);
+            for (Connection connection : brokerService.getBroker().getClients()) {
+              if (!connection.getConnectionId().isEmpty()) {
+                pipelineConnected = true;
+              }
+            }
+          }
+          for (int i = 0; i < 10; i++) {
+            publishConnection.publish(topicName, ("This is test " + i).getBytes(),
+                QoS.AT_LEAST_ONCE, false);
+          }
+        } catch (Exception e) {
+          // nothing to do
+        }
+      }
+    };
+    publisherThread.start();
+    pipeline.run();
+
+    publishConnection.disconnect();
+    publisherThread.join();
+  }
+
+  @Test(timeout = 60 * 1000)
+  @Category(RunnableOnService.class)
   public void testRead() throws Exception {
     PCollection<byte[]> output = pipeline.apply(
         MqttIO.read()


[10/50] [abbrv] beam git commit: Add license to new files.

Posted by ke...@apache.org.
Add license to new files.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b2da21e2
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b2da21e2
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b2da21e2

Branch: refs/heads/gearpump-runner
Commit: b2da21e287660bb3077bf89e092f7aa3c385906b
Parents: 5b86e1f
Author: Robert Bradshaw <ro...@gmail.com>
Authored: Wed Mar 8 13:47:53 2017 -0800
Committer: Robert Bradshaw <ro...@gmail.com>
Committed: Thu Mar 9 20:29:01 2017 -0800

----------------------------------------------------------------------
 sdks/python/apache_beam/runners/api/__init__.py    | 16 ++++++++++++++++
 .../apache_beam/runners/api/beam_runner_api_pb2.py | 17 +++++++++++++++++
 sdks/python/apache_beam/utils/proto_utils.py       | 17 +++++++++++++++++
 sdks/python/apache_beam/utils/urns.py              | 17 +++++++++++++++++
 4 files changed, 67 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/b2da21e2/sdks/python/apache_beam/runners/api/__init__.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/api/__init__.py b/sdks/python/apache_beam/runners/api/__init__.py
index e69de29..cce3aca 100644
--- a/sdks/python/apache_beam/runners/api/__init__.py
+++ b/sdks/python/apache_beam/runners/api/__init__.py
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#

http://git-wip-us.apache.org/repos/asf/beam/blob/b2da21e2/sdks/python/apache_beam/runners/api/beam_runner_api_pb2.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/api/beam_runner_api_pb2.py b/sdks/python/apache_beam/runners/api/beam_runner_api_pb2.py
index 66c331b..f235ce8 100644
--- a/sdks/python/apache_beam/runners/api/beam_runner_api_pb2.py
+++ b/sdks/python/apache_beam/runners/api/beam_runner_api_pb2.py
@@ -1,3 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
 # source: beam_runner_api.proto
 

http://git-wip-us.apache.org/repos/asf/beam/blob/b2da21e2/sdks/python/apache_beam/utils/proto_utils.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/utils/proto_utils.py b/sdks/python/apache_beam/utils/proto_utils.py
index 0ece8f5..b4bfdca 100644
--- a/sdks/python/apache_beam/utils/proto_utils.py
+++ b/sdks/python/apache_beam/utils/proto_utils.py
@@ -1,3 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 from google.protobuf import any_pb2
 from google.protobuf import struct_pb2
 

http://git-wip-us.apache.org/repos/asf/beam/blob/b2da21e2/sdks/python/apache_beam/utils/urns.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/utils/urns.py b/sdks/python/apache_beam/utils/urns.py
index 4d1c2f7..186c99c 100644
--- a/sdks/python/apache_beam/utils/urns.py
+++ b/sdks/python/apache_beam/utils/urns.py
@@ -1,3 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 PICKLED_WINDOW_FN = "beam:window_fn:pickled_python:v0.1"
 GLOBAL_WINDOWS_FN = "beam:window_fn:global_windows:v0.1"
 FIXED_WINDOWS_FN = "beam:window_fn:fixed_windows:v0.1"


[28/50] [abbrv] beam git commit: This closes #2144: Remove Pipeline.getRunner

Posted by ke...@apache.org.
This closes #2144: Remove Pipeline.getRunner


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e31cb2b4
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e31cb2b4
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e31cb2b4

Branch: refs/heads/gearpump-runner
Commit: e31cb2b451763608b0e0cd2c1c80634284ccaaa2
Parents: 75fe559 d41fe1d
Author: Kenneth Knowles <kl...@google.com>
Authored: Fri Mar 10 13:05:48 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Fri Mar 10 13:05:48 2017 -0800

----------------------------------------------------------------------
 .../beam/runners/direct/DirectRunner.java       | 95 ++++++++++----------
 .../direct/TestStreamEvaluatorFactory.java      | 22 +++--
 .../direct/TestStreamEvaluatorFactoryTest.java  |  6 +-
 .../BatchStatefulParDoOverridesTest.java        |  4 +-
 .../DataflowPipelineTranslatorTest.java         | 39 ++++----
 .../runners/dataflow/DataflowRunnerTest.java    |  4 +-
 .../testing/TestDataflowRunnerTest.java         | 50 +++++------
 .../main/java/org/apache/beam/sdk/Pipeline.java |  7 --
 8 files changed, 110 insertions(+), 117 deletions(-)
----------------------------------------------------------------------



[33/50] [abbrv] beam git commit: Remove duplicated dependency from Dataflow runner pom.xml

Posted by ke...@apache.org.
Remove duplicated dependency from Dataflow runner pom.xml


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/ed5cb8a6
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/ed5cb8a6
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/ed5cb8a6

Branch: refs/heads/gearpump-runner
Commit: ed5cb8a685c1afeb7c894c247c2cf6c12e5515a9
Parents: e31cb2b
Author: Kenneth Knowles <kl...@google.com>
Authored: Fri Mar 10 13:14:58 2017 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Fri Mar 10 13:14:58 2017 -0800

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml | 4 ----
 1 file changed, 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/ed5cb8a6/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index f541394..ff79681 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -357,9 +357,5 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
-      <dependency>
-          <groupId>org.apache.beam</groupId>
-          <artifactId>beam-runners-core-construction-java</artifactId>
-      </dependency>
   </dependencies>
 </project>


[39/50] [abbrv] beam git commit: This closes #2224

Posted by ke...@apache.org.
This closes #2224


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f29bf89c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f29bf89c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f29bf89c

Branch: refs/heads/gearpump-runner
Commit: f29bf89c1df6a81c77e60ef8aac1edd1db37124e
Parents: 39688d8 12016e5
Author: Ahmet Altay <al...@google.com>
Authored: Fri Mar 10 16:32:08 2017 -0800
Committer: Ahmet Altay <al...@google.com>
Committed: Fri Mar 10 16:32:08 2017 -0800

----------------------------------------------------------------------
 sdks/python/tox.ini | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------