Posted to commits@beam.apache.org by al...@apache.org on 2016/05/20 07:15:19 UTC
[01/14] incubator-beam git commit: Add surefire plugin to java 8 example tests
Repository: incubator-beam
Updated Branches:
refs/heads/master dc98211cc -> af8f5935c
Add surefire plugin to java 8 example tests
Without this, the tests will just pick up whatever was last written to
beamTestPipelineOptions.
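For context, a rough sketch of how a test harness might consume that
system property (illustrative only: the property name matches the pom
entry below, but the argument parsing is simplified compared to Beam's
actual TestPipeline, which expects a JSON array of option strings):

import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class TestOptionsSketch {
  // Reads options passed by surefire via -DbeamTestPipelineOptions=...
  public static PipelineOptions fromSystemProperty() {
    String raw = System.getProperty("beamTestPipelineOptions");
    if (raw == null || raw.isEmpty()) {
      // Nothing configured for this module: fall back to defaults instead
      // of inheriting whatever another module wrote last.
      return PipelineOptionsFactory.create();
    }
    // Simplified: split "--name=value" arguments on whitespace.
    return PipelineOptionsFactory.fromArgs(raw.trim().split("\\s+")).create();
  }
}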
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/23ba9764
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/23ba9764
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/23ba9764
Branch: refs/heads/master
Commit: 23ba976403b308103ea8b7dd0505e5847dd44952
Parents: 24bfca2
Author: Aljoscha Krettek <al...@gmail.com>
Authored: Thu May 19 14:05:05 2016 +0200
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri May 20 08:08:24 2016 +0200
----------------------------------------------------------------------
examples/java8/pom.xml | 12 ++++++++++++
1 file changed, 12 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/23ba9764/examples/java8/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java8/pom.xml b/examples/java8/pom.xml
index b4a9ec6..e211739 100644
--- a/examples/java8/pom.xml
+++ b/examples/java8/pom.xml
@@ -51,6 +51,18 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <systemPropertyVariables>
+ <beamTestPipelineOptions>
+ </beamTestPipelineOptions>
+ </systemPropertyVariables>
+ </configuration>
+ </plugin>
+
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
[07/14] incubator-beam git commit: Add TestFlinkPipelineRunner to FlinkRunnerRegistrar
Posted by al...@apache.org.
Add TestFlinkPipelineRunner to FlinkRunnerRegistrar
This makes the runner available for selection by integration tests.
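Once registered via the ServiceLoader-based registrar changed below, the
runner can be selected by name through the standard options mechanism; a
minimal, illustrative sketch:

import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class RunnerSelectionSketch {
  public static PipelineOptions testFlinkOptions() {
    // "--runner=TestFlinkPipelineRunner" resolves through the registrar;
    // without the registration below, this lookup would fail.
    return PipelineOptionsFactory.fromArgs("--runner=TestFlinkPipelineRunner")
        .create();
  }
}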
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/58d66a34
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/58d66a34
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/58d66a34
Branch: refs/heads/master
Commit: 58d66a344985eecc9cc3f43c0ecd5dbc7b4fb2e6
Parents: dc98211
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon May 2 13:11:12 2016 -0700
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri May 20 08:08:24 2016 +0200
----------------------------------------------------------------------
.../beam/runners/flink/FlinkPipelineRunner.java | 16 +----
.../runners/flink/FlinkRunnerRegistrar.java | 4 +-
.../runners/flink/TestFlinkPipelineRunner.java | 66 ++++++++++++++++++++
.../beam/runners/flink/FlinkTestPipeline.java | 2 +-
4 files changed, 71 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/58d66a34/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java
index 3edf6f3..b5ffced 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java
@@ -108,7 +108,7 @@ public class FlinkPipelineRunner extends PipelineRunner<FlinkRunnerResult> {
this.flinkJobEnv.translate(pipeline);
LOG.info("Starting execution of Flink program.");
-
+
JobExecutionResult result;
try {
result = this.flinkJobEnv.executePipeline();
@@ -138,20 +138,6 @@ public class FlinkPipelineRunner extends PipelineRunner<FlinkRunnerResult> {
return options;
}
- /**
- * Constructs a runner with default properties for testing.
- *
- * @return The newly created runner.
- */
- public static FlinkPipelineRunner createForTest(boolean streaming) {
- FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
- // we use [auto] for testing since this will make it pick up the Testing
- // ExecutionEnvironment
- options.setFlinkMaster("[auto]");
- options.setStreaming(streaming);
- return new FlinkPipelineRunner(options);
- }
-
@Override
public <Output extends POutput, Input extends PInput> Output apply(
PTransform<Input, Output> transform, Input input) {
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/58d66a34/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java
index cd99f4e..ec61805 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java
@@ -41,7 +41,9 @@ public class FlinkRunnerRegistrar {
public static class Runner implements PipelineRunnerRegistrar {
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
- return ImmutableList.<Class<? extends PipelineRunner<?>>>of(FlinkPipelineRunner.class);
+ return ImmutableList.<Class<? extends PipelineRunner<?>>>of(
+ FlinkPipelineRunner.class,
+ TestFlinkPipelineRunner.class);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/58d66a34/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java
new file mode 100644
index 0000000..24883c8
--- /dev/null
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.PipelineOptionsValidator;
+import org.apache.beam.sdk.runners.PipelineRunner;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.PInput;
+import org.apache.beam.sdk.values.POutput;
+
+public class TestFlinkPipelineRunner extends PipelineRunner<FlinkRunnerResult> {
+
+ private FlinkPipelineRunner delegate;
+
+ private TestFlinkPipelineRunner(FlinkPipelineOptions options) {
+ // We use [auto] for testing since this will make it pick up the Testing ExecutionEnvironment
+ options.setFlinkMaster("[auto]");
+ this.delegate = FlinkPipelineRunner.fromOptions(options);
+ }
+
+ public static TestFlinkPipelineRunner fromOptions(PipelineOptions options) {
+ FlinkPipelineOptions flinkOptions = PipelineOptionsValidator.validate(FlinkPipelineOptions.class, options);
+ return new TestFlinkPipelineRunner(flinkOptions);
+ }
+
+ public static TestFlinkPipelineRunner create(boolean streaming) {
+ FlinkPipelineOptions flinkOptions = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
+ flinkOptions.setStreaming(streaming);
+ return TestFlinkPipelineRunner.fromOptions(flinkOptions);
+ }
+
+ @Override
+ public <OutputT extends POutput, InputT extends PInput>
+ OutputT apply(PTransform<InputT,OutputT> transform, InputT input) {
+ return delegate.apply(transform, input);
+ }
+
+ @Override
+ public FlinkRunnerResult run(Pipeline pipeline) {
+ return delegate.run(pipeline);
+ }
+
+ public PipelineOptions getPipelineOptions() {
+ return delegate.getPipelineOptions();
+ }
+}
+
+
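For reference, a minimal usage sketch of the new runner, mirroring what
FlinkTestPipeline.create(...) does in the next file (illustrative, not
part of the patch):

// Build a batch-mode test runner and run a pipeline through it directly.
TestFlinkPipelineRunner runner = TestFlinkPipelineRunner.create(false /* streaming */);
Pipeline pipeline = Pipeline.create(runner.getPipelineOptions());
// ... apply transforms to the pipeline ...
FlinkRunnerResult result = runner.run(pipeline);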
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/58d66a34/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java
index f015a66..edde925 100644
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java
+++ b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java
@@ -60,7 +60,7 @@ public class FlinkTestPipeline extends Pipeline {
* @return The Test Pipeline.
*/
private static FlinkTestPipeline create(boolean streaming) {
- FlinkPipelineRunner flinkRunner = FlinkPipelineRunner.createForTest(streaming);
+ TestFlinkPipelineRunner flinkRunner = TestFlinkPipelineRunner.create(streaming);
return new FlinkTestPipeline(flinkRunner, flinkRunner.getPipelineOptions());
}
[12/14] incubator-beam git commit: Disable Flink streaming integration tests for now
Posted by al...@apache.org.
Disable Flink streaming integration tests for now
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/2d71af71
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/2d71af71
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/2d71af71
Branch: refs/heads/master
Commit: 2d71af71c26992c0ccbf2ab6df8f2a0aef5e586b
Parents: 55f39bf
Author: Kenneth Knowles <kl...@google.com>
Authored: Fri May 6 10:55:16 2016 -0700
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri May 20 08:08:24 2016 +0200
----------------------------------------------------------------------
runners/flink/runner/pom.xml | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/2d71af71/runners/flink/runner/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/runner/pom.xml b/runners/flink/runner/pom.xml
index f94ce68..7e8c5a9 100644
--- a/runners/flink/runner/pom.xml
+++ b/runners/flink/runner/pom.xml
@@ -200,6 +200,7 @@
<goal>test</goal>
</goals>
<configuration>
+ <skip>true</skip>
<groups>org.apache.beam.sdk.testing.RunnableOnService</groups>
<parallel>none</parallel>
<failIfNoTests>true</failIfNoTests>
[02/14] incubator-beam git commit: [BEAM-270] Support Timestamps/Windows in Flink Batch
Posted by al...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/TfIdfITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/TfIdfITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/TfIdfITCase.java
deleted file mode 100644
index 547f3c3..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/TfIdfITCase.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.examples.complete.TfIdf;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.StringDelegateCoder;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.Keys;
-import org.apache.beam.sdk.transforms.RemoveDuplicates;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-
-import com.google.common.base.Joiner;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-import java.net.URI;
-
-
-public class TfIdfITCase extends JavaProgramTestBase {
-
- protected String resultPath;
-
- public TfIdfITCase(){
- }
-
- static final String[] EXPECTED_RESULT = new String[] {
- "a", "m", "n", "b", "c", "d"};
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
-
- Pipeline pipeline = FlinkTestPipeline.createForBatch();
-
- pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));
-
- PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf = pipeline
- .apply(Create.of(
- KV.of(new URI("x"), "a b c d"),
- KV.of(new URI("y"), "a b c"),
- KV.of(new URI("z"), "a m n")))
- .apply(new TfIdf.ComputeTfIdf());
-
- PCollection<String> words = wordToUriAndTfIdf
- .apply(Keys.<String>create())
- .apply(RemoveDuplicates.<String>create());
-
- words.apply(TextIO.Write.to(resultPath));
-
- pipeline.run();
- }
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountITCase.java
deleted file mode 100644
index 3254e78..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountITCase.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.examples.WordCount;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.MapElements;
-import org.apache.beam.sdk.values.PCollection;
-
-import com.google.common.base.Joiner;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-import java.util.Arrays;
-import java.util.List;
-
-
-public class WordCountITCase extends JavaProgramTestBase {
-
- protected String resultPath;
-
- public WordCountITCase(){
- }
-
- static final String[] WORDS_ARRAY = new String[] {
- "hi there", "hi", "hi sue bob",
- "hi sue", "", "bob hi"};
-
- static final List<String> WORDS = Arrays.asList(WORDS_ARRAY);
-
- static final String[] COUNTS_ARRAY = new String[] {
- "hi: 5", "there: 1", "sue: 2", "bob: 2"};
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(COUNTS_ARRAY), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
-
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- PCollection<String> input = p.apply(Create.of(WORDS)).setCoder(StringUtf8Coder.of());
-
- input
- .apply(new WordCount.CountWords())
- .apply(MapElements.via(new WordCount.FormatAsTextFn()))
- .apply(TextIO.Write.to(resultPath));
-
- p.run();
- }
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin2ITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin2ITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin2ITCase.java
deleted file mode 100644
index 6570e7d..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin2ITCase.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Count;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.join.CoGbkResult;
-import org.apache.beam.sdk.transforms.join.CoGroupByKey;
-import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.TupleTag;
-
-import com.google.common.base.Joiner;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-
-public class WordCountJoin2ITCase extends JavaProgramTestBase {
-
- static final String[] WORDS_1 = new String[] {
- "hi there", "hi", "hi sue bob",
- "hi sue", "", "bob hi"};
-
- static final String[] WORDS_2 = new String[] {
- "hi tim", "beauty", "hooray sue bob",
- "hi there", "", "please say hi"};
-
- static final String[] RESULTS = new String[] {
- "beauty -> Tag1: Tag2: 1",
- "bob -> Tag1: 2 Tag2: 1",
- "hi -> Tag1: 5 Tag2: 3",
- "hooray -> Tag1: Tag2: 1",
- "please -> Tag1: Tag2: 1",
- "say -> Tag1: Tag2: 1",
- "sue -> Tag1: 2 Tag2: 1",
- "there -> Tag1: 1 Tag2: 1",
- "tim -> Tag1: Tag2: 1"
- };
-
- static final TupleTag<Long> tag1 = new TupleTag<>("Tag1");
- static final TupleTag<Long> tag2 = new TupleTag<>("Tag2");
-
- protected String resultPath;
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(RESULTS), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- /* Create two PCollections and join them */
- PCollection<KV<String,Long>> occurences1 = p.apply(Create.of(WORDS_1))
- .apply(ParDo.of(new ExtractWordsFn()))
- .apply(Count.<String>perElement());
-
- PCollection<KV<String,Long>> occurences2 = p.apply(Create.of(WORDS_2))
- .apply(ParDo.of(new ExtractWordsFn()))
- .apply(Count.<String>perElement());
-
- /* CoGroup the two collections */
- PCollection<KV<String, CoGbkResult>> mergedOccurences = KeyedPCollectionTuple
- .of(tag1, occurences1)
- .and(tag2, occurences2)
- .apply(CoGroupByKey.<String>create());
-
- /* Format output */
- mergedOccurences.apply(ParDo.of(new FormatCountsFn()))
- .apply(TextIO.Write.named("test").to(resultPath));
-
- p.run();
- }
-
-
- static class ExtractWordsFn extends DoFn<String, String> {
-
- @Override
- public void startBundle(Context c) {
- }
-
- @Override
- public void processElement(ProcessContext c) {
- // Split the line into words.
- String[] words = c.element().split("[^a-zA-Z']+");
-
- // Output each word encountered into the output PCollection.
- for (String word : words) {
- if (!word.isEmpty()) {
- c.output(word);
- }
- }
- }
- }
-
- static class FormatCountsFn extends DoFn<KV<String, CoGbkResult>, String> {
- @Override
- public void processElement(ProcessContext c) {
- CoGbkResult value = c.element().getValue();
- String key = c.element().getKey();
- String countTag1 = tag1.getId() + ": ";
- String countTag2 = tag2.getId() + ": ";
- for (Long count : value.getAll(tag1)) {
- countTag1 += count + " ";
- }
- for (Long count : value.getAll(tag2)) {
- countTag2 += count;
- }
- c.output(key + " -> " + countTag1 + countTag2);
- }
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin3ITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin3ITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin3ITCase.java
deleted file mode 100644
index 60dc74a..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin3ITCase.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Count;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.join.CoGbkResult;
-import org.apache.beam.sdk.transforms.join.CoGroupByKey;
-import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.TupleTag;
-
-import com.google.common.base.Joiner;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-
-public class WordCountJoin3ITCase extends JavaProgramTestBase {
-
- static final String[] WORDS_1 = new String[] {
- "hi there", "hi", "hi sue bob",
- "hi sue", "", "bob hi"};
-
- static final String[] WORDS_2 = new String[] {
- "hi tim", "beauty", "hooray sue bob",
- "hi there", "", "please say hi"};
-
- static final String[] WORDS_3 = new String[] {
- "hi stephan", "beauty", "hooray big fabian",
- "hi yo", "", "please say hi"};
-
- static final String[] RESULTS = new String[] {
- "beauty -> Tag1: Tag2: 1 Tag3: 1",
- "bob -> Tag1: 2 Tag2: 1 Tag3: ",
- "hi -> Tag1: 5 Tag2: 3 Tag3: 3",
- "hooray -> Tag1: Tag2: 1 Tag3: 1",
- "please -> Tag1: Tag2: 1 Tag3: 1",
- "say -> Tag1: Tag2: 1 Tag3: 1",
- "sue -> Tag1: 2 Tag2: 1 Tag3: ",
- "there -> Tag1: 1 Tag2: 1 Tag3: ",
- "tim -> Tag1: Tag2: 1 Tag3: ",
- "stephan -> Tag1: Tag2: Tag3: 1",
- "yo -> Tag1: Tag2: Tag3: 1",
- "fabian -> Tag1: Tag2: Tag3: 1",
- "big -> Tag1: Tag2: Tag3: 1"
- };
-
- static final TupleTag<Long> tag1 = new TupleTag<>("Tag1");
- static final TupleTag<Long> tag2 = new TupleTag<>("Tag2");
- static final TupleTag<Long> tag3 = new TupleTag<>("Tag3");
-
- protected String resultPath;
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(RESULTS), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
-
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- /* Create two PCollections and join them */
- PCollection<KV<String,Long>> occurences1 = p.apply(Create.of(WORDS_1))
- .apply(ParDo.of(new ExtractWordsFn()))
- .apply(Count.<String>perElement());
-
- PCollection<KV<String,Long>> occurences2 = p.apply(Create.of(WORDS_2))
- .apply(ParDo.of(new ExtractWordsFn()))
- .apply(Count.<String>perElement());
-
- PCollection<KV<String,Long>> occurences3 = p.apply(Create.of(WORDS_3))
- .apply(ParDo.of(new ExtractWordsFn()))
- .apply(Count.<String>perElement());
-
- /* CoGroup the two collections */
- PCollection<KV<String, CoGbkResult>> mergedOccurences = KeyedPCollectionTuple
- .of(tag1, occurences1)
- .and(tag2, occurences2)
- .and(tag3, occurences3)
- .apply(CoGroupByKey.<String>create());
-
- /* Format output */
- mergedOccurences.apply(ParDo.of(new FormatCountsFn()))
- .apply(TextIO.Write.named("test").to(resultPath));
-
- p.run();
- }
-
-
- static class ExtractWordsFn extends DoFn<String, String> {
-
- @Override
- public void startBundle(Context c) {
- }
-
- @Override
- public void processElement(ProcessContext c) {
- // Split the line into words.
- String[] words = c.element().split("[^a-zA-Z']+");
-
- // Output each word encountered into the output PCollection.
- for (String word : words) {
- if (!word.isEmpty()) {
- c.output(word);
- }
- }
- }
- }
-
- static class FormatCountsFn extends DoFn<KV<String, CoGbkResult>, String> {
- @Override
- public void processElement(ProcessContext c) {
- CoGbkResult value = c.element().getValue();
- String key = c.element().getKey();
- String countTag1 = tag1.getId() + ": ";
- String countTag2 = tag2.getId() + ": ";
- String countTag3 = tag3.getId() + ": ";
- for (Long count : value.getAll(tag1)) {
- countTag1 += count + " ";
- }
- for (Long count : value.getAll(tag2)) {
- countTag2 += count + " ";
- }
- for (Long count : value.getAll(tag3)) {
- countTag3 += count;
- }
- c.output(key + " -> " + countTag1 + countTag2 + countTag3);
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupAlsoByWindowTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupAlsoByWindowTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupAlsoByWindowTest.java
index c76af65..3e5a17d 100644
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupAlsoByWindowTest.java
+++ b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupAlsoByWindowTest.java
@@ -44,6 +44,7 @@ import org.apache.beam.sdk.values.KV;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;
+import org.apache.flink.streaming.util.StreamingMultipleProgramsTestBase;
import org.apache.flink.streaming.util.TestHarnessUtil;
import org.joda.time.Duration;
import org.joda.time.Instant;
@@ -53,7 +54,7 @@ import java.util.Collection;
import java.util.Comparator;
import java.util.concurrent.ConcurrentLinkedQueue;
-public class GroupAlsoByWindowTest {
+public class GroupAlsoByWindowTest extends StreamingMultipleProgramsTestBase {
private final Combine.CombineFn combiner = new Sum.SumIntegerFn();
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/util/JoinExamples.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/util/JoinExamples.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/util/JoinExamples.java
deleted file mode 100644
index e6b7f64..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/util/JoinExamples.java
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.util;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.BigQueryIO;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.options.Validation;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.join.CoGbkResult;
-import org.apache.beam.sdk.transforms.join.CoGroupByKey;
-import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.TupleTag;
-
-import com.google.api.services.bigquery.model.TableRow;
-
-/**
- * Copied from {@link org.apache.beam.examples.JoinExamples} because the code
- * is private there.
- */
-public class JoinExamples {
-
- // A 1000-row sample of the GDELT data here: gdelt-bq:full.events.
- private static final String GDELT_EVENTS_TABLE =
- "clouddataflow-readonly:samples.gdelt_sample";
- // A table that maps country codes to country names.
- private static final String COUNTRY_CODES =
- "gdelt-bq:full.crosswalk_geocountrycodetohuman";
-
- /**
- * Join two collections, using country code as the key.
- */
- public static PCollection<String> joinEvents(PCollection<TableRow> eventsTable,
- PCollection<TableRow> countryCodes) throws Exception {
-
- final TupleTag<String> eventInfoTag = new TupleTag<>();
- final TupleTag<String> countryInfoTag = new TupleTag<>();
-
- // transform both input collections to tuple collections, where the keys are country
- // codes in both cases.
- PCollection<KV<String, String>> eventInfo = eventsTable.apply(
- ParDo.of(new ExtractEventDataFn()));
- PCollection<KV<String, String>> countryInfo = countryCodes.apply(
- ParDo.of(new ExtractCountryInfoFn()));
-
- // country code 'key' -> CGBKR (<event info>, <country name>)
- PCollection<KV<String, CoGbkResult>> kvpCollection = KeyedPCollectionTuple
- .of(eventInfoTag, eventInfo)
- .and(countryInfoTag, countryInfo)
- .apply(CoGroupByKey.<String>create());
-
- // Process the CoGbkResult elements generated by the CoGroupByKey transform.
- // country code 'key' -> string of <event info>, <country name>
- PCollection<KV<String, String>> finalResultCollection =
- kvpCollection.apply(ParDo.of(new DoFn<KV<String, CoGbkResult>, KV<String, String>>() {
- @Override
- public void processElement(ProcessContext c) {
- KV<String, CoGbkResult> e = c.element();
- CoGbkResult val = e.getValue();
- String countryCode = e.getKey();
- String countryName;
- countryName = e.getValue().getOnly(countryInfoTag, "Kostas");
- for (String eventInfo : c.element().getValue().getAll(eventInfoTag)) {
- // Generate a string that combines information from both collection values
- c.output(KV.of(countryCode, "Country name: " + countryName
- + ", Event info: " + eventInfo));
- }
- }
- }));
-
- // write to GCS
- return finalResultCollection
- .apply(ParDo.of(new DoFn<KV<String, String>, String>() {
- @Override
- public void processElement(ProcessContext c) {
- String outputstring = "Country code: " + c.element().getKey()
- + ", " + c.element().getValue();
- c.output(outputstring);
- }
- }));
- }
-
- /**
- * Examines each row (event) in the input table. Output a KV with the key the country
- * code of the event, and the value a string encoding event information.
- */
- static class ExtractEventDataFn extends DoFn<TableRow, KV<String, String>> {
- @Override
- public void processElement(ProcessContext c) {
- TableRow row = c.element();
- String countryCode = (String) row.get("ActionGeo_CountryCode");
- String sqlDate = (String) row.get("SQLDATE");
- String actor1Name = (String) row.get("Actor1Name");
- String sourceUrl = (String) row.get("SOURCEURL");
- String eventInfo = "Date: " + sqlDate + ", Actor1: " + actor1Name + ", url: " + sourceUrl;
- c.output(KV.of(countryCode, eventInfo));
- }
- }
-
-
- /**
- * Examines each row (country info) in the input table. Output a KV with the key the country
- * code, and the value the country name.
- */
- static class ExtractCountryInfoFn extends DoFn<TableRow, KV<String, String>> {
- @Override
- public void processElement(ProcessContext c) {
- TableRow row = c.element();
- String countryCode = (String) row.get("FIPSCC");
- String countryName = (String) row.get("HumanName");
- c.output(KV.of(countryCode, countryName));
- }
- }
-
-
- /**
- * Options supported by {@link JoinExamples}.
- * <p>
- * Inherits standard configuration options.
- */
- private interface Options extends PipelineOptions {
- @Description("Path of the file to write to")
- @Validation.Required
- String getOutput();
- void setOutput(String value);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
- Pipeline p = Pipeline.create(options);
- // the following two 'applys' create multiple inputs to our pipeline, one for each
- // of our two input sources.
- PCollection<TableRow> eventsTable = p.apply(BigQueryIO.Read.from(GDELT_EVENTS_TABLE));
- PCollection<TableRow> countryCodes = p.apply(BigQueryIO.Read.from(COUNTRY_CODES));
- PCollection<String> formattedResults = joinEvents(eventsTable, countryCodes);
- formattedResults.apply(TextIO.Write.to(options.getOutput()));
- p.run();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/UnionCoder.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/UnionCoder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/UnionCoder.java
index 2ca7014..29240e7 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/UnionCoder.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/UnionCoder.java
@@ -35,7 +35,7 @@ import java.util.List;
/**
* A UnionCoder encodes RawUnionValues.
*/
-class UnionCoder extends StandardCoder<RawUnionValue> {
+public class UnionCoder extends StandardCoder<RawUnionValue> {
// TODO: Think about how to integrate this with a schema object (i.e.
// a tuple of tuple tags).
/**
[10/14] incubator-beam git commit: Add hamcrest dependency to Flink Runner
Posted by al...@apache.org.
Add hamcrest dependency to Flink Runner
Without it, the RunnableOnService tests do not seem to work.
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/1664c96d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/1664c96d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/1664c96d
Branch: refs/heads/master
Commit: 1664c96db5951d74b5ab9a5850def1dbef8adea6
Parents: af8e988
Author: Aljoscha Krettek <al...@gmail.com>
Authored: Fri May 6 09:38:55 2016 +0200
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri May 20 08:08:24 2016 +0200
----------------------------------------------------------------------
runners/flink/runner/pom.xml | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/1664c96d/runners/flink/runner/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/runner/pom.xml b/runners/flink/runner/pom.xml
index 7e8c5a9..fda27a8 100644
--- a/runners/flink/runner/pom.xml
+++ b/runners/flink/runner/pom.xml
@@ -88,6 +88,22 @@
<!-- Test scoped -->
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>hamcrest-all</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ <scope>test</scope>
+ </dependency>
+
<!-- Depend on test jar to scan for RunnableOnService tests -->
<dependency>
<groupId>org.apache.beam</groupId>
@@ -124,12 +140,6 @@
<groupId>org.apache.flink</groupId>
<artifactId>flink-test-utils_2.10</artifactId>
<version>${flink.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.mockito</groupId>
- <artifactId>mockito-all</artifactId>
- <scope>test</scope>
</dependency>
<!-- Optional Pipeline Registration -->
<dependency>
[05/14] incubator-beam git commit: [BEAM-270] Support Timestamps/Windows in Flink Batch
Posted by al...@apache.org.
[BEAM-270] Support Timestamps/Windows in Flink Batch
With this change we always use WindowedValue<T> for the underlying Flink
DataSets instead of just T. This allows us to support windowing as well.
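A minimal sketch of what that wrapping carries, assuming the
WindowedValue API of the time (org.apache.beam.sdk.util.WindowedValue):

// A bare T carries only the element; a WindowedValue<T> also carries the
// element's event-time timestamp, its window assignment, and pane info.
WindowedValue<String> wv = WindowedValue.of(
    "hello",                // the element itself
    new Instant(0),         // event-time timestamp (org.joda.time.Instant)
    GlobalWindow.INSTANCE,  // the window the element belongs to
    PaneInfo.NO_FIRING);    // pane information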
This also changes a lot of other things, enabled by the above:
- Use WindowedValue throughout
- Add proper translation for Window.into()
- Make side inputs window aware
- Make GroupByKey and Combine transformations window aware; this
includes support for merging windows. For simplicity, GroupByKey is
implemented as a Combine with a concatenating CombineFn (see the sketch
below)
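A rough sketch of such a concatenating CombineFn (illustrative; the real
implementation lives in the Flink reduce functions added by this patch):

import java.util.ArrayList;
import java.util.List;
import org.apache.beam.sdk.transforms.Combine;

// GroupByKey as a Combine: the accumulator concatenates all values seen
// for a key, so extractOutput yields the grouped values for that key.
class ConcatFn<V> extends Combine.CombineFn<V, List<V>, List<V>> {
  @Override public List<V> createAccumulator() { return new ArrayList<>(); }
  @Override public List<V> addInput(List<V> accum, V input) {
    accum.add(input);
    return accum;
  }
  @Override public List<V> mergeAccumulators(Iterable<List<V>> accums) {
    List<V> result = new ArrayList<>();
    for (List<V> accum : accums) {
      result.addAll(accum);
    }
    return result;
  }
  @Override public List<V> extractOutput(List<V> accum) { return accum; }
}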
This removes Flink-specific transformations for things that are handled
by built-in sources/sinks; among other things, this:
- Removes special translation for AvroIO.Read/Write and
TextIO.Read/Write
- Removes special support for Write.Bound, which was not working
properly and is now handled by the Beam machinery that uses DoFns
- Removes special translation for binary Co-Group; the code was still
there but was never used
- Removes ConsoleIO; this can be done with a plain DoFn (see the sketch
below)
With this change all RunnableOnService tests run on Flink Batch.
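As noted in the list above, the removed ConsoleIO can be replaced by an
ordinary DoFn; an illustrative sketch in the old DoFn style used
elsewhere in this patch:

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;

// Prints every element and passes it through unchanged.
class PrintFn<T> extends DoFn<T, T> {
  @Override
  public void processElement(ProcessContext c) {
    System.out.println(c.element());
    c.output(c.element());
  }
}
// Usage: collection.apply(ParDo.of(new PrintFn<String>()));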
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/24bfca23
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/24bfca23
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/24bfca23
Branch: refs/heads/master
Commit: 24bfca230d5db3cb75dd0e30093a10f7523c1238
Parents: 4e60a49
Author: Aljoscha Krettek <al...@gmail.com>
Authored: Tue May 10 13:53:03 2016 +0200
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri May 20 08:08:24 2016 +0200
----------------------------------------------------------------------
runners/flink/runner/pom.xml | 10 -
.../apache/beam/runners/flink/io/ConsoleIO.java | 82 --
.../FlinkBatchPipelineTranslator.java | 4 +-
.../FlinkBatchTransformTranslators.java | 846 ++++++++++++-------
.../FlinkBatchTranslationContext.java | 56 +-
.../FlinkStreamingTransformTranslators.java | 22 +-
.../FlinkStreamingTranslationContext.java | 29 +-
.../functions/FlinkAssignContext.java | 56 ++
.../functions/FlinkAssignWindows.java | 51 ++
.../FlinkCoGroupKeyedListAggregator.java | 61 --
.../functions/FlinkCreateFunction.java | 63 --
.../functions/FlinkDoFnFunction.java | 194 ++---
.../FlinkKeyedListAggregationFunction.java | 78 --
.../FlinkMergingNonShuffleReduceFunction.java | 238 ++++++
.../FlinkMergingPartialReduceFunction.java | 205 +++++
.../functions/FlinkMergingReduceFunction.java | 207 +++++
.../functions/FlinkMultiOutputDoFnFunction.java | 157 ++--
.../FlinkMultiOutputProcessContext.java | 176 ++++
.../FlinkMultiOutputPruningFunction.java | 25 +-
.../functions/FlinkNoElementAssignContext.java | 71 ++
.../functions/FlinkPartialReduceFunction.java | 171 +++-
.../functions/FlinkProcessContext.java | 324 +++++++
.../functions/FlinkReduceFunction.java | 174 +++-
.../functions/SideInputInitializer.java | 75 ++
.../flink/translation/functions/UnionCoder.java | 152 ----
.../translation/types/CoderTypeInformation.java | 21 +-
.../translation/types/CoderTypeSerializer.java | 14 +-
.../translation/types/KvCoderComperator.java | 102 ++-
.../types/KvCoderTypeInformation.java | 63 +-
.../types/VoidCoderTypeSerializer.java | 112 ---
.../wrappers/CombineFnAggregatorWrapper.java | 94 ---
.../SerializableFnAggregatorWrapper.java | 31 +-
.../translation/wrappers/SinkOutputFormat.java | 10 +-
.../translation/wrappers/SourceInputFormat.java | 18 +-
.../streaming/FlinkGroupByKeyWrapper.java | 10 +-
.../io/FlinkStreamingCreateFunction.java | 9 +-
.../apache/beam/runners/flink/AvroITCase.java | 129 ---
.../beam/runners/flink/FlattenizeITCase.java | 76 --
.../beam/runners/flink/JoinExamplesITCase.java | 102 ---
.../runners/flink/MaybeEmptyTestITCase.java | 66 --
.../runners/flink/ParDoMultiOutputITCase.java | 102 ---
.../beam/runners/flink/ReadSourceITCase.java | 14 +-
.../flink/RemoveDuplicatesEmptyITCase.java | 72 --
.../runners/flink/RemoveDuplicatesITCase.java | 73 --
.../beam/runners/flink/SideInputITCase.java | 70 --
.../apache/beam/runners/flink/TfIdfITCase.java | 80 --
.../beam/runners/flink/WordCountITCase.java | 77 --
.../runners/flink/WordCountJoin2ITCase.java | 140 ---
.../runners/flink/WordCountJoin3ITCase.java | 158 ----
.../flink/streaming/GroupAlsoByWindowTest.java | 3 +-
.../beam/runners/flink/util/JoinExamples.java | 161 ----
.../beam/sdk/transforms/join/UnionCoder.java | 2 +-
52 files changed, 2605 insertions(+), 2731 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/runner/pom.xml b/runners/flink/runner/pom.xml
index fda27a8..b29a5bf 100644
--- a/runners/flink/runner/pom.xml
+++ b/runners/flink/runner/pom.xml
@@ -191,16 +191,6 @@
]
</beamTestPipelineOptions>
</systemPropertyVariables>
- <excludes>
- <!-- Tests that use unsupported windowing -->
- <exclude>**/org/apache/beam/sdk/transforms/CombineTest.java</exclude>
- <exclude>**/org/apache/beam/sdk/transforms/GroupByKeyTest.java</exclude>
- <exclude>**/org/apache/beam/sdk/transforms/ViewTest.java</exclude>
- <exclude>**/org/apache/beam/sdk/transforms/join/CoGroupByKeyTest.java</exclude>
- <exclude>**/org/apache/beam/sdk/transforms/windowing/WindowTest.java</exclude>
- <exclude>**/org/apache/beam/sdk/transforms/windowing/WindowingTest.java</exclude>
- <exclude>**/org/apache/beam/sdk/util/ReshuffleTest.java</exclude>
- </excludes>
</configuration>
</execution>
<execution>
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/io/ConsoleIO.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/io/ConsoleIO.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/io/ConsoleIO.java
deleted file mode 100644
index 9c36c21..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/io/ConsoleIO.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.io;
-
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PDone;
-
-/**
- * Transform for printing the contents of a {@link org.apache.beam.sdk.values.PCollection}.
- * to standard output.
- *
- * This is Flink-specific and will only work when executed using the
- * {@link org.apache.beam.runners.flink.FlinkPipelineRunner}.
- */
-public class ConsoleIO {
-
- /**
- * A PTransform that writes a PCollection to a standard output.
- */
- public static class Write {
-
- /**
- * Returns a ConsoleIO.Write PTransform with a default step name.
- */
- public static Bound create() {
- return new Bound();
- }
-
- /**
- * Returns a ConsoleIO.Write PTransform with the given step name.
- */
- public static Bound named(String name) {
- return new Bound().named(name);
- }
-
- /**
- * A PTransform that writes a bounded PCollection to standard output.
- */
- public static class Bound extends PTransform<PCollection<?>, PDone> {
- private static final long serialVersionUID = 0;
-
- Bound() {
- super("ConsoleIO.Write");
- }
-
- Bound(String name) {
- super(name);
- }
-
- /**
- * Returns a new ConsoleIO.Write PTransform that's like this one but with the given
- * step
- * name. Does not modify this object.
- */
- public Bound named(String name) {
- return new Bound(name);
- }
-
- @Override
- public PDone apply(PCollection<?> input) {
- return PDone.in(input.getPipeline());
- }
- }
- }
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java
index 512b822..69c02a2 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java
@@ -32,8 +32,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * FlinkBatchPipelineTranslator knows how to translate Pipeline objects into Flink Jobs.
- * This is based on {@link org.apache.beam.runners.dataflow.DataflowPipelineTranslator}
+ * {@link Pipeline.PipelineVisitor} for executing a {@link Pipeline} as a
+ * Flink batch job.
*/
public class FlinkBatchPipelineTranslator extends FlinkPipelineTranslator {
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
index 07785aa..8358807 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
@@ -17,23 +17,24 @@
*/
package org.apache.beam.runners.flink.translation;
-import org.apache.beam.runners.flink.io.ConsoleIO;
-import org.apache.beam.runners.flink.translation.functions.FlinkCoGroupKeyedListAggregator;
-import org.apache.beam.runners.flink.translation.functions.FlinkCreateFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkAssignWindows;
import org.apache.beam.runners.flink.translation.functions.FlinkDoFnFunction;
-import org.apache.beam.runners.flink.translation.functions.FlinkKeyedListAggregationFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkMergingNonShuffleReduceFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkMergingPartialReduceFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkMergingReduceFunction;
import org.apache.beam.runners.flink.translation.functions.FlinkMultiOutputDoFnFunction;
import org.apache.beam.runners.flink.translation.functions.FlinkMultiOutputPruningFunction;
import org.apache.beam.runners.flink.translation.functions.FlinkPartialReduceFunction;
import org.apache.beam.runners.flink.translation.functions.FlinkReduceFunction;
-import org.apache.beam.runners.flink.translation.functions.UnionCoder;
import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
import org.apache.beam.runners.flink.translation.types.KvCoderTypeInformation;
import org.apache.beam.runners.flink.translation.wrappers.SinkOutputFormat;
import org.apache.beam.runners.flink.translation.wrappers.SourceInputFormat;
import org.apache.beam.sdk.coders.CannotProvideCoderException;
import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderRegistry;
import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ListCoder;
import org.apache.beam.sdk.coders.VoidCoder;
import org.apache.beam.sdk.io.AvroIO;
import org.apache.beam.sdk.io.BoundedSource;
@@ -41,60 +42,63 @@ import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.io.Write;
import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.CombineFnBase;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.Flatten;
import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.View;
-import org.apache.beam.sdk.transforms.join.CoGbkResult;
-import org.apache.beam.sdk.transforms.join.CoGbkResultSchema;
-import org.apache.beam.sdk.transforms.join.CoGroupByKey;
-import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple;
import org.apache.beam.sdk.transforms.join.RawUnionValue;
+import org.apache.beam.sdk.transforms.join.UnionCoder;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.PValue;
import org.apache.beam.sdk.values.TupleTag;
-import com.google.api.client.util.Maps;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
-import org.apache.flink.api.common.functions.GroupReduceFunction;
+import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.operators.Keys;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
-import org.apache.flink.api.java.io.AvroInputFormat;
import org.apache.flink.api.java.io.AvroOutputFormat;
-import org.apache.flink.api.java.io.TextInputFormat;
-import org.apache.flink.api.java.operators.CoGroupOperator;
import org.apache.flink.api.java.operators.DataSink;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.operators.GroupCombineOperator;
import org.apache.flink.api.java.operators.GroupReduceOperator;
import org.apache.flink.api.java.operators.Grouping;
+import org.apache.flink.api.java.operators.MapOperator;
import org.apache.flink.api.java.operators.MapPartitionOperator;
+import org.apache.flink.api.java.operators.SingleInputUdfOperator;
import org.apache.flink.api.java.operators.UnsortedGrouping;
import org.apache.flink.core.fs.Path;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
- * Translators for transforming
- * Dataflow {@link org.apache.beam.sdk.transforms.PTransform}s to
- * Flink {@link org.apache.flink.api.java.DataSet}s.
+ * Translators for transforming {@link PTransform PTransforms} to
+ * Flink {@link DataSet DataSets}.
*/
public class FlinkBatchTransformTranslators {
@@ -103,113 +107,90 @@ public class FlinkBatchTransformTranslators {
// --------------------------------------------------------------------------------------------
@SuppressWarnings("rawtypes")
- private static final Map<Class<? extends PTransform>, FlinkBatchPipelineTranslator.BatchTransformTranslator> TRANSLATORS = new HashMap<>();
+ private static final Map<
+ Class<? extends PTransform>,
+ FlinkBatchPipelineTranslator.BatchTransformTranslator> TRANSLATORS = new HashMap<>();
- // register the known translators
static {
TRANSLATORS.put(View.CreatePCollectionView.class, new CreatePCollectionViewTranslatorBatch());
TRANSLATORS.put(Combine.PerKey.class, new CombinePerKeyTranslatorBatch());
- // we don't need this because we translate the Combine.PerKey directly
- //TRANSLATORS.put(Combine.GroupedValues.class, new CombineGroupedValuesTranslator());
-
- TRANSLATORS.put(Create.Values.class, new CreateTranslatorBatch());
+ TRANSLATORS.put(GroupByKey.class, new GroupByKeyTranslatorBatch());
TRANSLATORS.put(Flatten.FlattenPCollectionList.class, new FlattenPCollectionTranslatorBatch());
- // TODO we're currently ignoring windows here but that has to change in the future
- TRANSLATORS.put(GroupByKey.class, new GroupByKeyTranslatorBatch());
+ TRANSLATORS.put(Window.Bound.class, new WindowBoundTranslatorBatch());
- TRANSLATORS.put(ParDo.BoundMulti.class, new ParDoBoundMultiTranslatorBatch());
TRANSLATORS.put(ParDo.Bound.class, new ParDoBoundTranslatorBatch());
-
- TRANSLATORS.put(CoGroupByKey.class, new CoGroupByKeyTranslatorBatch());
-
- TRANSLATORS.put(AvroIO.Read.Bound.class, new AvroIOReadTranslatorBatch());
- TRANSLATORS.put(AvroIO.Write.Bound.class, new AvroIOWriteTranslatorBatch());
+ TRANSLATORS.put(ParDo.BoundMulti.class, new ParDoBoundMultiTranslatorBatch());
TRANSLATORS.put(Read.Bounded.class, new ReadSourceTranslatorBatch());
- TRANSLATORS.put(Write.Bound.class, new WriteSinkTranslatorBatch());
-
- TRANSLATORS.put(TextIO.Read.Bound.class, new TextIOReadTranslatorBatch());
- TRANSLATORS.put(TextIO.Write.Bound.class, new TextIOWriteTranslatorBatch());
-
- // Flink-specific
- TRANSLATORS.put(ConsoleIO.Write.Bound.class, new ConsoleIOWriteTranslatorBatch());
-
}
- public static FlinkBatchPipelineTranslator.BatchTransformTranslator<?> getTranslator(PTransform<?, ?> transform) {
+ public static FlinkBatchPipelineTranslator.BatchTransformTranslator<?> getTranslator(
+ PTransform<?, ?> transform) {
return TRANSLATORS.get(transform.getClass());
}
- private static class ReadSourceTranslatorBatch<T> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Read.Bounded<T>> {
+ private static class ReadSourceTranslatorBatch<T>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Read.Bounded<T>> {
@Override
public void translateNode(Read.Bounded<T> transform, FlinkBatchTranslationContext context) {
String name = transform.getName();
BoundedSource<T> source = transform.getSource();
PCollection<T> output = context.getOutput(transform);
- Coder<T> coder = output.getCoder();
- TypeInformation<T> typeInformation = context.getTypeInfo(output);
+ TypeInformation<WindowedValue<T>> typeInformation = context.getTypeInfo(output);
- DataSource<T> dataSource = new DataSource<>(context.getExecutionEnvironment(),
- new SourceInputFormat<>(source, context.getPipelineOptions()), typeInformation, name);
+ DataSource<WindowedValue<T>> dataSource = new DataSource<>(
+ context.getExecutionEnvironment(),
+ new SourceInputFormat<>(source, context.getPipelineOptions()),
+ typeInformation,
+ name);
context.setOutputDataSet(output, dataSource);
}
}
- private static class AvroIOReadTranslatorBatch<T> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<AvroIO.Read.Bound<T>> {
- private static final Logger LOG = LoggerFactory.getLogger(AvroIOReadTranslatorBatch.class);
+ private static class WriteSinkTranslatorBatch<T>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Write.Bound<T>> {
@Override
- public void translateNode(AvroIO.Read.Bound<T> transform, FlinkBatchTranslationContext context) {
- String path = transform.getFilepattern();
+ public void translateNode(Write.Bound<T> transform, FlinkBatchTranslationContext context) {
String name = transform.getName();
-// Schema schema = transform.getSchema();
- PValue output = context.getOutput(transform);
-
- TypeInformation<T> typeInformation = context.getTypeInfo(output);
-
- // This is super hacky, but unfortunately we cannot get the type otherwise
- Class<T> extractedAvroType;
- try {
- Field typeField = transform.getClass().getDeclaredField("type");
- typeField.setAccessible(true);
- @SuppressWarnings("unchecked")
- Class<T> avroType = (Class<T>) typeField.get(transform);
- extractedAvroType = avroType;
- } catch (NoSuchFieldException | IllegalAccessException e) {
- // we know that the field is there and it is accessible
- throw new RuntimeException("Could not access type from AvroIO.Bound", e);
- }
-
- DataSource<T> source = new DataSource<>(context.getExecutionEnvironment(),
- new AvroInputFormat<>(new Path(path), extractedAvroType),
- typeInformation, name);
+ PValue input = context.getInput(transform);
+ DataSet<WindowedValue<T>> inputDataSet = context.getInputDataSet(input);
- context.setOutputDataSet(output, source);
+ inputDataSet.output(new SinkOutputFormat<>(transform, context.getPipelineOptions()))
+ .name(name);
}
}
- private static class AvroIOWriteTranslatorBatch<T> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<AvroIO.Write.Bound<T>> {
+ private static class AvroIOWriteTranslatorBatch<T> implements
+ FlinkBatchPipelineTranslator.BatchTransformTranslator<AvroIO.Write.Bound<T>> {
private static final Logger LOG = LoggerFactory.getLogger(AvroIOWriteTranslatorBatch.class);
+
@Override
- public void translateNode(AvroIO.Write.Bound<T> transform, FlinkBatchTranslationContext context) {
- DataSet<T> inputDataSet = context.getInputDataSet(context.getInput(transform));
+ public void translateNode(
+ AvroIO.Write.Bound<T> transform,
+ FlinkBatchTranslationContext context) {
+ DataSet<WindowedValue<T>> inputDataSet = context.getInputDataSet(context.getInput(transform));
+
String filenamePrefix = transform.getFilenamePrefix();
String filenameSuffix = transform.getFilenameSuffix();
int numShards = transform.getNumShards();
String shardNameTemplate = transform.getShardNameTemplate();
// TODO: Implement these. We need Flink support for this.
- LOG.warn("Translation of TextIO.Write.filenameSuffix not yet supported. Is: {}.",
+ LOG.warn(
+ "Translation of TextIO.Write.filenameSuffix not yet supported. Is: {}.",
filenameSuffix);
- LOG.warn("Translation of TextIO.Write.shardNameTemplate not yet supported. Is: {}.", shardNameTemplate);
+ LOG.warn(
+ "Translation of TextIO.Write.shardNameTemplate not yet supported. Is: {}.",
+ shardNameTemplate);
// This is super hacky, but unfortunately we cannot get the type otherwise
Class<T> extractedAvroType;
@@ -224,8 +205,17 @@ public class FlinkBatchTransformTranslators {
throw new RuntimeException("Could not access type from AvroIO.Bound", e);
}
- DataSink<T> dataSink = inputDataSet.output(new AvroOutputFormat<>(new Path
- (filenamePrefix), extractedAvroType));
+ MapOperator<WindowedValue<T>, T> valueStream = inputDataSet.map(
+ new MapFunction<WindowedValue<T>, T>() {
+ @Override
+ public T map(WindowedValue<T> value) throws Exception {
+ return value.getValue();
+ }
+ }).returns(new CoderTypeInformation<>(context.getInput(transform).getCoder()));
+
+
+ DataSink<T> dataSink = valueStream.output(
+ new AvroOutputFormat<>(new Path(filenamePrefix), extractedAvroType));
if (numShards > 0) {
dataSink.setParallelism(numShards);
@@ -233,37 +223,16 @@ public class FlinkBatchTransformTranslators {
}
}
- private static class TextIOReadTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator<TextIO.Read.Bound<String>> {
- private static final Logger LOG = LoggerFactory.getLogger(TextIOReadTranslatorBatch.class);
-
- @Override
- public void translateNode(TextIO.Read.Bound<String> transform, FlinkBatchTranslationContext context) {
- String path = transform.getFilepattern();
- String name = transform.getName();
-
- TextIO.CompressionType compressionType = transform.getCompressionType();
- boolean needsValidation = transform.needsValidation();
-
- // TODO: Implement these. We need Flink support for this.
- LOG.warn("Translation of TextIO.CompressionType not yet supported. Is: {}.", compressionType);
- LOG.warn("Translation of TextIO.Read.needsValidation not yet supported. Is: {}.", needsValidation);
-
- PValue output = context.getOutput(transform);
-
- TypeInformation<String> typeInformation = context.getTypeInfo(output);
- DataSource<String> source = new DataSource<>(context.getExecutionEnvironment(), new TextInputFormat(new Path(path)), typeInformation, name);
-
- context.setOutputDataSet(output, source);
- }
- }
-
- private static class TextIOWriteTranslatorBatch<T> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<TextIO.Write.Bound<T>> {
+ private static class TextIOWriteTranslatorBatch<T>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<TextIO.Write.Bound<T>> {
private static final Logger LOG = LoggerFactory.getLogger(TextIOWriteTranslatorBatch.class);
@Override
- public void translateNode(TextIO.Write.Bound<T> transform, FlinkBatchTranslationContext context) {
+ public void translateNode(
+ TextIO.Write.Bound<T> transform,
+ FlinkBatchTranslationContext context) {
PValue input = context.getInput(transform);
- DataSet<T> inputDataSet = context.getInputDataSet(input);
+ DataSet<WindowedValue<T>> inputDataSet = context.getInputDataSet(input);
String filenamePrefix = transform.getFilenamePrefix();
String filenameSuffix = transform.getFilenameSuffix();
@@ -272,12 +241,25 @@ public class FlinkBatchTransformTranslators {
String shardNameTemplate = transform.getShardNameTemplate();
// TODO: Implement these. We need Flink support for this.
- LOG.warn("Translation of TextIO.Write.needsValidation not yet supported. Is: {}.", needsValidation);
- LOG.warn("Translation of TextIO.Write.filenameSuffix not yet supported. Is: {}.", filenameSuffix);
- LOG.warn("Translation of TextIO.Write.shardNameTemplate not yet supported. Is: {}.", shardNameTemplate);
+ LOG.warn(
+ "Translation of TextIO.Write.needsValidation not yet supported. Is: {}.",
+ needsValidation);
+ LOG.warn(
+ "Translation of TextIO.Write.filenameSuffix not yet supported. Is: {}.",
+ filenameSuffix);
+ LOG.warn(
+ "Translation of TextIO.Write.shardNameTemplate not yet supported. Is: {}.",
+ shardNameTemplate);
- //inputDataSet.print();
- DataSink<T> dataSink = inputDataSet.writeAsText(filenamePrefix);
+ MapOperator<WindowedValue<T>, T> valueStream = inputDataSet.map(
+ new MapFunction<WindowedValue<T>, T>() {
+ @Override
+ public T map(WindowedValue<T> value) throws Exception {
+ return value.getValue();
+ }
+ }).returns(new CoderTypeInformation<>(transform.getCoder()));
+
+ DataSink<T> dataSink = valueStream.writeAsText(filenamePrefix);
if (numShards > 0) {
dataSink.setParallelism(numShards);
@@ -285,148 +267,414 @@ public class FlinkBatchTransformTranslators {
}
}
- private static class ConsoleIOWriteTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator<ConsoleIO.Write.Bound> {
+ private static class WindowBoundTranslatorBatch<T>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Window.Bound<T>> {
+
@Override
- public void translateNode(ConsoleIO.Write.Bound transform, FlinkBatchTranslationContext context) {
+ public void translateNode(Window.Bound<T> transform, FlinkBatchTranslationContext context) {
PValue input = context.getInput(transform);
- DataSet<?> inputDataSet = context.getInputDataSet(input);
- inputDataSet.printOnTaskManager(transform.getName());
+
+ TypeInformation<WindowedValue<T>> resultTypeInfo =
+ context.getTypeInfo(context.getOutput(transform));
+
+ DataSet<WindowedValue<T>> inputDataSet = context.getInputDataSet(input);
+
+ @SuppressWarnings("unchecked")
+ final WindowingStrategy<T, ? extends BoundedWindow> windowingStrategy =
+ (WindowingStrategy<T, ? extends BoundedWindow>)
+ context.getOutput(transform).getWindowingStrategy();
+
+ WindowFn<T, ? extends BoundedWindow> windowFn = windowingStrategy.getWindowFn();
+
+ FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction =
+ new FlinkAssignWindows<>(windowFn);
+
+ DataSet<WindowedValue<T>> resultDataSet = inputDataSet
+ .flatMap(assignWindowsFunction)
+ .name(context.getOutput(transform).getName())
+ .returns(resultTypeInfo);
+
+ context.setOutputDataSet(context.getOutput(transform), resultDataSet);
}
}
- private static class WriteSinkTranslatorBatch<T> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Write.Bound<T>> {
+ private static class GroupByKeyTranslatorBatch<K, InputT>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<GroupByKey<K, InputT>> {
@Override
- public void translateNode(Write.Bound<T> transform, FlinkBatchTranslationContext context) {
- String name = transform.getName();
- PValue input = context.getInput(transform);
- DataSet<T> inputDataSet = context.getInputDataSet(input);
+ public void translateNode(
+ GroupByKey<K, InputT> transform,
+ FlinkBatchTranslationContext context) {
+
+ // for now, this is copied from the Combine.PerKey translator. Once we have the new runner API
+ // we can replace GroupByKey by a Combine.PerKey with the Concatenate CombineFn
+
+ DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
+ context.getInputDataSet(context.getInput(transform));
+
+ Combine.KeyedCombineFn<K, InputT, List<InputT>, List<InputT>> combineFn =
+ new Concatenate<InputT>().asKeyedFn();
+
+ KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder();
+
+ Coder<List<InputT>> accumulatorCoder;
+
+ try {
+ accumulatorCoder =
+ combineFn.getAccumulatorCoder(
+ context.getInput(transform).getPipeline().getCoderRegistry(),
+ inputCoder.getKeyCoder(),
+ inputCoder.getValueCoder());
+ } catch (CannotProvideCoderException e) {
+ throw new RuntimeException(e);
+ }
+
+ WindowingStrategy<?, ?> windowingStrategy =
+ context.getInput(transform).getWindowingStrategy();
+
+ TypeInformation<WindowedValue<KV<K, InputT>>> kvCoderTypeInformation =
+ new KvCoderTypeInformation<>(
+ WindowedValue.getFullCoder(
+ inputCoder,
+ windowingStrategy.getWindowFn().windowCoder()));
+
+ TypeInformation<WindowedValue<KV<K, List<InputT>>>> partialReduceTypeInfo =
+ new KvCoderTypeInformation<>(
+ WindowedValue.getFullCoder(
+ KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder),
+ windowingStrategy.getWindowFn().windowCoder()));
+
+ Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
+ new UnsortedGrouping<>(
+ inputDataSet,
+ new Keys.ExpressionKeys<>(new String[]{"key"},
+ kvCoderTypeInformation));
+
+ FlinkPartialReduceFunction<K, InputT, List<InputT>, ?> partialReduceFunction;
+ FlinkReduceFunction<K, List<InputT>, List<InputT>, ?> reduceFunction;
+
+ if (windowingStrategy.getWindowFn().isNonMerging()) {
+ @SuppressWarnings("unchecked")
+ WindowingStrategy<?, BoundedWindow> boundedStrategy =
+ (WindowingStrategy<?, BoundedWindow>) windowingStrategy;
+
+ partialReduceFunction = new FlinkPartialReduceFunction<>(
+ combineFn,
+ boundedStrategy,
+ Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
+ context.getPipelineOptions());
+
+ reduceFunction = new FlinkReduceFunction<>(
+ combineFn,
+ boundedStrategy,
+ Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
+ context.getPipelineOptions());
+
+ } else {
+ if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
+ throw new UnsupportedOperationException(
+ "Merging WindowFn with windows other than IntervalWindow are not supported.");
+ }
+
+ @SuppressWarnings("unchecked")
+ WindowingStrategy<?, IntervalWindow> intervalStrategy =
+ (WindowingStrategy<?, IntervalWindow>) windowingStrategy;
+
+ partialReduceFunction = new FlinkMergingPartialReduceFunction<>(
+ combineFn,
+ intervalStrategy,
+ Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
+ context.getPipelineOptions());
+
+ reduceFunction = new FlinkMergingReduceFunction<>(
+ combineFn,
+ intervalStrategy,
+ Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
+ context.getPipelineOptions());
+ }
+
+ // Partially GroupReduce the values into the intermediate format AccumT (combine)
+ GroupCombineOperator<
+ WindowedValue<KV<K, InputT>>,
+ WindowedValue<KV<K, List<InputT>>>> groupCombine =
+ new GroupCombineOperator<>(
+ inputGrouping,
+ partialReduceTypeInfo,
+ partialReduceFunction,
+ "GroupCombine: " + transform.getName());
+
+ Grouping<WindowedValue<KV<K, List<InputT>>>> intermediateGrouping =
+ new UnsortedGrouping<>(
+ groupCombine, new Keys.ExpressionKeys<>(new String[]{"key"}, groupCombine.getType()));
+
+ // Fully reduce the values and create output format List<InputT>
+ GroupReduceOperator<
+ WindowedValue<KV<K, List<InputT>>>, WindowedValue<KV<K, List<InputT>>>> outputDataSet =
+ new GroupReduceOperator<>(
+ intermediateGrouping, partialReduceTypeInfo, reduceFunction, transform.getName());
+
+ context.setOutputDataSet(context.getOutput(transform), outputDataSet);
- inputDataSet.output(new SinkOutputFormat<>(transform, context.getPipelineOptions())).name(name);
}
}
/**
- * Translates a GroupByKey while ignoring window assignments. Current ignores windows.
+ * Combiner that combines {@code T}s into a single {@code List<T>} containing all inputs.
+ *
+ * <p>For internal use to translate {@link GroupByKey}. For a large {@link PCollection} this
+ * is expected to crash!
+ *
+ * <p>This is copied from the Dataflow runner code.
+ *
+ * @param <T> the type of elements to concatenate.
*/
- private static class GroupByKeyTranslatorBatch<K, V> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<GroupByKey<K, V>> {
+ private static class Concatenate<T> extends Combine.CombineFn<T, List<T>, List<T>> {
+ @Override
+ public List<T> createAccumulator() {
+ return new ArrayList<T>();
+ }
@Override
- public void translateNode(GroupByKey<K, V> transform, FlinkBatchTranslationContext context) {
- DataSet<KV<K, V>> inputDataSet = context.getInputDataSet(context.getInput(transform));
- GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> groupReduceFunction = new FlinkKeyedListAggregationFunction<>();
+ public List<T> addInput(List<T> accumulator, T input) {
+ accumulator.add(input);
+ return accumulator;
+ }
- TypeInformation<KV<K, Iterable<V>>> typeInformation = context.getTypeInfo(context.getOutput(transform));
+ @Override
+ public List<T> mergeAccumulators(Iterable<List<T>> accumulators) {
+ List<T> result = createAccumulator();
+ for (List<T> accumulator : accumulators) {
+ result.addAll(accumulator);
+ }
+ return result;
+ }
- Grouping<KV<K, V>> grouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet.getType()));
+ @Override
+ public List<T> extractOutput(List<T> accumulator) {
+ return accumulator;
+ }
- GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> outputDataSet =
- new GroupReduceOperator<>(grouping, typeInformation, groupReduceFunction, transform.getName());
+ @Override
+ public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
+ return ListCoder.of(inputCoder);
+ }
- context.setOutputDataSet(context.getOutput(transform), outputDataSet);
+ @Override
+ public Coder<List<T>> getDefaultOutputCoder(CoderRegistry registry, Coder<T> inputCoder) {
+ return ListCoder.of(inputCoder);
}
}
- private static class CombinePerKeyTranslatorBatch<K, VI, VA, VO> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Combine.PerKey<K, VI, VO>> {
+
+ private static class CombinePerKeyTranslatorBatch<K, InputT, AccumT, OutputT>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
+ Combine.PerKey<K, InputT, OutputT>> {
@Override
- public void translateNode(Combine.PerKey<K, VI, VO> transform, FlinkBatchTranslationContext context) {
- DataSet<KV<K, VI>> inputDataSet = context.getInputDataSet(context.getInput(transform));
+ @SuppressWarnings("unchecked")
+ public void translateNode(
+ Combine.PerKey<K, InputT, OutputT> transform,
+ FlinkBatchTranslationContext context) {
+ DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
+ context.getInputDataSet(context.getInput(transform));
- @SuppressWarnings("unchecked")
- Combine.KeyedCombineFn<K, VI, VA, VO> keyedCombineFn = (Combine.KeyedCombineFn<K, VI, VA, VO>) transform.getFn();
+ CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> combineFn =
+ (CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT>) transform.getFn();
+
+ KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder();
- KvCoder<K, VI> inputCoder = (KvCoder<K, VI>) context.getInput(transform).getCoder();
+ Coder<AccumT> accumulatorCoder;
- Coder<VA> accumulatorCoder =
- null;
try {
- accumulatorCoder = keyedCombineFn.getAccumulatorCoder(context.getInput(transform).getPipeline().getCoderRegistry(), inputCoder.getKeyCoder(), inputCoder.getValueCoder());
+ accumulatorCoder =
+ combineFn.getAccumulatorCoder(
+ context.getInput(transform).getPipeline().getCoderRegistry(),
+ inputCoder.getKeyCoder(),
+ inputCoder.getValueCoder());
} catch (CannotProvideCoderException e) {
- e.printStackTrace();
- // TODO
+ throw new RuntimeException(e);
}
- TypeInformation<KV<K, VI>> kvCoderTypeInformation = new KvCoderTypeInformation<>(inputCoder);
- TypeInformation<KV<K, VA>> partialReduceTypeInfo = new KvCoderTypeInformation<>(KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder));
+ WindowingStrategy<?, ?> windowingStrategy =
+ context.getInput(transform).getWindowingStrategy();
+
+ TypeInformation<WindowedValue<KV<K, InputT>>> kvCoderTypeInformation =
+ new KvCoderTypeInformation<>(
+ WindowedValue.getFullCoder(
+ inputCoder,
+ windowingStrategy.getWindowFn().windowCoder()));
+
+ TypeInformation<WindowedValue<KV<K, AccumT>>> partialReduceTypeInfo =
+ new KvCoderTypeInformation<>(
+ WindowedValue.getFullCoder(
+ KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder),
+ windowingStrategy.getWindowFn().windowCoder()));
+
+ Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
+ new UnsortedGrouping<>(
+ inputDataSet,
+ new Keys.ExpressionKeys<>(new String[]{"key"},
+ kvCoderTypeInformation));
+
+ // construct a map from side input to WindowingStrategy so that
+ // the DoFn runner can map main-input windows to side input windows
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
+ for (PCollectionView<?> sideInput: transform.getSideInputs()) {
+ sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
+ }
- Grouping<KV<K, VI>> inputGrouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, kvCoderTypeInformation));
+ if (windowingStrategy.getWindowFn().isNonMerging()) {
+ WindowingStrategy<?, BoundedWindow> boundedStrategy =
+ (WindowingStrategy<?, BoundedWindow>) windowingStrategy;
+
+ FlinkPartialReduceFunction<K, InputT, AccumT, ?> partialReduceFunction =
+ new FlinkPartialReduceFunction<>(
+ combineFn,
+ boundedStrategy,
+ sideInputStrategies,
+ context.getPipelineOptions());
+
+ FlinkReduceFunction<K, AccumT, OutputT, ?> reduceFunction =
+ new FlinkReduceFunction<>(
+ combineFn,
+ boundedStrategy,
+ sideInputStrategies,
+ context.getPipelineOptions());
+
+ // Partially GroupReduce the values into the intermediate format AccumT (combine)
+ GroupCombineOperator<
+ WindowedValue<KV<K, InputT>>,
+ WindowedValue<KV<K, AccumT>>> groupCombine =
+ new GroupCombineOperator<>(
+ inputGrouping,
+ partialReduceTypeInfo,
+ partialReduceFunction,
+ "GroupCombine: " + transform.getName());
+
+ transformSideInputs(transform.getSideInputs(), groupCombine, context);
+
+ TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo =
+ context.getTypeInfo(context.getOutput(transform));
+
+ Grouping<WindowedValue<KV<K, AccumT>>> intermediateGrouping =
+ new UnsortedGrouping<>(
+ groupCombine,
+ new Keys.ExpressionKeys<>(new String[]{"key"}, groupCombine.getType()));
+
+ // Fully reduce the values and create output format OutputT
+ GroupReduceOperator<
+ WindowedValue<KV<K, AccumT>>, WindowedValue<KV<K, OutputT>>> outputDataSet =
+ new GroupReduceOperator<>(
+ intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName());
+
+ transformSideInputs(transform.getSideInputs(), outputDataSet, context);
+
+ context.setOutputDataSet(context.getOutput(transform), outputDataSet);
- FlinkPartialReduceFunction<K, VI, VA> partialReduceFunction = new FlinkPartialReduceFunction<>(keyedCombineFn);
+ } else {
+ if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
+ throw new UnsupportedOperationException(
+ "Merging WindowFn with windows other than IntervalWindow are not supported.");
+ }
- // Partially GroupReduce the values into the intermediate format VA (combine)
- GroupCombineOperator<KV<K, VI>, KV<K, VA>> groupCombine =
- new GroupCombineOperator<>(inputGrouping, partialReduceTypeInfo, partialReduceFunction,
- "GroupCombine: " + transform.getName());
+ // for merging windows we can't do a pre-shuffle combine step since
+ // elements would not be in their correct windows for side-input access
- // Reduce fully to VO
- GroupReduceFunction<KV<K, VA>, KV<K, VO>> reduceFunction = new FlinkReduceFunction<>(keyedCombineFn);
+ WindowingStrategy<?, IntervalWindow> intervalStrategy =
+ (WindowingStrategy<?, IntervalWindow>) windowingStrategy;
- TypeInformation<KV<K, VO>> reduceTypeInfo = context.getTypeInfo(context.getOutput(transform));
+ FlinkMergingNonShuffleReduceFunction<K, InputT, AccumT, OutputT, ?> reduceFunction =
+ new FlinkMergingNonShuffleReduceFunction<>(
+ combineFn,
+ intervalStrategy,
+ sideInputStrategies,
+ context.getPipelineOptions());
- Grouping<KV<K, VA>> intermediateGrouping = new UnsortedGrouping<>(groupCombine, new Keys.ExpressionKeys<>(new String[]{"key"}, groupCombine.getType()));
+ TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo =
+ context.getTypeInfo(context.getOutput(transform));
+
+ Grouping<WindowedValue<KV<K, InputT>>> grouping =
+ new UnsortedGrouping<>(
+ inputDataSet,
+ new Keys.ExpressionKeys<>(new String[]{"key"}, kvCoderTypeInformation));
+
+ // Fully reduce the values and create output format OutputT
+ GroupReduceOperator<
+ WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, OutputT>>> outputDataSet =
+ new GroupReduceOperator<>(
+ grouping, reduceTypeInfo, reduceFunction, transform.getName());
+
+ transformSideInputs(transform.getSideInputs(), outputDataSet, context);
+
+ context.setOutputDataSet(context.getOutput(transform), outputDataSet);
+ }
- // Fully reduce the values and create output format VO
- GroupReduceOperator<KV<K, VA>, KV<K, VO>> outputDataSet =
- new GroupReduceOperator<>(intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName());
- context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}
}
-// private static class CombineGroupedValuesTranslator<K, VI, VO> implements FlinkPipelineTranslator.TransformTranslator<Combine.GroupedValues<K, VI, VO>> {
-//
-// @Override
-// public void translateNode(Combine.GroupedValues<K, VI, VO> transform, TranslationContext context) {
-// DataSet<KV<K, VI>> inputDataSet = context.getInputDataSet(transform.getInput());
-//
-// Combine.KeyedCombineFn<? super K, ? super VI, ?, VO> keyedCombineFn = transform.getFn();
-//
-// GroupReduceFunction<KV<K, VI>, KV<K, VO>> groupReduceFunction = new FlinkCombineFunction<>(keyedCombineFn);
-//
-// TypeInformation<KV<K, VO>> typeInformation = context.getTypeInfo(transform.getOutput());
-//
-// Grouping<KV<K, VI>> grouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{""}, inputDataSet.getType()));
-//
-// GroupReduceOperator<KV<K, VI>, KV<K, VO>> outputDataSet =
-// new GroupReduceOperator<>(grouping, typeInformation, groupReduceFunction, transform.getName());
-// context.setOutputDataSet(transform.getOutput(), outputDataSet);
-// }
-// }
-
- private static class ParDoBoundTranslatorBatch<IN, OUT> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<ParDo.Bound<IN, OUT>> {
- private static final Logger LOG = LoggerFactory.getLogger(ParDoBoundTranslatorBatch.class);
+ private static class ParDoBoundTranslatorBatch<InputT, OutputT>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
+ ParDo.Bound<InputT, OutputT>> {
@Override
- public void translateNode(ParDo.Bound<IN, OUT> transform, FlinkBatchTranslationContext context) {
- DataSet<IN> inputDataSet = context.getInputDataSet(context.getInput(transform));
+ public void translateNode(
+ ParDo.Bound<InputT, OutputT> transform,
+ FlinkBatchTranslationContext context) {
+ DataSet<WindowedValue<InputT>> inputDataSet =
+ context.getInputDataSet(context.getInput(transform));
- final DoFn<IN, OUT> doFn = transform.getFn();
+ final DoFn<InputT, OutputT> doFn = transform.getFn();
- TypeInformation<OUT> typeInformation = context.getTypeInfo(context.getOutput(transform));
+ TypeInformation<WindowedValue<OutputT>> typeInformation =
+ context.getTypeInfo(context.getOutput(transform));
- FlinkDoFnFunction<IN, OUT> doFnWrapper = new FlinkDoFnFunction<>(doFn, context.getPipelineOptions());
- MapPartitionOperator<IN, OUT> outputDataSet = new MapPartitionOperator<>(inputDataSet, typeInformation, doFnWrapper, transform.getName());
+ List<PCollectionView<?>> sideInputs = transform.getSideInputs();
- transformSideInputs(transform.getSideInputs(), outputDataSet, context);
+ // construct a map from side input to WindowingStrategy so that
+ // the DoFn runner can map main-input windows to side input windows
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
+ for (PCollectionView<?> sideInput: sideInputs) {
+ sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
+ }
+
+ FlinkDoFnFunction<InputT, OutputT> doFnWrapper =
+ new FlinkDoFnFunction<>(
+ doFn,
+ context.getOutput(transform).getWindowingStrategy(),
+ sideInputStrategies,
+ context.getPipelineOptions());
+
+ MapPartitionOperator<WindowedValue<InputT>, WindowedValue<OutputT>> outputDataSet =
+ new MapPartitionOperator<>(
+ inputDataSet,
+ typeInformation,
+ doFnWrapper,
+ transform.getName());
+
+ transformSideInputs(sideInputs, outputDataSet, context);
context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}
}
- private static class ParDoBoundMultiTranslatorBatch<IN, OUT> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<ParDo.BoundMulti<IN, OUT>> {
- private static final Logger LOG = LoggerFactory.getLogger(ParDoBoundMultiTranslatorBatch.class);
+ private static class ParDoBoundMultiTranslatorBatch<InputT, OutputT>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
+ ParDo.BoundMulti<InputT, OutputT>> {
@Override
- public void translateNode(ParDo.BoundMulti<IN, OUT> transform, FlinkBatchTranslationContext context) {
- DataSet<IN> inputDataSet = context.getInputDataSet(context.getInput(transform));
+ public void translateNode(
+ ParDo.BoundMulti<InputT, OutputT> transform,
+ FlinkBatchTranslationContext context) {
+ DataSet<WindowedValue<InputT>> inputDataSet =
+ context.getInputDataSet(context.getInput(transform));
- final DoFn<IN, OUT> doFn = transform.getFn();
+ final DoFn<InputT, OutputT> doFn = transform.getFn();
Map<TupleTag<?>, PCollection<?>> outputs = context.getOutput(transform).getAll();
Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap();
- // put the main output at index 0, FlinkMultiOutputDoFnFunction also expects this
+ // put the main output at index 0; FlinkMultiOutputDoFnFunction expects this
outputMap.put(transform.getMainOutputTag(), 0);
int count = 1;
for (TupleTag<?> tag: outputs.keySet()) {
@@ -435,58 +683,118 @@ public class FlinkBatchTransformTranslators {
}
}
+ // assume that the windowing strategy is the same for all outputs
+ WindowingStrategy<?, ?> windowingStrategy = null;
+
// collect all output Coders and create a UnionCoder for our tagged outputs
List<Coder<?>> outputCoders = Lists.newArrayList();
for (PCollection<?> coll: outputs.values()) {
outputCoders.add(coll.getCoder());
+ windowingStrategy = coll.getWindowingStrategy();
+ }
+
+ if (windowingStrategy == null) {
+ throw new IllegalStateException("No outputs defined.");
}
UnionCoder unionCoder = UnionCoder.of(outputCoders);
- @SuppressWarnings("unchecked")
- TypeInformation<RawUnionValue> typeInformation = new CoderTypeInformation<>(unionCoder);
+ TypeInformation<WindowedValue<RawUnionValue>> typeInformation =
+ new CoderTypeInformation<>(
+ WindowedValue.getFullCoder(
+ unionCoder,
+ windowingStrategy.getWindowFn().windowCoder()));
- @SuppressWarnings("unchecked")
- FlinkMultiOutputDoFnFunction<IN, OUT> doFnWrapper = new FlinkMultiOutputDoFnFunction(doFn, context.getPipelineOptions(), outputMap);
- MapPartitionOperator<IN, RawUnionValue> outputDataSet = new MapPartitionOperator<>(inputDataSet, typeInformation, doFnWrapper, transform.getName());
+ List<PCollectionView<?>> sideInputs = transform.getSideInputs();
- transformSideInputs(transform.getSideInputs(), outputDataSet, context);
+ // construct a map from side input to WindowingStrategy so that
+ // the DoFn runner can map main-input windows to side input windows
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
+ for (PCollectionView<?> sideInput: sideInputs) {
+ sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
+ }
- for (Map.Entry<TupleTag<?>, PCollection<?>> output: outputs.entrySet()) {
- TypeInformation<Object> outputType = context.getTypeInfo(output.getValue());
- int outputTag = outputMap.get(output.getKey());
- FlinkMultiOutputPruningFunction<Object> pruningFunction = new FlinkMultiOutputPruningFunction<>(outputTag);
- FlatMapOperator<RawUnionValue, Object> pruningOperator = new
- FlatMapOperator<>(outputDataSet, outputType,
- pruningFunction, output.getValue().getName());
- context.setOutputDataSet(output.getValue(), pruningOperator);
+ @SuppressWarnings("unchecked")
+ FlinkMultiOutputDoFnFunction<InputT, OutputT> doFnWrapper =
+ new FlinkMultiOutputDoFnFunction(
+ doFn,
+ windowingStrategy,
+ sideInputStrategies,
+ context.getPipelineOptions(),
+ outputMap);
+
+ MapPartitionOperator<WindowedValue<InputT>, WindowedValue<RawUnionValue>> taggedDataSet =
+ new MapPartitionOperator<>(
+ inputDataSet,
+ typeInformation,
+ doFnWrapper,
+ transform.getName());
+
+ transformSideInputs(sideInputs, taggedDataSet, context);
+ for (Map.Entry<TupleTag<?>, PCollection<?>> output: outputs.entrySet()) {
+ pruneOutput(
+ taggedDataSet,
+ context,
+ outputMap.get(output.getKey()),
+ (PCollection) output.getValue());
}
}
+
+ private <T> void pruneOutput(
+ MapPartitionOperator<WindowedValue<InputT>, WindowedValue<RawUnionValue>> taggedDataSet,
+ FlinkBatchTranslationContext context,
+ int integerTag,
+ PCollection<T> collection) {
+ TypeInformation<WindowedValue<T>> outputType = context.getTypeInfo(collection);
+
+ FlinkMultiOutputPruningFunction<T> pruningFunction =
+ new FlinkMultiOutputPruningFunction<>(integerTag);
+
+ FlatMapOperator<WindowedValue<RawUnionValue>, WindowedValue<T>> pruningOperator =
+ new FlatMapOperator<>(
+ taggedDataSet,
+ outputType,
+ pruningFunction,
+ collection.getName());
+
+ context.setOutputDataSet(collection, pruningOperator);
+ }
}
- private static class FlattenPCollectionTranslatorBatch<T> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Flatten.FlattenPCollectionList<T>> {
+ private static class FlattenPCollectionTranslatorBatch<T>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
+ Flatten.FlattenPCollectionList<T>> {
@Override
@SuppressWarnings("unchecked")
- public void translateNode(Flatten.FlattenPCollectionList<T> transform, FlinkBatchTranslationContext context) {
+ public void translateNode(
+ Flatten.FlattenPCollectionList<T> transform,
+ FlinkBatchTranslationContext context) {
+
List<PCollection<T>> allInputs = context.getInput(transform).getAll();
- DataSet<T> result = null;
+ DataSet<WindowedValue<T>> result = null;
+
if (allInputs.isEmpty()) {
+
// create an empty dummy source to satisfy downstream operations
// we cannot create an empty source in Flink, therefore we have to
// add the flatMap that simply never forwards the single element
DataSource<String> dummySource =
context.getExecutionEnvironment().fromElements("dummy");
- result = dummySource.flatMap(new FlatMapFunction<String, T>() {
+ result = dummySource.flatMap(new FlatMapFunction<String, WindowedValue<T>>() {
@Override
- public void flatMap(String s, Collector<T> collector) throws Exception {
+ public void flatMap(String s, Collector<WindowedValue<T>> collector) throws Exception {
// never return anything
}
- }).returns(new CoderTypeInformation<>((Coder<T>) VoidCoder.of()));
+ }).returns(
+ new CoderTypeInformation<>(
+ WindowedValue.getFullCoder(
+ (Coder<T>) VoidCoder.of(),
+ GlobalWindow.Coder.INSTANCE)));
} else {
for (PCollection<T> collection : allInputs) {
- DataSet<T> current = context.getInputDataSet(collection);
+ DataSet<WindowedValue<T>> current = context.getInputDataSet(collection);
if (result == null) {
result = current;
} else {
@@ -494,103 +802,47 @@ public class FlinkBatchTransformTranslators {
}
}
}
- context.setOutputDataSet(context.getOutput(transform), result);
- }
- }
- private static class CreatePCollectionViewTranslatorBatch<R, T> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<View.CreatePCollectionView<R, T>> {
- @Override
- public void translateNode(View.CreatePCollectionView<R, T> transform, FlinkBatchTranslationContext context) {
- DataSet<T> inputDataSet = context.getInputDataSet(context.getInput(transform));
- PCollectionView<T> input = transform.apply(null);
- context.setSideInputDataSet(input, inputDataSet);
+ // insert a dummy filter; there seems to be a bug in Flink that
+ // produces duplicate elements after the union in some cases
+ // if the filter is missing
+ result = result.filter(new FilterFunction<WindowedValue<T>>() {
+ @Override
+ public boolean filter(WindowedValue<T> tWindowedValue) throws Exception {
+ return true;
+ }
+ }).name("UnionFixFilter");
+ context.setOutputDataSet(context.getOutput(transform), result);
}
}
- private static class CreateTranslatorBatch<OUT> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Create.Values<OUT>> {
+ private static class CreatePCollectionViewTranslatorBatch<ElemT, ViewT>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
+ View.CreatePCollectionView<ElemT, ViewT>> {
@Override
- public void translateNode(Create.Values<OUT> transform, FlinkBatchTranslationContext context) {
- TypeInformation<OUT> typeInformation = context.getOutputTypeInfo();
- Iterable<OUT> elements = transform.getElements();
-
- // we need to serialize the elements to byte arrays, since they might contain
- // elements that are not serializable by Java serialization. We deserialize them
- // in the FlatMap function using the Coder.
-
- List<byte[]> serializedElements = Lists.newArrayList();
- Coder<OUT> coder = context.getOutput(transform).getCoder();
- for (OUT element: elements) {
- ByteArrayOutputStream bao = new ByteArrayOutputStream();
- try {
- coder.encode(element, bao, Coder.Context.OUTER);
- serializedElements.add(bao.toByteArray());
- } catch (IOException e) {
- throw new RuntimeException("Could not serialize Create elements using Coder: " + e);
- }
- }
+ public void translateNode(
+ View.CreatePCollectionView<ElemT, ViewT> transform,
+ FlinkBatchTranslationContext context) {
+ DataSet<WindowedValue<ElemT>> inputDataSet =
+ context.getInputDataSet(context.getInput(transform));
- DataSet<Integer> initDataSet = context.getExecutionEnvironment().fromElements(1);
- FlinkCreateFunction<Integer, OUT> flatMapFunction = new FlinkCreateFunction<>(serializedElements, coder);
- FlatMapOperator<Integer, OUT> outputDataSet = new FlatMapOperator<>(initDataSet, typeInformation, flatMapFunction, transform.getName());
+ PCollectionView<ViewT> input = transform.getView();
- context.setOutputDataSet(context.getOutput(transform), outputDataSet);
+ context.setSideInputDataSet(input, inputDataSet);
}
}
- private static void transformSideInputs(List<PCollectionView<?>> sideInputs,
- MapPartitionOperator<?, ?> outputDataSet,
- FlinkBatchTranslationContext context) {
+ private static void transformSideInputs(
+ List<PCollectionView<?>> sideInputs,
+ SingleInputUdfOperator<?, ?, ?> outputDataSet,
+ FlinkBatchTranslationContext context) {
// get corresponding Flink broadcast DataSets
- for(PCollectionView<?> input : sideInputs) {
+ for (PCollectionView<?> input : sideInputs) {
DataSet<?> broadcastSet = context.getSideInputDataSet(input);
outputDataSet.withBroadcastSet(broadcastSet, input.getTagInternal().getId());
}
}
-// Disabled because it depends on a pending pull request to the DataFlowSDK
- /**
- * Special composite transform translator. Only called if the CoGroup is two dimensional.
- * @param <K>
- */
- private static class CoGroupByKeyTranslatorBatch<K, V1, V2> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<CoGroupByKey<K>> {
-
- @Override
- public void translateNode(CoGroupByKey<K> transform, FlinkBatchTranslationContext context) {
- KeyedPCollectionTuple<K> input = context.getInput(transform);
-
- CoGbkResultSchema schema = input.getCoGbkResultSchema();
- List<KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?>> keyedCollections = input.getKeyedCollections();
-
- KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> taggedCollection1 = keyedCollections.get(0);
- KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> taggedCollection2 = keyedCollections.get(1);
-
- TupleTag<?> tupleTag1 = taggedCollection1.getTupleTag();
- TupleTag<?> tupleTag2 = taggedCollection2.getTupleTag();
-
- PCollection<? extends KV<K, ?>> collection1 = taggedCollection1.getCollection();
- PCollection<? extends KV<K, ?>> collection2 = taggedCollection2.getCollection();
-
- DataSet<KV<K,V1>> inputDataSet1 = context.getInputDataSet(collection1);
- DataSet<KV<K,V2>> inputDataSet2 = context.getInputDataSet(collection2);
-
- TypeInformation<KV<K,CoGbkResult>> typeInfo = context.getOutputTypeInfo();
-
- FlinkCoGroupKeyedListAggregator<K,V1,V2> aggregator = new FlinkCoGroupKeyedListAggregator<>(schema, tupleTag1, tupleTag2);
-
- Keys.ExpressionKeys<KV<K,V1>> keySelector1 = new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet1.getType());
- Keys.ExpressionKeys<KV<K,V2>> keySelector2 = new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet2.getType());
-
- DataSet<KV<K, CoGbkResult>> out = new CoGroupOperator<>(inputDataSet1, inputDataSet2,
- keySelector1, keySelector2,
- aggregator, typeInfo, null, transform.getName());
- context.setOutputDataSet(context.getOutput(transform), out);
- }
- }
-
- // --------------------------------------------------------------------------------------------
- // Miscellaneous
- // --------------------------------------------------------------------------------------------
-
private FlinkBatchTransformTranslators() {}
}
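A rough SDK-level sketch of the equivalence the GroupByKeyTranslatorBatch above relies on: GroupByKey behaves like Combine.perKey with a Concatenate-style CombineFn. The names below (a Pipeline built from an assumed `options`, and a public copy of the private Concatenate class added above) are placeholders for illustration, not part of this commit:

    // Sketch only: grouping via Combine.perKey with a Concatenate-style
    // CombineFn, mirroring what the batch GroupByKey translator does.
    Pipeline p = Pipeline.create(options);  // `options` assumed to exist
    PCollection<KV<String, Integer>> input =
        p.apply(Create.of(KV.of("a", 1), KV.of("a", 2), KV.of("b", 3)));
    PCollection<KV<String, List<Integer>>> grouped =
        input.apply(Combine.<String, Integer, List<Integer>>perKey(
            new Concatenate<Integer>()));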
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java
index 501b1ea..ecc3a65 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java
@@ -18,26 +18,28 @@
package org.apache.beam.runners.flink.translation;
import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.runners.flink.translation.types.KvCoderTypeInformation;
import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.AppliedPTransform;
import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.PInput;
import org.apache.beam.sdk.values.POutput;
import org.apache.beam.sdk.values.PValue;
-import org.apache.beam.sdk.values.TypedPValue;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
-import org.apache.flink.api.java.typeutils.GenericTypeInfo;
import java.util.HashMap;
import java.util.Map;
+/**
+ * Helper for {@link FlinkBatchPipelineTranslator} and translators in
+ * {@link FlinkBatchTransformTranslators}.
+ */
public class FlinkBatchTranslationContext {
private final Map<PValue, DataSet<?>> dataSets;
@@ -81,13 +83,13 @@ public class FlinkBatchTranslationContext {
}
@SuppressWarnings("unchecked")
- public <T> DataSet<T> getInputDataSet(PValue value) {
+ public <T> DataSet<WindowedValue<T>> getInputDataSet(PValue value) {
// assume that the DataSet is used as an input if retrieved here
danglingDataSets.remove(value);
- return (DataSet<T>) dataSets.get(value);
+ return (DataSet<WindowedValue<T>>) dataSets.get(value);
}
- public void setOutputDataSet(PValue value, DataSet<?> set) {
+ public <T> void setOutputDataSet(PValue value, DataSet<WindowedValue<T>> set) {
if (!dataSets.containsKey(value)) {
dataSets.put(value, set);
danglingDataSets.put(value, set);
@@ -107,40 +109,32 @@ public class FlinkBatchTranslationContext {
return (DataSet<T>) broadcastDataSets.get(value);
}
- public void setSideInputDataSet(PCollectionView<?> value, DataSet<?> set) {
+ public <ViewT, ElemT> void setSideInputDataSet(
+ PCollectionView<ViewT> value,
+ DataSet<WindowedValue<ElemT>> set) {
if (!broadcastDataSets.containsKey(value)) {
broadcastDataSets.put(value, set);
}
}
-
- @SuppressWarnings("unchecked")
- public <T> TypeInformation<T> getTypeInfo(PInput output) {
- if (output instanceof TypedPValue) {
- Coder<?> outputCoder = ((TypedPValue) output).getCoder();
- if (outputCoder instanceof KvCoder) {
- return new KvCoderTypeInformation((KvCoder) outputCoder);
- } else {
- return new CoderTypeInformation(outputCoder);
- }
- }
- return new GenericTypeInfo<>((Class<T>)Object.class);
- }
-
- public <T> TypeInformation<T> getInputTypeInfo() {
- return getTypeInfo(currentTransform.getInput());
- }
- public <T> TypeInformation<T> getOutputTypeInfo() {
- return getTypeInfo((PValue) currentTransform.getOutput());
+ @SuppressWarnings("unchecked")
+ public <T> TypeInformation<WindowedValue<T>> getTypeInfo(PCollection<T> collection) {
+ Coder<T> valueCoder = collection.getCoder();
+ WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
+ WindowedValue.getFullCoder(
+ valueCoder,
+ collection.getWindowingStrategy().getWindowFn().windowCoder());
+
+ return new CoderTypeInformation<>(windowedValueCoder);
}
@SuppressWarnings("unchecked")
- <I extends PInput> I getInput(PTransform<I, ?> transform) {
- return (I) currentTransform.getInput();
+ <T extends PInput> T getInput(PTransform<T, ?> transform) {
+ return (T) currentTransform.getInput();
}
@SuppressWarnings("unchecked")
- <O extends POutput> O getOutput(PTransform<?, O> transform) {
- return (O) currentTransform.getOutput();
+ <T extends POutput> T getOutput(PTransform<?, T> transform) {
+ return (T) currentTransform.getOutput();
}
}
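For reference, the reworked getTypeInfo above amounts to composing the element coder with the window coder and backing a Flink TypeInformation with the result. A minimal standalone sketch, assuming a PCollection<String> named `words` exists:

    // Sketch: wrap the element coder in a FullWindowedValueCoder using the
    // window coder of the collection's WindowFn, then build TypeInformation.
    Coder<String> valueCoder = words.getCoder();
    WindowedValue.FullWindowedValueCoder<String> windowedValueCoder =
        WindowedValue.getFullCoder(
            valueCoder,
            words.getWindowingStrategy().getWindowFn().windowCoder());
    TypeInformation<WindowedValue<String>> typeInfo =
        new CoderTypeInformation<>(windowedValueCoder);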
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTransformTranslators.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTransformTranslators.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTransformTranslators.java
index 2778d5c..b3fed99 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTransformTranslators.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTransformTranslators.java
@@ -18,7 +18,6 @@
package org.apache.beam.runners.flink.translation;
-import org.apache.beam.runners.flink.translation.functions.UnionCoder;
import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
import org.apache.beam.runners.flink.translation.types.FlinkCoder;
import org.apache.beam.runners.flink.translation.wrappers.SourceInputFormat;
@@ -46,6 +45,7 @@ import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.join.RawUnionValue;
+import org.apache.beam.sdk.transforms.join.UnionCoder;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
@@ -229,29 +229,15 @@ public class FlinkStreamingTransformTranslators {
BoundedSource<T> boundedSource = transform.getSource();
PCollection<T> output = context.getOutput(transform);
- Coder<T> defaultOutputCoder = boundedSource.getDefaultOutputCoder();
- CoderTypeInformation<T> typeInfo = new CoderTypeInformation<>(defaultOutputCoder);
+ TypeInformation<WindowedValue<T>> typeInfo = context.getTypeInfo(output);
- DataStream<T> source = context.getExecutionEnvironment().createInput(
+ DataStream<WindowedValue<T>> source = context.getExecutionEnvironment().createInput(
new SourceInputFormat<>(
boundedSource,
context.getPipelineOptions()),
typeInfo);
- DataStream<WindowedValue<T>> windowedStream = source.flatMap(
- new FlatMapFunction<T, WindowedValue<T>>() {
- @Override
- public void flatMap(T value, Collector<WindowedValue<T>> out) throws Exception {
- out.collect(
- WindowedValue.of(value,
- Instant.now(),
- GlobalWindow.INSTANCE,
- PaneInfo.NO_FIRING));
- }
- })
- .assignTimestampsAndWatermarks(new IngestionTimeExtractor<WindowedValue<T>>());
-
- context.setOutputDataStream(output, windowedStream);
+ context.setOutputDataStream(output, source);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
index 8bc7317..0cb80ba 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java
@@ -17,21 +17,30 @@
*/
package org.apache.beam.runners.flink.translation;
+import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
+import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.AppliedPTransform;
import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PInput;
import org.apache.beam.sdk.values.POutput;
import org.apache.beam.sdk.values.PValue;
import com.google.common.base.Preconditions;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.HashMap;
import java.util.Map;
+/**
+ * Helper for keeping track of which {@link DataStream DataStreams} map
+ * to which {@link PTransform PTransforms}.
+ */
public class FlinkStreamingTranslationContext {
private final StreamExecutionEnvironment env;
@@ -80,12 +89,24 @@ public class FlinkStreamingTranslationContext {
}
@SuppressWarnings("unchecked")
- public <I extends PInput> I getInput(PTransform<I, ?> transform) {
- return (I) currentTransform.getInput();
+ public <T> TypeInformation<WindowedValue<T>> getTypeInfo(PCollection<T> collection) {
+ Coder<T> valueCoder = collection.getCoder();
+ WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
+ WindowedValue.getFullCoder(
+ valueCoder,
+ collection.getWindowingStrategy().getWindowFn().windowCoder());
+
+ return new CoderTypeInformation<>(windowedValueCoder);
+ }
+
+
+ @SuppressWarnings("unchecked")
+ public <T extends PInput> T getInput(PTransform<T, ?> transform) {
+ return (T) currentTransform.getInput();
}
@SuppressWarnings("unchecked")
- public <O extends POutput> O getOutput(PTransform<?, O> transform) {
- return (O) currentTransform.getOutput();
+ public <T extends POutput> T getOutput(PTransform<?, T> transform) {
+ return (T) currentTransform.getOutput();
}
}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java
new file mode 100644
index 0000000..7ea8c20
--- /dev/null
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
+import org.apache.beam.sdk.util.WindowedValue;
+
+import org.joda.time.Instant;
+
+import java.util.Collection;
+
+/**
+ * {@link org.apache.beam.sdk.transforms.windowing.WindowFn.AssignContext} for
+ * Flink functions.
+ */
+class FlinkAssignContext<InputT, W extends BoundedWindow>
+ extends WindowFn<InputT, W>.AssignContext {
+ private final WindowedValue<InputT> value;
+
+ FlinkAssignContext(WindowFn<InputT, W> fn, WindowedValue<InputT> value) {
+ fn.super();
+ this.value = value;
+ }
+
+ @Override
+ public InputT element() {
+ return value.getValue();
+ }
+
+ @Override
+ public Instant timestamp() {
+ return value.getTimestamp();
+ }
+
+ @Override
+ public Collection<? extends BoundedWindow> windows() {
+ return value.getWindows();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java
new file mode 100644
index 0000000..e07e49a
--- /dev/null
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
+import org.apache.beam.sdk.util.WindowedValue;
+
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.util.Collector;
+
+import java.util.Collection;
+
+/**
+ * Flink {@link FlatMapFunction} for implementing
+ * {@link org.apache.beam.sdk.transforms.windowing.Window.Bound}.
+ */
+public class FlinkAssignWindows<T, W extends BoundedWindow>
+ implements FlatMapFunction<WindowedValue<T>, WindowedValue<T>> {
+
+ private final WindowFn<T, W> windowFn;
+
+ public FlinkAssignWindows(WindowFn<T, W> windowFn) {
+ this.windowFn = windowFn;
+ }
+
+ @Override
+ public void flatMap(
+ WindowedValue<T> input, Collector<WindowedValue<T>> collector) throws Exception {
+ Collection<W> windows = windowFn.assignWindows(new FlinkAssignContext<>(windowFn, input));
+ for (W window: windows) {
+ collector.collect(
+ WindowedValue.of(input.getValue(), input.getTimestamp(), window, input.getPane()));
+ }
+ }
+}
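
For orientation, here is a minimal sketch of how this FlatMapFunction slots into a translated batch pipeline. It assumes Beam's FixedWindows and a DataSet of WindowedValues produced by an earlier translation step; the class, method, and variable names are illustrative, not part of this commit.

import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.transforms.windowing.WindowFn;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.flink.api.java.DataSet;
import org.joda.time.Duration;

// Assumes the same package as FlinkAssignWindows.
class AssignWindowsSketch {
  // Assigns one-minute fixed windows to every element; internally,
  // FlinkAssignWindows calls windowFn.assignWindows(...) with the
  // FlinkAssignContext shown above.
  static DataSet<WindowedValue<Object>> assignFixedWindows(
      DataSet<WindowedValue<Object>> input) {
    WindowFn<Object, IntervalWindow> windowFn =
        FixedWindows.of(Duration.standardMinutes(1));
    return input.flatMap(
        new FlinkAssignWindows<Object, IntervalWindow>(windowFn));
  }
}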
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCoGroupKeyedListAggregator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCoGroupKeyedListAggregator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCoGroupKeyedListAggregator.java
deleted file mode 100644
index 8e7cdd7..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCoGroupKeyedListAggregator.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import org.apache.beam.sdk.transforms.join.CoGbkResult;
-import org.apache.beam.sdk.transforms.join.CoGbkResultSchema;
-import org.apache.beam.sdk.transforms.join.RawUnionValue;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.TupleTag;
-
-import org.apache.flink.api.common.functions.CoGroupFunction;
-import org.apache.flink.util.Collector;
-
-import java.util.ArrayList;
-import java.util.List;
-
-
-public class FlinkCoGroupKeyedListAggregator<K,V1,V2> implements CoGroupFunction<KV<K,V1>, KV<K,V2>, KV<K, CoGbkResult>>{
-
- private CoGbkResultSchema schema;
- private TupleTag<?> tupleTag1;
- private TupleTag<?> tupleTag2;
-
- public FlinkCoGroupKeyedListAggregator(CoGbkResultSchema schema, TupleTag<?> tupleTag1, TupleTag<?> tupleTag2) {
- this.schema = schema;
- this.tupleTag1 = tupleTag1;
- this.tupleTag2 = tupleTag2;
- }
-
- @Override
- public void coGroup(Iterable<KV<K,V1>> first, Iterable<KV<K,V2>> second, Collector<KV<K, CoGbkResult>> out) throws Exception {
- K k = null;
- List<RawUnionValue> result = new ArrayList<>();
- int index1 = schema.getIndex(tupleTag1);
- for (KV<K,?> entry : first) {
- k = entry.getKey();
- result.add(new RawUnionValue(index1, entry.getValue()));
- }
- int index2 = schema.getIndex(tupleTag2);
- for (KV<K,?> entry : second) {
- k = entry.getKey();
- result.add(new RawUnionValue(index2, entry.getValue()));
- }
- out.collect(KV.of(k, new CoGbkResult(schema, result)));
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCreateFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCreateFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCreateFunction.java
deleted file mode 100644
index e5ac748..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCreateFunction.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import org.apache.beam.runners.flink.translation.types.VoidCoderTypeSerializer;
-import org.apache.beam.sdk.coders.Coder;
-
-import org.apache.flink.api.common.functions.FlatMapFunction;
-import org.apache.flink.util.Collector;
-
-import java.io.ByteArrayInputStream;
-import java.util.List;
-
-/**
- * This is a hack for transforming a {@link org.apache.beam.sdk.transforms.Create}
- * operation. Flink does not allow {@code null} in it's equivalent operation:
- * {@link org.apache.flink.api.java.ExecutionEnvironment#fromElements(Object[])}. Therefore
- * we use a DataSource with one dummy element and output the elements of the Create operation
- * inside this FlatMap.
- */
-public class FlinkCreateFunction<IN, OUT> implements FlatMapFunction<IN, OUT> {
-
- private final List<byte[]> elements;
- private final Coder<OUT> coder;
-
- public FlinkCreateFunction(List<byte[]> elements, Coder<OUT> coder) {
- this.elements = elements;
- this.coder = coder;
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public void flatMap(IN value, Collector<OUT> out) throws Exception {
-
- for (byte[] element : elements) {
- ByteArrayInputStream bai = new ByteArrayInputStream(element);
- OUT outValue = coder.decode(bai, Coder.Context.OUTER);
- if (outValue == null) {
- // TODO Flink doesn't allow null values in records
- out.collect((OUT) VoidCoderTypeSerializer.VoidValue.INSTANCE);
- } else {
- out.collect(outValue);
- }
- }
-
- out.close();
- }
-}
[03/14] incubator-beam git commit: [BEAM-270] Support
Timestamps/Windows in Flink Batch
Posted by al...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java
index 43e458f..9cbc6b9 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java
@@ -17,43 +17,179 @@
*/
package org.apache.beam.runners.flink.translation.functions;
-import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.CombineFnBase;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.PerKeyCombineFnRunner;
+import org.apache.beam.sdk.util.PerKeyCombineFnRunners;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
-import org.apache.flink.api.common.functions.GroupReduceFunction;
+import org.apache.flink.api.common.functions.RichGroupReduceFunction;
import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
/**
- * Flink {@link org.apache.flink.api.common.functions.GroupReduceFunction} for executing a
- * {@link org.apache.beam.sdk.transforms.Combine.PerKey} operation. This reads the input
- * {@link org.apache.beam.sdk.values.KV} elements, extracts the key and merges the
- * accumulators resulting from the PartialReduce which produced the input VA.
+ * This is the second part for executing a {@link org.apache.beam.sdk.transforms.Combine.PerKey}
+ * on Flink; the first part is {@link FlinkPartialReduceFunction}. This function performs the
+ * final combination of the pre-combined values after a shuffle.
+ *
+ * <p>The inputs to {@link #reduce(Iterable, Collector)} are elements of the same key but
+ * possibly for different windows. We have to ensure that we only combine elements of
+ * matching windows.
*/
-public class FlinkReduceFunction<K, VA, VO> implements GroupReduceFunction<KV<K, VA>, KV<K, VO>> {
+public class FlinkReduceFunction<K, AccumT, OutputT, W extends BoundedWindow>
+ extends RichGroupReduceFunction<WindowedValue<KV<K, AccumT>>, WindowedValue<KV<K, OutputT>>> {
+
+ protected final CombineFnBase.PerKeyCombineFn<K, ?, AccumT, OutputT> combineFn;
+
+ protected final DoFn<KV<K, AccumT>, KV<K, OutputT>> doFn;
+
+ protected final WindowingStrategy<?, W> windowingStrategy;
+
+ protected final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
+
+ protected final SerializedPipelineOptions serializedOptions;
- private final Combine.KeyedCombineFn<K, ?, VA, VO> keyedCombineFn;
+ public FlinkReduceFunction(
+ CombineFnBase.PerKeyCombineFn<K, ?, AccumT, OutputT> keyedCombineFn,
+ WindowingStrategy<?, W> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions pipelineOptions) {
- public FlinkReduceFunction(Combine.KeyedCombineFn<K, ?, VA, VO> keyedCombineFn) {
- this.keyedCombineFn = keyedCombineFn;
+ this.combineFn = keyedCombineFn;
+
+ this.windowingStrategy = windowingStrategy;
+ this.sideInputs = sideInputs;
+
+ this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
+
+ // dummy DoFn because we need one for ProcessContext
+ this.doFn = new DoFn<KV<K, AccumT>, KV<K, OutputT>>() {
+ @Override
+ public void processElement(ProcessContext c) throws Exception {
+
+ }
+ };
}
@Override
- public void reduce(Iterable<KV<K, VA>> values, Collector<KV<K, VO>> out) throws Exception {
- Iterator<KV<K, VA>> it = values.iterator();
+ public void reduce(
+ Iterable<WindowedValue<KV<K, AccumT>>> elements,
+ Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
+
+ FlinkProcessContext<KV<K, AccumT>, KV<K, OutputT>> processContext =
+ new FlinkProcessContext<>(
+ serializedOptions.getPipelineOptions(),
+ getRuntimeContext(),
+ doFn,
+ windowingStrategy,
+ out,
+ sideInputs);
+
+ PerKeyCombineFnRunner<K, ?, AccumT, OutputT> combineFnRunner =
+ PerKeyCombineFnRunners.create(combineFn);
- KV<K, VA> current = it.next();
- K k = current.getKey();
- VA accumulator = current.getValue();
+ @SuppressWarnings("unchecked")
+ OutputTimeFn<? super BoundedWindow> outputTimeFn =
+ (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
- while (it.hasNext()) {
- current = it.next();
- keyedCombineFn.mergeAccumulators(k, ImmutableList.of(accumulator, current.getValue()) );
+
+ // get all elements so that we can sort them; they have to fit into
+ // memory
+ // this seems very imprudent, but it is correct, for now
+ ArrayList<WindowedValue<KV<K, AccumT>>> sortedInput = Lists.newArrayList();
+ for (WindowedValue<KV<K, AccumT>> inputValue: elements) {
+ for (WindowedValue<KV<K, AccumT>> exploded: inputValue.explodeWindows()) {
+ sortedInput.add(exploded);
+ }
+ }
+ Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, AccumT>>>() {
+ @Override
+ public int compare(
+ WindowedValue<KV<K, AccumT>> o1,
+ WindowedValue<KV<K, AccumT>> o2) {
+ return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
+ .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
+ }
+ });
+
+ // iterate over the elements that are sorted by window timestamp
+ //
+ final Iterator<WindowedValue<KV<K, AccumT>>> iterator = sortedInput.iterator();
+
+ // get the first accumulator
+ WindowedValue<KV<K, AccumT>> currentValue = iterator.next();
+ K key = currentValue.getValue().getKey();
+ BoundedWindow currentWindow = Iterables.getFirst(currentValue.getWindows(), null);
+ AccumT accumulator = currentValue.getValue().getValue();
+
+ // we use this to keep track of the timestamps assigned by the OutputTimeFn;
+ // FlinkPartialReduceFunction already merged the timestamps assigned to
+ // individual elements, here we merge those pre-merged timestamps again
+ List<Instant> windowTimestamps = new ArrayList<>();
+ windowTimestamps.add(currentValue.getTimestamp());
+
+ while (iterator.hasNext()) {
+ WindowedValue<KV<K, AccumT>> nextValue = iterator.next();
+ BoundedWindow nextWindow = Iterables.getOnlyElement(nextValue.getWindows());
+
+ if (nextWindow.equals(currentWindow)) {
+ // continue accumulating
+ processContext = processContext.forWindowedValue(nextValue);
+ accumulator = combineFnRunner.mergeAccumulators(
+ key, ImmutableList.of(accumulator, nextValue.getValue().getValue()), processContext);
+
+ windowTimestamps.add(nextValue.getTimestamp());
+ } else {
+ // emit the value that we currently have
+ processContext = processContext.forWindowedValue(currentValue);
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)),
+ outputTimeFn.merge(currentWindow, windowTimestamps),
+ currentWindow,
+ PaneInfo.NO_FIRING));
+
+ windowTimestamps.clear();
+
+ currentWindow = nextWindow;
+ accumulator = nextValue.getValue().getValue();
+ windowTimestamps.add(nextValue.getTimestamp());
+ }
+
+ // we have to keep track of the current value so that we can set the
+ // context to the right windowed value when windows change in the iterable
+ currentValue = nextValue;
}
- out.collect(KV.of(k, keyedCombineFn.extractOutput(k, accumulator)));
+ // if at the end of the iteration we have a change in windows
+ // the ProcessContext will not have been updated
+ processContext = processContext.forWindowedValue(currentValue);
+
+ // emit the final accumulator
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)),
+ outputTimeFn.merge(currentWindow, windowTimestamps),
+ currentWindow,
+ PaneInfo.NO_FIRING));
}
}
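
The windowing logic above is easier to follow on plain data. Below is a self-contained sketch of the same sort-then-fold pattern, with long window ids and integer-sum accumulators standing in for BoundedWindows and a CombineFn; it illustrates the control flow only and is not code from this commit.

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

public class WindowFoldSketch {

  /** A (windowId, accumulator) pair standing in for WindowedValue<KV<K, AccumT>>. */
  static class Entry {
    final long window;
    final int accum;
    Entry(long window, int accum) {
      this.window = window;
      this.accum = accum;
    }
  }

  /**
   * Sorts by window, then merges runs of equal windows in a single pass,
   * emitting one combined value per window -- the same shape as reduce() above.
   * Like reduce(), it assumes a non-empty input.
   */
  static List<Entry> foldByWindow(List<Entry> input) {
    List<Entry> sorted = new ArrayList<>(input);
    Collections.sort(sorted, new Comparator<Entry>() {
      @Override
      public int compare(Entry o1, Entry o2) {
        return Long.compare(o1.window, o2.window);
      }
    });

    List<Entry> output = new ArrayList<>();
    Entry first = sorted.get(0);
    long currentWindow = first.window;
    int accumulator = first.accum;
    for (Entry next : sorted.subList(1, sorted.size())) {
      if (next.window == currentWindow) {
        accumulator += next.accum;                         // mergeAccumulators
      } else {
        output.add(new Entry(currentWindow, accumulator)); // extractOutput + collect
        currentWindow = next.window;
        accumulator = next.accum;
      }
    }
    output.add(new Entry(currentWindow, accumulator));     // emit the final accumulator
    return output;
  }
}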
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java
new file mode 100644
index 0000000..451b31b
--- /dev/null
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.PCollectionView;
+
+import org.apache.flink.api.common.functions.BroadcastVariableInitializer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * {@link BroadcastVariableInitializer} that initializes the broadcast input as a {@code Map}
+ * from window to side input.
+ */
+public class SideInputInitializer<ElemT, ViewT, W extends BoundedWindow>
+ implements BroadcastVariableInitializer<WindowedValue<ElemT>, Map<BoundedWindow, ViewT>> {
+
+ PCollectionView<ViewT> view;
+
+ public SideInputInitializer(PCollectionView<ViewT> view) {
+ this.view = view;
+ }
+
+ @Override
+ public Map<BoundedWindow, ViewT> initializeBroadcastVariable(
+ Iterable<WindowedValue<ElemT>> inputValues) {
+
+ // first partition into windows
+ Map<BoundedWindow, List<WindowedValue<ElemT>>> partitionedElements = new HashMap<>();
+ for (WindowedValue<ElemT> value: inputValues) {
+ for (BoundedWindow window: value.getWindows()) {
+ List<WindowedValue<ElemT>> windowedValues = partitionedElements.get(window);
+ if (windowedValues == null) {
+ windowedValues = new ArrayList<>();
+ partitionedElements.put(window, windowedValues);
+ }
+ windowedValues.add(value);
+ }
+ }
+
+ Map<BoundedWindow, ViewT> resultMap = new HashMap<>();
+
+ for (Map.Entry<BoundedWindow, List<WindowedValue<ElemT>>> elements:
+ partitionedElements.entrySet()) {
+
+ @SuppressWarnings("unchecked")
+ Iterable<WindowedValue<?>> elementsIterable =
+ (List<WindowedValue<?>>) (List<?>) elements.getValue();
+
+ resultMap.put(elements.getKey(), view.fromIterableInternal(elementsIterable));
+ }
+
+ return resultMap;
+ }
+}
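
On the consuming side, a rich function resolves the broadcast variable through this initializer in its open() method. A sketch follows; the broadcast variable name "sideInput" and the enclosing class are illustrative assumptions, not shown in this commit.

import java.util.Map;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.flink.api.common.functions.AbstractRichFunction;
import org.apache.flink.configuration.Configuration;

/** Illustrative consumer of the broadcast side input; not part of this commit. */
abstract class SideInputConsumerSketch<ViewT> extends AbstractRichFunction {
  private final PCollectionView<ViewT> view;
  private transient Map<BoundedWindow, ViewT> sideInput;

  SideInputConsumerSketch(PCollectionView<ViewT> view) {
    this.view = view;
  }

  @Override
  public void open(Configuration parameters) throws Exception {
    // The runtime invokes SideInputInitializer once per task to turn the
    // broadcast WindowedValues into a window-to-side-input map.
    sideInput = getRuntimeContext().getBroadcastVariableWithInitializer(
        "sideInput", new SideInputInitializer<>(view));
  }
}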
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/UnionCoder.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/UnionCoder.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/UnionCoder.java
deleted file mode 100644
index cc6fd8b..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/UnionCoder.java
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.StandardCoder;
-import org.apache.beam.sdk.transforms.join.RawUnionValue;
-import org.apache.beam.sdk.util.PropertyNames;
-import org.apache.beam.sdk.util.VarInt;
-import org.apache.beam.sdk.util.common.ElementByteSizeObserver;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.List;
-
-/**
- * A UnionCoder encodes RawUnionValues.
- *
- * This file copied from {@link org.apache.beam.sdk.transforms.join.UnionCoder}
- */
-@SuppressWarnings("serial")
-public class UnionCoder extends StandardCoder<RawUnionValue> {
- // TODO: Think about how to integrate this with a schema object (i.e.
- // a tuple of tuple tags).
- /**
- * Builds a union coder with the given list of element coders. This list
- * corresponds to a mapping of union tag to Coder. Union tags start at 0.
- */
- public static UnionCoder of(List<Coder<?>> elementCoders) {
- return new UnionCoder(elementCoders);
- }
-
- @JsonCreator
- public static UnionCoder jsonOf(
- @JsonProperty(PropertyNames.COMPONENT_ENCODINGS)
- List<Coder<?>> elements) {
- return UnionCoder.of(elements);
- }
-
- private int getIndexForEncoding(RawUnionValue union) {
- if (union == null) {
- throw new IllegalArgumentException("cannot encode a null tagged union");
- }
- int index = union.getUnionTag();
- if (index < 0 || index >= elementCoders.size()) {
- throw new IllegalArgumentException(
- "union value index " + index + " not in range [0.." +
- (elementCoders.size() - 1) + "]");
- }
- return index;
- }
-
- @SuppressWarnings("unchecked")
- @Override
- public void encode(
- RawUnionValue union,
- OutputStream outStream,
- Context context)
- throws IOException {
- int index = getIndexForEncoding(union);
- // Write out the union tag.
- VarInt.encode(index, outStream);
-
- // Write out the actual value.
- Coder<Object> coder = (Coder<Object>) elementCoders.get(index);
- coder.encode(
- union.getValue(),
- outStream,
- context);
- }
-
- @Override
- public RawUnionValue decode(InputStream inStream, Context context)
- throws IOException {
- int index = VarInt.decodeInt(inStream);
- Object value = elementCoders.get(index).decode(inStream, context);
- return new RawUnionValue(index, value);
- }
-
- @Override
- public List<? extends Coder<?>> getCoderArguments() {
- return null;
- }
-
- @Override
- public List<? extends Coder<?>> getComponents() {
- return elementCoders;
- }
-
- /**
- * Since this coder uses elementCoders.get(index) and coders that are known to run in constant
- * time, we defer the return value to that coder.
- */
- @Override
- public boolean isRegisterByteSizeObserverCheap(RawUnionValue union, Context context) {
- int index = getIndexForEncoding(union);
- @SuppressWarnings("unchecked")
- Coder<Object> coder = (Coder<Object>) elementCoders.get(index);
- return coder.isRegisterByteSizeObserverCheap(union.getValue(), context);
- }
-
- /**
- * Notifies ElementByteSizeObserver about the byte size of the encoded value using this coder.
- */
- @Override
- public void registerByteSizeObserver(
- RawUnionValue union, ElementByteSizeObserver observer, Context context)
- throws Exception {
- int index = getIndexForEncoding(union);
- // Write out the union tag.
- observer.update(VarInt.getLength(index));
- // Write out the actual value.
- @SuppressWarnings("unchecked")
- Coder<Object> coder = (Coder<Object>) elementCoders.get(index);
- coder.registerByteSizeObserver(union.getValue(), observer, context);
- }
-
- /////////////////////////////////////////////////////////////////////////////
-
- private final List<Coder<?>> elementCoders;
-
- private UnionCoder(List<Coder<?>> elementCoders) {
- this.elementCoders = elementCoders;
- }
-
- @Override
- public void verifyDeterministic() throws NonDeterministicException {
- verifyDeterministic(
- "UnionCoder is only deterministic if all element coders are",
- elementCoders);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
index 895ecef..4434cf8 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
@@ -18,7 +18,8 @@
package org.apache.beam.runners.flink.translation.types;
import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.VoidCoder;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.util.WindowedValue;
import com.google.common.base.Preconditions;
@@ -71,9 +72,6 @@ public class CoderTypeInformation<T> extends TypeInformation<T> implements Atomi
@Override
@SuppressWarnings("unchecked")
public TypeSerializer<T> createSerializer(ExecutionConfig config) {
- if (coder instanceof VoidCoder) {
- return (TypeSerializer<T>) new VoidCoderTypeSerializer();
- }
return new CoderTypeSerializer<>(coder);
}
@@ -84,8 +82,12 @@ public class CoderTypeInformation<T> extends TypeInformation<T> implements Atomi
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
CoderTypeInformation that = (CoderTypeInformation) o;
@@ -113,6 +115,11 @@ public class CoderTypeInformation<T> extends TypeInformation<T> implements Atomi
@Override
public TypeComparator<T> createComparator(boolean sortOrderAscending, ExecutionConfig
executionConfig) {
- return new CoderComparator<>(coder);
+ WindowedValue.WindowedValueCoder windowCoder = (WindowedValue.WindowedValueCoder) coder;
+ if (windowCoder.getValueCoder() instanceof KvCoder) {
+ return new KvCoderComperator(windowCoder);
+ } else {
+ return new CoderComparator<>(coder);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
index c6f3921..097316b 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
@@ -33,7 +33,7 @@ import java.io.ObjectInputStream;
/**
* Flink {@link org.apache.flink.api.common.typeutils.TypeSerializer} for
- * Dataflow {@link org.apache.beam.sdk.coders.Coder}s
+ * Dataflow {@link org.apache.beam.sdk.coders.Coder Coders}.
*/
public class CoderTypeSerializer<T> extends TypeSerializer<T> {
@@ -128,14 +128,20 @@ public class CoderTypeSerializer<T> extends TypeSerializer<T> {
}
@Override
- public void copy(DataInputView dataInputView, DataOutputView dataOutputView) throws IOException {
+ public void copy(
+ DataInputView dataInputView,
+ DataOutputView dataOutputView) throws IOException {
serialize(deserialize(dataInputView), dataOutputView);
}
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
CoderTypeSerializer that = (CoderTypeSerializer) o;
return coder.equals(that.coder);
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderComperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderComperator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderComperator.java
index 6f0c651..79b127d 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderComperator.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderComperator.java
@@ -20,6 +20,8 @@ package org.apache.beam.runners.flink.translation.types;
import org.apache.beam.runners.flink.translation.wrappers.DataInputViewWrapper;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.KV;
import org.apache.flink.api.common.typeutils.TypeComparator;
@@ -31,14 +33,13 @@ import java.io.IOException;
import java.io.ObjectInputStream;
/**
- * Flink {@link org.apache.flink.api.common.typeutils.TypeComparator} for
- * {@link org.apache.beam.sdk.coders.KvCoder}. We have a special comparator
+ * Flink {@link TypeComparator} for {@link KvCoder}. We have a special comparator
* for {@link KV} that always compares on the key only.
*/
-public class KvCoderComperator <K, V> extends TypeComparator<KV<K, V>> {
+public class KvCoderComperator <K, V> extends TypeComparator<WindowedValue<KV<K, V>>> {
- private KvCoder<K, V> coder;
- private Coder<K> keyCoder;
+ private final WindowedValue.WindowedValueCoder<KV<K, V>> coder;
+ private final Coder<K> keyCoder;
// We use these for internal encoding/decoding for creating copies and comparing
// serialized forms using a Coder
@@ -52,9 +53,10 @@ public class KvCoderComperator <K, V> extends TypeComparator<KV<K, V>> {
// For deserializing the key
private transient DataInputViewWrapper inputWrapper;
- public KvCoderComperator(KvCoder<K, V> coder) {
+ public KvCoderComperator(WindowedValue.WindowedValueCoder<KV<K, V>> coder) {
this.coder = coder;
- this.keyCoder = coder.getKeyCoder();
+ KvCoder<K, V> kvCoder = (KvCoder<K, V>) coder.getValueCoder();
+ this.keyCoder = kvCoder.getKeyCoder();
buffer1 = new InspectableByteArrayOutputStream();
buffer2 = new InspectableByteArrayOutputStream();
@@ -74,8 +76,8 @@ public class KvCoderComperator <K, V> extends TypeComparator<KV<K, V>> {
}
@Override
- public int hash(KV<K, V> record) {
- K key = record.getKey();
+ public int hash(WindowedValue<KV<K, V>> record) {
+ K key = record.getValue().getKey();
if (key != null) {
return key.hashCode();
} else {
@@ -84,27 +86,27 @@ public class KvCoderComperator <K, V> extends TypeComparator<KV<K, V>> {
}
@Override
- public void setReference(KV<K, V> toCompare) {
+ public void setReference(WindowedValue<KV<K, V>> toCompare) {
referenceBuffer.reset();
try {
- keyCoder.encode(toCompare.getKey(), referenceBuffer, Coder.Context.OUTER);
+ keyCoder.encode(toCompare.getValue().getKey(), referenceBuffer, Coder.Context.OUTER);
} catch (IOException e) {
throw new RuntimeException("Could not set reference " + toCompare + ": " + e);
}
}
@Override
- public boolean equalToReference(KV<K, V> candidate) {
+ public boolean equalToReference(WindowedValue<KV<K, V>> candidate) {
try {
buffer2.reset();
- keyCoder.encode(candidate.getKey(), buffer2, Coder.Context.OUTER);
+ keyCoder.encode(candidate.getValue().getKey(), buffer2, Coder.Context.OUTER);
byte[] arr = referenceBuffer.getBuffer();
byte[] arrOther = buffer2.getBuffer();
if (referenceBuffer.size() != buffer2.size()) {
return false;
}
int len = buffer2.size();
- for(int i = 0; i < len; i++ ) {
+ for (int i = 0; i < len; i++) {
if (arr[i] != arrOther[i]) {
return false;
}
@@ -116,8 +118,9 @@ public class KvCoderComperator <K, V> extends TypeComparator<KV<K, V>> {
}
@Override
- public int compareToReference(TypeComparator<KV<K, V>> other) {
- InspectableByteArrayOutputStream otherReferenceBuffer = ((KvCoderComperator<K, V>) other).referenceBuffer;
+ public int compareToReference(TypeComparator<WindowedValue<KV<K, V>>> other) {
+ InspectableByteArrayOutputStream otherReferenceBuffer =
+ ((KvCoderComperator<K, V>) other).referenceBuffer;
byte[] arr = referenceBuffer.getBuffer();
byte[] arrOther = otherReferenceBuffer.getBuffer();
@@ -135,19 +138,19 @@ public class KvCoderComperator <K, V> extends TypeComparator<KV<K, V>> {
@Override
- public int compare(KV<K, V> first, KV<K, V> second) {
+ public int compare(WindowedValue<KV<K, V>> first, WindowedValue<KV<K, V>> second) {
try {
buffer1.reset();
buffer2.reset();
- keyCoder.encode(first.getKey(), buffer1, Coder.Context.OUTER);
- keyCoder.encode(second.getKey(), buffer2, Coder.Context.OUTER);
+ keyCoder.encode(first.getValue().getKey(), buffer1, Coder.Context.OUTER);
+ keyCoder.encode(second.getValue().getKey(), buffer2, Coder.Context.OUTER);
byte[] arr = buffer1.getBuffer();
byte[] arrOther = buffer2.getBuffer();
if (buffer1.size() != buffer2.size()) {
return buffer1.size() - buffer2.size();
}
int len = buffer1.size();
- for(int i = 0; i < len; i++ ) {
+ for (int i = 0; i < len; i++) {
if (arr[i] != arrOther[i]) {
return arr[i] - arrOther[i];
}
@@ -159,38 +162,19 @@ public class KvCoderComperator <K, V> extends TypeComparator<KV<K, V>> {
}
@Override
- public int compareSerialized(DataInputView firstSource, DataInputView secondSource) throws IOException {
-
+ public int compareSerialized(
+ DataInputView firstSource,
+ DataInputView secondSource) throws IOException {
inputWrapper.setInputView(firstSource);
- K firstKey = keyCoder.decode(inputWrapper, Coder.Context.NESTED);
+ WindowedValue<KV<K, V>> first = coder.decode(inputWrapper, Coder.Context.NESTED);
inputWrapper.setInputView(secondSource);
- K secondKey = keyCoder.decode(inputWrapper, Coder.Context.NESTED);
-
- try {
- buffer1.reset();
- buffer2.reset();
- keyCoder.encode(firstKey, buffer1, Coder.Context.OUTER);
- keyCoder.encode(secondKey, buffer2, Coder.Context.OUTER);
- byte[] arr = buffer1.getBuffer();
- byte[] arrOther = buffer2.getBuffer();
- if (buffer1.size() != buffer2.size()) {
- return buffer1.size() - buffer2.size();
- }
- int len = buffer1.size();
- for(int i = 0; i < len; i++ ) {
- if (arr[i] != arrOther[i]) {
- return arr[i] - arrOther[i];
- }
- }
- return 0;
- } catch (IOException e) {
- throw new RuntimeException("Could not compare reference.", e);
- }
+ WindowedValue<KV<K, V>> second = coder.decode(inputWrapper, Coder.Context.NESTED);
+ return compare(first, second);
}
@Override
public boolean supportsNormalizedKey() {
- return true;
+ return false;
}
@Override
@@ -209,12 +193,18 @@ public class KvCoderComperator <K, V> extends TypeComparator<KV<K, V>> {
}
@Override
- public void putNormalizedKey(KV<K, V> record, MemorySegment target, int offset, int numBytes) {
+ public void putNormalizedKey(
+ WindowedValue<KV<K, V>> record,
+ MemorySegment target,
+ int offset,
+ int numBytes) {
+
buffer1.reset();
try {
- keyCoder.encode(record.getKey(), buffer1, Coder.Context.NESTED);
+ keyCoder.encode(record.getValue().getKey(), buffer1, Coder.Context.NESTED);
} catch (IOException e) {
- throw new RuntimeException("Could not serializer " + record + " using coder " + coder + ": " + e);
+ throw new RuntimeException(
+ "Could not serializer " + record + " using coder " + coder + ": " + e);
}
final byte[] data = buffer1.getBuffer();
final int limit = offset + numBytes;
@@ -231,12 +221,16 @@ public class KvCoderComperator <K, V> extends TypeComparator<KV<K, V>> {
}
@Override
- public void writeWithKeyNormalization(KV<K, V> record, DataOutputView target) throws IOException {
+ public void writeWithKeyNormalization(
+ WindowedValue<KV<K, V>> record,
+ DataOutputView target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
- public KV<K, V> readWithKeyDenormalization(KV<K, V> reuse, DataInputView source) throws IOException {
+ public WindowedValue<KV<K, V>> readWithKeyDenormalization(
+ WindowedValue<KV<K, V>> reuse,
+ DataInputView source) throws IOException {
throw new UnsupportedOperationException();
}
@@ -246,14 +240,14 @@ public class KvCoderComperator <K, V> extends TypeComparator<KV<K, V>> {
}
@Override
- public TypeComparator<KV<K, V>> duplicate() {
+ public TypeComparator<WindowedValue<KV<K, V>>> duplicate() {
return new KvCoderComperator<>(coder);
}
@Override
public int extractKeys(Object record, Object[] target, int index) {
- KV<K, V> kv = (KV<K, V>) record;
- K k = kv.getKey();
+ WindowedValue<KV<K, V>> kv = (WindowedValue<KV<K, V>>) record;
+ K k = kv.getValue().getKey();
target[index] = k;
return 1;
}
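
The key-only contract can be checked in isolation. A sketch, assuming WindowedValue.getValueOnlyCoder(...) for coder construction; the names and wiring are illustrative, not from this commit.

import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.KV;

// Assumes the same package as KvCoderComperator.
public class KeyOnlyComparisonSketch {
  public static void main(String[] args) {
    WindowedValue.WindowedValueCoder<KV<String, Integer>> coder =
        WindowedValue.getValueOnlyCoder(
            KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));
    KvCoderComperator<String, Integer> comparator = new KvCoderComperator<>(coder);

    WindowedValue<KV<String, Integer>> a =
        WindowedValue.valueInGlobalWindow(KV.of("key", 1));
    WindowedValue<KV<String, Integer>> b =
        WindowedValue.valueInGlobalWindow(KV.of("key", 2));

    // Values differ but keys match: the comparator sees them as equal,
    // which is what keyed grouping in the batch translation relies on.
    System.out.println(comparator.compare(a, b)); // prints 0
  }
}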
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderTypeInformation.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderTypeInformation.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderTypeInformation.java
index 74f3821..ba53f64 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderTypeInformation.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderTypeInformation.java
@@ -18,6 +18,7 @@
package org.apache.beam.runners.flink.translation.types;
import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.KV;
import com.google.common.base.Preconditions;
@@ -31,27 +32,32 @@ import org.apache.flink.api.common.typeutils.TypeSerializer;
import java.util.List;
/**
- * Flink {@link org.apache.flink.api.common.typeinfo.TypeInformation} for
- * Dataflow {@link org.apache.beam.sdk.coders.KvCoder}.
+ * Flink {@link TypeInformation} for {@link KvCoder}. This creates a special comparator
+ * for {@link KV} that always compares on the key only.
*/
-public class KvCoderTypeInformation<K, V> extends CompositeType<KV<K, V>> {
+public class KvCoderTypeInformation<K, V> extends CompositeType<WindowedValue<KV<K, V>>> {
- private KvCoder<K, V> coder;
+ private final WindowedValue.WindowedValueCoder<KV<K, V>> coder;
// We don't have the Class, so we have to pass null here. What a shame...
- private static Object DUMMY = new Object();
+ private static Object dummy = new Object();
@SuppressWarnings("unchecked")
- public KvCoderTypeInformation(KvCoder<K, V> coder) {
- super(((Class<KV<K,V>>) DUMMY.getClass()));
+ public KvCoderTypeInformation(WindowedValue.WindowedValueCoder<KV<K, V>> coder) {
+ super((Class) dummy.getClass());
this.coder = coder;
Preconditions.checkNotNull(coder);
}
@Override
@SuppressWarnings("unchecked")
- public TypeComparator<KV<K, V>> createComparator(int[] logicalKeyFields, boolean[] orders, int logicalFieldOffset, ExecutionConfig config) {
- return new KvCoderComperator((KvCoder) coder);
+ public TypeComparator<WindowedValue<KV<K, V>>> createComparator(
+ int[] logicalKeyFields,
+ boolean[] orders,
+ int logicalFieldOffset,
+ ExecutionConfig config) {
+ return new KvCoderComperator(coder);
}
@Override
@@ -71,7 +77,7 @@ public class KvCoderTypeInformation<K, V> extends CompositeType<KV<K, V>> {
@Override
@SuppressWarnings("unchecked")
- public Class<KV<K, V>> getTypeClass() {
+ public Class<WindowedValue<KV<K, V>>> getTypeClass() {
return privateGetTypeClass();
}
@@ -87,7 +93,7 @@ public class KvCoderTypeInformation<K, V> extends CompositeType<KV<K, V>> {
@Override
@SuppressWarnings("unchecked")
- public TypeSerializer<KV<K, V>> createSerializer(ExecutionConfig config) {
+ public TypeSerializer<WindowedValue<KV<K, V>>> createSerializer(ExecutionConfig config) {
return new CoderTypeSerializer<>(coder);
}
@@ -98,8 +104,12 @@ public class KvCoderTypeInformation<K, V> extends CompositeType<KV<K, V>> {
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
KvCoderTypeInformation that = (KvCoderTypeInformation) o;
@@ -122,10 +132,11 @@ public class KvCoderTypeInformation<K, V> extends CompositeType<KV<K, V>> {
@Override
@SuppressWarnings("unchecked")
public <X> TypeInformation<X> getTypeAt(int pos) {
+ KvCoder<K, V> kvCoder = (KvCoder<K, V>) coder.getValueCoder();
if (pos == 0) {
- return (TypeInformation<X>) new CoderTypeInformation<>(coder.getKeyCoder());
+ return (TypeInformation<X>) new CoderTypeInformation<>(kvCoder.getKeyCoder());
} else if (pos == 1) {
- return (TypeInformation<X>) new CoderTypeInformation<>(coder.getValueCoder());
+ return (TypeInformation<X>) new CoderTypeInformation<>(kvCoder.getValueCoder());
} else {
throw new RuntimeException("Invalid field position " + pos);
}
@@ -134,11 +145,12 @@ public class KvCoderTypeInformation<K, V> extends CompositeType<KV<K, V>> {
@Override
@SuppressWarnings("unchecked")
public <X> TypeInformation<X> getTypeAt(String fieldExpression) {
+ KvCoder<K, V> kvCoder = (KvCoder<K, V>) coder.getValueCoder();
switch (fieldExpression) {
case "key":
- return (TypeInformation<X>) new CoderTypeInformation<>(coder.getKeyCoder());
+ return (TypeInformation<X>) new CoderTypeInformation<>(kvCoder.getKeyCoder());
case "value":
- return (TypeInformation<X>) new CoderTypeInformation<>(coder.getValueCoder());
+ return (TypeInformation<X>) new CoderTypeInformation<>(kvCoder.getValueCoder());
default:
throw new UnsupportedOperationException("Only KvCoder has fields.");
}
@@ -162,17 +174,24 @@ public class KvCoderTypeInformation<K, V> extends CompositeType<KV<K, V>> {
}
@Override
- public void getFlatFields(String fieldExpression, int offset, List<FlatFieldDescriptor> result) {
- CoderTypeInformation keyTypeInfo = new CoderTypeInformation<>(coder.getKeyCoder());
+ public void getFlatFields(
+ String fieldExpression,
+ int offset,
+ List<FlatFieldDescriptor> result) {
+ KvCoder<K, V> kvCoder = (KvCoder<K, V>) coder.getValueCoder();
+
+ CoderTypeInformation keyTypeInfo =
+ new CoderTypeInformation<>(kvCoder.getKeyCoder());
result.add(new FlatFieldDescriptor(0, keyTypeInfo));
}
@Override
- protected TypeComparatorBuilder<KV<K, V>> createTypeComparatorBuilder() {
+ protected TypeComparatorBuilder<WindowedValue<KV<K, V>>> createTypeComparatorBuilder() {
return new KvCoderTypeComparatorBuilder();
}
- private class KvCoderTypeComparatorBuilder implements TypeComparatorBuilder<KV<K, V>> {
+ private class KvCoderTypeComparatorBuilder
+ implements TypeComparatorBuilder<WindowedValue<KV<K, V>>> {
@Override
public void initializeTypeComparatorBuilder(int size) {}
@@ -181,7 +200,7 @@ public class KvCoderTypeInformation<K, V> extends CompositeType<KV<K, V>> {
public void addComparatorField(int fieldId, TypeComparator<?> comparator) {}
@Override
- public TypeComparator<KV<K, V>> createTypeComparator(ExecutionConfig config) {
+ public TypeComparator<WindowedValue<KV<K, V>>> createTypeComparator(ExecutionConfig config) {
return new KvCoderComperator<>(coder);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/VoidCoderTypeSerializer.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/VoidCoderTypeSerializer.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/VoidCoderTypeSerializer.java
deleted file mode 100644
index 7b48208..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/VoidCoderTypeSerializer.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.types;
-
-import org.apache.flink.api.common.typeutils.TypeSerializer;
-import org.apache.flink.core.memory.DataInputView;
-import org.apache.flink.core.memory.DataOutputView;
-
-import java.io.IOException;
-
-/**
- * Special Flink {@link org.apache.flink.api.common.typeutils.TypeSerializer} for
- * {@link org.apache.beam.sdk.coders.VoidCoder}. We need this because Flink does not
- * allow returning {@code null} from an input reader. We return a {@link VoidValue} instead
- * that behaves like a {@code null}, hopefully.
- */
-public class VoidCoderTypeSerializer extends TypeSerializer<VoidCoderTypeSerializer.VoidValue> {
-
- @Override
- public boolean isImmutableType() {
- return false;
- }
-
- @Override
- public VoidCoderTypeSerializer duplicate() {
- return this;
- }
-
- @Override
- public VoidValue createInstance() {
- return VoidValue.INSTANCE;
- }
-
- @Override
- public VoidValue copy(VoidValue from) {
- return from;
- }
-
- @Override
- public VoidValue copy(VoidValue from, VoidValue reuse) {
- return from;
- }
-
- @Override
- public int getLength() {
- return 0;
- }
-
- @Override
- public void serialize(VoidValue record, DataOutputView target) throws IOException {
- target.writeByte(1);
- }
-
- @Override
- public VoidValue deserialize(DataInputView source) throws IOException {
- source.readByte();
- return VoidValue.INSTANCE;
- }
-
- @Override
- public VoidValue deserialize(VoidValue reuse, DataInputView source) throws IOException {
- return deserialize(source);
- }
-
- @Override
- public void copy(DataInputView source, DataOutputView target) throws IOException {
- source.readByte();
- target.writeByte(1);
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj instanceof VoidCoderTypeSerializer) {
- VoidCoderTypeSerializer other = (VoidCoderTypeSerializer) obj;
- return other.canEqual(this);
- } else {
- return false;
- }
- }
-
- @Override
- public boolean canEqual(Object obj) {
- return obj instanceof VoidCoderTypeSerializer;
- }
-
- @Override
- public int hashCode() {
- return 0;
- }
-
- public static class VoidValue {
- private VoidValue() {}
-
- public static VoidValue INSTANCE = new VoidValue();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/CombineFnAggregatorWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/CombineFnAggregatorWrapper.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/CombineFnAggregatorWrapper.java
deleted file mode 100644
index e5567d3..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/CombineFnAggregatorWrapper.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers;
-
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.apache.beam.sdk.transforms.Combine;
-
-import com.google.common.collect.Lists;
-
-import org.apache.flink.api.common.accumulators.Accumulator;
-
-import java.io.Serializable;
-
-/**
- * Wrapper that wraps a {@link org.apache.beam.sdk.transforms.Combine.CombineFn}
- * in a Flink {@link org.apache.flink.api.common.accumulators.Accumulator} for using
- * the combine function as an aggregator in a {@link org.apache.beam.sdk.transforms.ParDo}
- * operation.
- */
-public class CombineFnAggregatorWrapper<AI, AA, AR> implements Aggregator<AI, AR>, Accumulator<AI, Serializable> {
-
- private AA aa;
- private Combine.CombineFn<? super AI, AA, AR> combiner;
-
- public CombineFnAggregatorWrapper() {
- }
-
- public CombineFnAggregatorWrapper(Combine.CombineFn<? super AI, AA, AR> combiner) {
- this.combiner = combiner;
- this.aa = combiner.createAccumulator();
- }
-
- @Override
- public void add(AI value) {
- combiner.addInput(aa, value);
- }
-
- @Override
- public Serializable getLocalValue() {
- return (Serializable) combiner.extractOutput(aa);
- }
-
- @Override
- public void resetLocal() {
- aa = combiner.createAccumulator();
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public void merge(Accumulator<AI, Serializable> other) {
- aa = combiner.mergeAccumulators(Lists.newArrayList(aa, ((CombineFnAggregatorWrapper<AI, AA, AR>)other).aa));
- }
-
- @Override
- public Accumulator<AI, Serializable> clone() {
- // copy it by merging
- AA aaCopy = combiner.mergeAccumulators(Lists.newArrayList(aa));
- CombineFnAggregatorWrapper<AI, AA, AR> result = new
- CombineFnAggregatorWrapper<>(combiner);
- result.aa = aaCopy;
- return result;
- }
-
- @Override
- public void addValue(AI value) {
- add(value);
- }
-
- @Override
- public String getName() {
- return "CombineFn: " + combiner.toString();
- }
-
- @Override
- public Combine.CombineFn getCombineFn() {
- return combiner;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java
index eb32fa2..82d3fb8 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java
@@ -33,20 +33,21 @@ import java.io.Serializable;
* the function as an aggregator in a {@link org.apache.beam.sdk.transforms.ParDo}
* operation.
*/
-public class SerializableFnAggregatorWrapper<AI, AO> implements Aggregator<AI, AO>, Accumulator<AI, Serializable> {
+public class SerializableFnAggregatorWrapper<InputT, OutputT>
+ implements Aggregator<InputT, OutputT>, Accumulator<InputT, Serializable> {
- private AO aa;
- private Combine.CombineFn<AI, ?, AO> combiner;
+ private OutputT aa;
+ private Combine.CombineFn<InputT, ?, OutputT> combiner;
- public SerializableFnAggregatorWrapper(Combine.CombineFn<AI, ?, AO> combiner) {
+ public SerializableFnAggregatorWrapper(Combine.CombineFn<InputT, ?, OutputT> combiner) {
this.combiner = combiner;
resetLocal();
}
-
+
@Override
@SuppressWarnings("unchecked")
- public void add(AI value) {
- this.aa = combiner.apply(ImmutableList.of((AI) aa, value));
+ public void add(InputT value) {
+ this.aa = combiner.apply(ImmutableList.of((InputT) aa, value));
}
@Override
@@ -56,17 +57,17 @@ public class SerializableFnAggregatorWrapper<AI, AO> implements Aggregator<AI, A
@Override
public void resetLocal() {
- this.aa = combiner.apply(ImmutableList.<AI>of());
+ this.aa = combiner.apply(ImmutableList.<InputT>of());
}
@Override
@SuppressWarnings("unchecked")
- public void merge(Accumulator<AI, Serializable> other) {
- this.aa = combiner.apply(ImmutableList.of((AI) aa, (AI) other.getLocalValue()));
+ public void merge(Accumulator<InputT, Serializable> other) {
+ this.aa = combiner.apply(ImmutableList.of((InputT) aa, (InputT) other.getLocalValue()));
}
@Override
- public void addValue(AI value) {
+ public void addValue(InputT value) {
add(value);
}
@@ -76,15 +77,15 @@ public class SerializableFnAggregatorWrapper<AI, AO> implements Aggregator<AI, A
}
@Override
- public Combine.CombineFn<AI, ?, AO> getCombineFn() {
+ public Combine.CombineFn<InputT, ?, OutputT> getCombineFn() {
return combiner;
}
@Override
- public Accumulator<AI, Serializable> clone() {
+ public Accumulator<InputT, Serializable> clone() {
// copy it by merging
- AO resultCopy = combiner.apply(Lists.newArrayList((AI) aa));
- SerializableFnAggregatorWrapper<AI, AO> result = new
+ OutputT resultCopy = combiner.apply(Lists.newArrayList((InputT) aa));
+ SerializableFnAggregatorWrapper<InputT, OutputT> result = new
SerializableFnAggregatorWrapper<>(combiner);
result.aa = resultCopy;
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SinkOutputFormat.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SinkOutputFormat.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SinkOutputFormat.java
index 53e544d..c0a7132 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SinkOutputFormat.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SinkOutputFormat.java
@@ -22,6 +22,7 @@ import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions
import org.apache.beam.sdk.io.Sink;
import org.apache.beam.sdk.io.Write;
import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.util.WindowedValue;
import org.apache.flink.api.common.io.OutputFormat;
import org.apache.flink.configuration.Configuration;
@@ -31,10 +32,11 @@ import java.io.IOException;
import java.lang.reflect.Field;
/**
- * Wrapper class to use generic Write.Bound transforms as sinks.
+ * Wrapper for executing a {@link Sink} on Flink as an {@link OutputFormat}.
+ *
* @param <T> The type of the incoming records.
*/
-public class SinkOutputFormat<T> implements OutputFormat<T> {
+public class SinkOutputFormat<T> implements OutputFormat<WindowedValue<T>> {
private final Sink<T> sink;
@@ -75,9 +77,9 @@ public class SinkOutputFormat<T> implements OutputFormat<T> {
}
@Override
- public void writeRecord(T record) throws IOException {
+ public void writeRecord(WindowedValue<T> record) throws IOException {
try {
- writer.write(record);
+ writer.write(record.getValue());
} catch (Exception e) {
throw new IOException("Couldn't write record.", e);
}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
index debd1a1..1d06b1a 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
@@ -21,12 +21,16 @@ import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.Source;
import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
import org.apache.flink.api.common.io.DefaultInputSplitAssigner;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.io.statistics.BaseStatistics;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.io.InputSplitAssigner;
+import org.joda.time.Instant;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -35,10 +39,10 @@ import java.util.List;
/**
- * A Flink {@link org.apache.flink.api.common.io.InputFormat} that wraps a
- * Dataflow {@link org.apache.beam.sdk.io.Source}.
+ * Wrapper for executing a {@link Source} as a Flink {@link InputFormat}.
*/
-public class SourceInputFormat<T> implements InputFormat<T, SourceInputSplit<T>> {
+public class SourceInputFormat<T>
+ implements InputFormat<WindowedValue<T>, SourceInputSplit<T>> {
private static final Logger LOG = LoggerFactory.getLogger(SourceInputFormat.class);
private final BoundedSource<T> initialSource;
@@ -122,12 +126,16 @@ public class SourceInputFormat<T> implements InputFormat<T, SourceInputSplit<T>>
}
@Override
- public T nextRecord(T t) throws IOException {
+ public WindowedValue<T> nextRecord(WindowedValue<T> t) throws IOException {
if (inputAvailable) {
final T current = reader.getCurrent();
+ final Instant timestamp = reader.getCurrentTimestamp();
// advance reader to have a record ready next time
inputAvailable = reader.advance();
- return current;
+ return WindowedValue.of(
+ current,
+ timestamp,
+ GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
}
return null;
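
The core of this change is the wrapping in nextRecord(): every element read from the bounded source is paired with its timestamp and assigned to the global window. A minimal sketch of that pattern, with an illustrative helper name:

import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;

import org.joda.time.Instant;

final class GlobalWindowWrapping {

  // Illustrative helper mirroring nextRecord() above: read the current
  // element and its timestamp, then assign both to the global window.
  static <T> WindowedValue<T> wrap(BoundedSource.BoundedReader<T> reader) {
    T current = reader.getCurrent();
    Instant timestamp = reader.getCurrentTimestamp();
    return WindowedValue.of(
        current, timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
  }
}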
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupByKeyWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupByKeyWrapper.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupByKeyWrapper.java
index 3bf566b..6b69d54 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupByKeyWrapper.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupByKeyWrapper.java
@@ -18,7 +18,6 @@
package org.apache.beam.runners.flink.translation.wrappers.streaming;
import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.runners.flink.translation.types.VoidCoderTypeSerializer;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.VoidCoder;
@@ -54,7 +53,7 @@ public class FlinkGroupByKeyWrapper {
@Override
public K getKey(WindowedValue<KV<K, V>> value) throws Exception {
- return isKeyVoid ? (K) VoidCoderTypeSerializer.VoidValue.INSTANCE :
+ return isKeyVoid ? (K) VoidValue.INSTANCE :
value.getValue().getKey();
}
@@ -64,4 +63,11 @@ public class FlinkGroupByKeyWrapper {
}
});
}
+
+ // special singleton type used as the key for elements with a null key
+ public static class VoidValue {
+ private VoidValue() {}
+
+ public static final VoidValue INSTANCE = new VoidValue();
+ }
}
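
Flink cannot key a stream on null, which is why the VoidValue singleton above exists. A minimal sketch of how such a singleton can be used in a key selector; the NullSafeKeySelector class is hypothetical:

import org.apache.beam.runners.flink.translation.wrappers.streaming.FlinkGroupByKeyWrapper.VoidValue;

import org.apache.flink.api.java.functions.KeySelector;

// Hypothetical key selector: substitute the VoidValue singleton whenever
// the extracted key is null, so Flink always sees a non-null key.
class NullSafeKeySelector<V> implements KeySelector<V, Object> {
  @Override
  public Object getKey(V value) {
    return value == null ? VoidValue.INSTANCE : value;
  }
}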
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/FlinkStreamingCreateFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/FlinkStreamingCreateFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/FlinkStreamingCreateFunction.java
index d6aff7d..8cd8351 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/FlinkStreamingCreateFunction.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/FlinkStreamingCreateFunction.java
@@ -17,7 +17,6 @@
*/
package org.apache.beam.runners.flink.translation.wrappers.streaming.io;
-import org.apache.beam.runners.flink.translation.types.VoidCoderTypeSerializer;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
@@ -47,17 +46,11 @@ public class FlinkStreamingCreateFunction<IN, OUT> implements FlatMapFunction<IN
@Override
public void flatMap(IN value, Collector<WindowedValue<OUT>> out) throws Exception {
- @SuppressWarnings("unchecked")
- OUT voidValue = (OUT) VoidCoderTypeSerializer.VoidValue.INSTANCE;
for (byte[] element : elements) {
ByteArrayInputStream bai = new ByteArrayInputStream(element);
OUT outValue = coder.decode(bai, Coder.Context.OUTER);
- if (outValue == null) {
- out.collect(WindowedValue.of(voidValue, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
- } else {
- out.collect(WindowedValue.of(outValue, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
- }
+ out.collect(WindowedValue.of(outValue, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
}
out.close();
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/AvroITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/AvroITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/AvroITCase.java
deleted file mode 100644
index 113fee0..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/AvroITCase.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.AvroCoder;
-import org.apache.beam.sdk.io.AvroIO;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-
-import com.google.common.base.Joiner;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-
-public class AvroITCase extends JavaProgramTestBase {
-
- protected String resultPath;
- protected String tmpPath;
-
- public AvroITCase(){
- }
-
- static final String[] EXPECTED_RESULT = new String[] {
- "Joe red 3",
- "Mary blue 4",
- "Mark green 1",
- "Julia purple 5"
- };
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- tmpPath = getTempDirPath("tmp");
-
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
- runProgram(tmpPath, resultPath);
- }
-
- private static void runProgram(String tmpPath, String resultPath) {
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- p
- .apply(Create.of(
- new User("Joe", 3, "red"),
- new User("Mary", 4, "blue"),
- new User("Mark", 1, "green"),
- new User("Julia", 5, "purple"))
- .withCoder(AvroCoder.of(User.class)))
-
- .apply(AvroIO.Write.to(tmpPath)
- .withSchema(User.class));
-
- p.run();
-
- p = FlinkTestPipeline.createForBatch();
-
- p
- .apply(AvroIO.Read.from(tmpPath).withSchema(User.class).withoutValidation())
-
- .apply(ParDo.of(new DoFn<User, String>() {
- @Override
- public void processElement(ProcessContext c) throws Exception {
- User u = c.element();
- String result = u.getName() + " " + u.getFavoriteColor() + " " + u.getFavoriteNumber();
- c.output(result);
- }
- }))
-
- .apply(TextIO.Write.to(resultPath));
-
- p.run();
- }
-
- private static class User {
-
- private String name;
- private int favoriteNumber;
- private String favoriteColor;
-
- public User() {}
-
- public User(String name, int favoriteNumber, String favoriteColor) {
- this.name = name;
- this.favoriteNumber = favoriteNumber;
- this.favoriteColor = favoriteColor;
- }
-
- public String getName() {
- return name;
- }
-
- public String getFavoriteColor() {
- return favoriteColor;
- }
-
- public int getFavoriteNumber() {
- return favoriteNumber;
- }
- }
-
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlattenizeITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlattenizeITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlattenizeITCase.java
deleted file mode 100644
index ac0a3d7..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlattenizeITCase.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.Flatten;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionList;
-
-import com.google.common.base.Joiner;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-public class FlattenizeITCase extends JavaProgramTestBase {
-
- private String resultPath;
- private String resultPath2;
-
- private static final String[] words = {"hello", "this", "is", "a", "DataSet!"};
- private static final String[] words2 = {"hello", "this", "is", "another", "DataSet!"};
- private static final String[] words3 = {"hello", "this", "is", "yet", "another", "DataSet!"};
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- resultPath2 = getTempDirPath("result2");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- String join = Joiner.on('\n').join(words);
- String join2 = Joiner.on('\n').join(words2);
- String join3 = Joiner.on('\n').join(words3);
- compareResultsByLinesInMemory(join + "\n" + join2, resultPath);
- compareResultsByLinesInMemory(join + "\n" + join2 + "\n" + join3, resultPath2);
- }
-
-
- @Override
- protected void testProgram() throws Exception {
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- PCollection<String> p1 = p.apply(Create.of(words));
- PCollection<String> p2 = p.apply(Create.of(words2));
-
- PCollectionList<String> list = PCollectionList.of(p1).and(p2);
-
- list.apply(Flatten.<String>pCollections()).apply(TextIO.Write.to(resultPath));
-
- PCollection<String> p3 = p.apply(Create.of(words3));
-
- PCollectionList<String> list2 = list.and(p3);
-
- list2.apply(Flatten.<String>pCollections()).apply(TextIO.Write.to(resultPath2));
-
- p.run();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/JoinExamplesITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/JoinExamplesITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/JoinExamplesITCase.java
deleted file mode 100644
index 47685b6..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/JoinExamplesITCase.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.runners.flink.util.JoinExamples;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.values.PCollection;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.common.base.Joiner;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-import java.util.Arrays;
-import java.util.List;
-
-
-/**
- * Unfortunately we need to copy the code from the Dataflow SDK because it is not public there.
- */
-public class JoinExamplesITCase extends JavaProgramTestBase {
-
- protected String resultPath;
-
- public JoinExamplesITCase(){
- }
-
- private static final TableRow row1 = new TableRow()
- .set("ActionGeo_CountryCode", "VM").set("SQLDATE", "20141212")
- .set("Actor1Name", "BANGKOK").set("SOURCEURL", "http://cnn.com");
- private static final TableRow row2 = new TableRow()
- .set("ActionGeo_CountryCode", "VM").set("SQLDATE", "20141212")
- .set("Actor1Name", "LAOS").set("SOURCEURL", "http://www.chicagotribune.com");
- private static final TableRow row3 = new TableRow()
- .set("ActionGeo_CountryCode", "BE").set("SQLDATE", "20141213")
- .set("Actor1Name", "AFGHANISTAN").set("SOURCEURL", "http://cnn.com");
- static final TableRow[] EVENTS = new TableRow[] {
- row1, row2, row3
- };
- static final List<TableRow> EVENT_ARRAY = Arrays.asList(EVENTS);
-
- private static final TableRow cc1 = new TableRow()
- .set("FIPSCC", "VM").set("HumanName", "Vietnam");
- private static final TableRow cc2 = new TableRow()
- .set("FIPSCC", "BE").set("HumanName", "Belgium");
- static final TableRow[] CCS = new TableRow[] {
- cc1, cc2
- };
- static final List<TableRow> CC_ARRAY = Arrays.asList(CCS);
-
- static final String[] JOINED_EVENTS = new String[] {
- "Country code: VM, Country name: Vietnam, Event info: Date: 20141212, Actor1: LAOS, "
- + "url: http://www.chicagotribune.com",
- "Country code: VM, Country name: Vietnam, Event info: Date: 20141212, Actor1: BANGKOK, "
- + "url: http://cnn.com",
- "Country code: BE, Country name: Belgium, Event info: Date: 20141213, Actor1: AFGHANISTAN, "
- + "url: http://cnn.com"
- };
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(JOINED_EVENTS), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
-
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- PCollection<TableRow> input1 = p.apply(Create.of(EVENT_ARRAY));
- PCollection<TableRow> input2 = p.apply(Create.of(CC_ARRAY));
-
- PCollection<String> output = JoinExamples.joinEvents(input1, input2);
-
- output.apply(TextIO.Write.to(resultPath));
-
- p.run();
- }
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/MaybeEmptyTestITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/MaybeEmptyTestITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/MaybeEmptyTestITCase.java
deleted file mode 100644
index 4d66fa4..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/MaybeEmptyTestITCase.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.VoidCoder;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-import java.io.Serializable;
-
-public class MaybeEmptyTestITCase extends JavaProgramTestBase implements Serializable {
-
- protected String resultPath;
-
- protected final String expected = "test";
-
- public MaybeEmptyTestITCase() {
- }
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(expected, resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
-
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- p.apply(Create.of((Void) null)).setCoder(VoidCoder.of())
- .apply(ParDo.of(
- new DoFn<Void, String>() {
- @Override
- public void processElement(DoFn<Void, String>.ProcessContext c) {
- c.output(expected);
- }
- })).apply(TextIO.Write.to(resultPath));
- p.run();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ParDoMultiOutputITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ParDoMultiOutputITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ParDoMultiOutputITCase.java
deleted file mode 100644
index a2ef4e2..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ParDoMultiOutputITCase.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionTuple;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.beam.sdk.values.TupleTagList;
-
-import com.google.common.base.Joiner;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-import java.io.Serializable;
-
-public class ParDoMultiOutputITCase extends JavaProgramTestBase implements Serializable {
-
- private String resultPath;
-
- private static String[] expectedWords = {"MAAA", "MAAFOOO"};
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on("\n").join(expectedWords), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- PCollection<String> words = p.apply(Create.of("Hello", "Whatupmyman", "hey", "SPECIALthere", "MAAA", "MAAFOOO"));
-
- // Select words whose length is below a cut off,
- // plus the lengths of words that are above the cut off.
- // Also select words starting with "MARKER".
- final int wordLengthCutOff = 3;
- // Create tags to use for the main and side outputs.
- final TupleTag<String> wordsBelowCutOffTag = new TupleTag<String>(){};
- final TupleTag<Integer> wordLengthsAboveCutOffTag = new TupleTag<Integer>(){};
- final TupleTag<String> markedWordsTag = new TupleTag<String>(){};
-
- PCollectionTuple results =
- words.apply(ParDo
- .withOutputTags(wordsBelowCutOffTag, TupleTagList.of(wordLengthsAboveCutOffTag)
- .and(markedWordsTag))
- .of(new DoFn<String, String>() {
- final TupleTag<String> specialWordsTag = new TupleTag<String>() {
- };
-
- public void processElement(ProcessContext c) {
- String word = c.element();
- if (word.length() <= wordLengthCutOff) {
- c.output(word);
- } else {
- c.sideOutput(wordLengthsAboveCutOffTag, word.length());
- }
- if (word.startsWith("MAA")) {
- c.sideOutput(markedWordsTag, word);
- }
-
- if (word.startsWith("SPECIAL")) {
- c.sideOutput(specialWordsTag, word);
- }
- }
- }));
-
- // Extract the PCollection results, by tag.
- PCollection<String> wordsBelowCutOff = results.get(wordsBelowCutOffTag);
- PCollection<Integer> wordLengthsAboveCutOff = results.get
- (wordLengthsAboveCutOffTag);
- PCollection<String> markedWords = results.get(markedWordsTag);
-
- markedWords.apply(TextIO.Write.to(resultPath));
-
- p.run();
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java
index 66c959e..bb79b27 100644
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java
+++ b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java
@@ -28,6 +28,9 @@ import com.google.common.base.Joiner;
import org.apache.flink.test.util.JavaProgramTestBase;
+import java.io.File;
+import java.net.URI;
+
/**
* Reads from a bounded source in batch execution.
*/
@@ -44,6 +47,13 @@ public class ReadSourceITCase extends JavaProgramTestBase {
@Override
protected void preSubmit() throws Exception {
resultPath = getTempDirPath("result");
+
+ // need to create the dir, otherwise Beam sinks don't
+ // work for these tests
+
+ if (!new File(new URI(resultPath)).mkdirs()) {
+ throw new RuntimeException("Could not create output dir.");
+ }
}
@Override
@@ -56,7 +66,7 @@ public class ReadSourceITCase extends JavaProgramTestBase {
runProgram(resultPath);
}
- private static void runProgram(String resultPath) {
+ private static void runProgram(String resultPath) throws Exception {
Pipeline p = FlinkTestPipeline.createForBatch();
@@ -69,7 +79,7 @@ public class ReadSourceITCase extends JavaProgramTestBase {
}
}));
- result.apply(TextIO.Write.to(resultPath));
+ result.apply(TextIO.Write.to(new URI(resultPath).getPath() + "/part"));
p.run();
}
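
The two fixes here share one detail: the test's temp dir comes back as a file: URI, while the directory must exist on disk and TextIO needs a plain path prefix. A minimal sketch of that conversion, assuming resultPath is such a URI; the helper name is illustrative:

import java.io.File;
import java.net.URI;

final class UriPathHelper {

  // Illustrative helper: create the output directory and derive the
  // filesystem path prefix that TextIO.Write.to() expects.
  static String toShardPrefix(String resultPath) throws Exception {
    File dir = new File(new URI(resultPath));
    if (!dir.exists() && !dir.mkdirs()) {
      throw new RuntimeException("Could not create output dir.");
    }
    return dir.getPath() + "/part";
  }
}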
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesEmptyITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesEmptyITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesEmptyITCase.java
deleted file mode 100644
index 471d326..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesEmptyITCase.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.RemoveDuplicates;
-import org.apache.beam.sdk.values.PCollection;
-
-import com.google.common.base.Joiner;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-import java.util.Collections;
-import java.util.List;
-
-
-public class RemoveDuplicatesEmptyITCase extends JavaProgramTestBase {
-
- protected String resultPath;
-
- public RemoveDuplicatesEmptyITCase(){
- }
-
- static final String[] EXPECTED_RESULT = new String[] {};
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
-
- List<String> strings = Collections.emptyList();
-
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- PCollection<String> input =
- p.apply(Create.of(strings))
- .setCoder(StringUtf8Coder.of());
-
- PCollection<String> output =
- input.apply(RemoveDuplicates.<String>create());
-
- output.apply(TextIO.Write.to(resultPath));
- p.run();
- }
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesITCase.java
deleted file mode 100644
index 0544f20..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesITCase.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.RemoveDuplicates;
-import org.apache.beam.sdk.values.PCollection;
-
-import com.google.common.base.Joiner;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-import java.util.Arrays;
-import java.util.List;
-
-
-public class RemoveDuplicatesITCase extends JavaProgramTestBase {
-
- protected String resultPath;
-
- public RemoveDuplicatesITCase(){
- }
-
- static final String[] EXPECTED_RESULT = new String[] {
- "k1", "k5", "k2", "k3"};
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
-
- List<String> strings = Arrays.asList("k1", "k5", "k5", "k2", "k1", "k2", "k3");
-
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- PCollection<String> input =
- p.apply(Create.of(strings))
- .setCoder(StringUtf8Coder.of());
-
- PCollection<String> output =
- input.apply(RemoveDuplicates.<String>create());
-
- output.apply(TextIO.Write.to(resultPath));
- p.run();
- }
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/SideInputITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/SideInputITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/SideInputITCase.java
deleted file mode 100644
index 2c7c65e..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/SideInputITCase.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.View;
-import org.apache.beam.sdk.values.PCollectionView;
-
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-import java.io.Serializable;
-
-public class SideInputITCase extends JavaProgramTestBase implements Serializable {
-
- private static final String expected = "Hello!";
-
- protected String resultPath;
-
- @Override
- protected void testProgram() throws Exception {
-
-
- Pipeline p = FlinkTestPipeline.createForBatch();
-
-
- final PCollectionView<String> sidesInput = p
- .apply(Create.of(expected))
- .apply(View.<String>asSingleton());
-
- p.apply(Create.of("bli"))
- .apply(ParDo.of(new DoFn<String, String>() {
- @Override
- public void processElement(ProcessContext c) throws Exception {
- String s = c.sideInput(sidesInput);
- c.output(s);
- }
- }).withSideInputs(sidesInput)).apply(TextIO.Write.to(resultPath));
-
- p.run();
- }
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(expected, resultPath);
- }
-}
[09/14] incubator-beam git commit: Remove unused threadCount from
integration tests
Posted by al...@apache.org.
Remove unused threadCount from integration tests
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/55f39bf7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/55f39bf7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/55f39bf7
Branch: refs/heads/master
Commit: 55f39bf7cbe65980ed4233146517d608977ddaf6
Parents: bfc1a2b
Author: Kenneth Knowles <kl...@google.com>
Authored: Fri May 6 10:54:41 2016 -0700
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri May 20 08:08:24 2016 +0200
----------------------------------------------------------------------
runners/flink/runner/pom.xml | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/55f39bf7/runners/flink/runner/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/runner/pom.xml b/runners/flink/runner/pom.xml
index cde9108..f94ce68 100644
--- a/runners/flink/runner/pom.xml
+++ b/runners/flink/runner/pom.xml
@@ -168,8 +168,7 @@
</goals>
<configuration>
<groups>org.apache.beam.sdk.testing.RunnableOnService</groups>
- <parallel>all</parallel>
- <threadCount>4</threadCount>
+ <parallel>none</parallel>
<failIfNoTests>true</failIfNoTests>
<dependenciesToScan>
<dependency>org.apache.beam:java-sdk-all</dependency>
@@ -202,8 +201,7 @@
</goals>
<configuration>
<groups>org.apache.beam.sdk.testing.RunnableOnService</groups>
- <parallel>all</parallel>
- <threadCount>4</threadCount>
+ <parallel>none</parallel>
<failIfNoTests>true</failIfNoTests>
<dependenciesToScan>
<dependency>org.apache.beam:java-sdk-all</dependency>
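
For context, surefire's <groups> element maps onto JUnit 4 categories, so the configuration above selects tests tagged with the RunnableOnService marker from the scanned SDK artifact and, with <parallel> set to none, runs them sequentially. A hedged sketch of such a test; the class and method names are illustrative:

import org.apache.beam.sdk.testing.RunnableOnService;

import org.junit.Test;
import org.junit.experimental.categories.Category;

public class ExampleRunnableOnServiceTest {

  // Picked up by the <groups> filter above; with <parallel>none</parallel>
  // it no longer shares the JVM with concurrently running tests.
  @Test
  @Category(RunnableOnService.class)
  public void runsAgainstTheConfiguredRunner() {
    // illustrative test body
  }
}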
[13/14] incubator-beam git commit: Fix Dangling Flink DataSets
Posted by al...@apache.org.
Fix Dangling Flink DataSets
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/26fa0b21
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/26fa0b21
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/26fa0b21
Branch: refs/heads/master
Commit: 26fa0b21cfda3049e26d47ce174a9b29fe3ec29c
Parents: 1664c96
Author: Aljoscha Krettek <al...@gmail.com>
Authored: Fri May 6 08:26:50 2016 +0200
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri May 20 08:08:24 2016 +0200
----------------------------------------------------------------------
.../translation/FlinkBatchPipelineTranslator.java | 14 ++++++++++++++
.../translation/FlinkBatchTranslationContext.java | 18 +++++++++++++++++-
2 files changed, 31 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/26fa0b21/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java
index 3d39e81..512b822 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java
@@ -17,6 +17,7 @@
*/
package org.apache.beam.runners.flink.translation;
+import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.runners.TransformTreeNode;
import org.apache.beam.sdk.transforms.AppliedPTransform;
@@ -24,7 +25,9 @@ import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.join.CoGroupByKey;
import org.apache.beam.sdk.values.PValue;
+import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -47,6 +50,17 @@ public class FlinkBatchPipelineTranslator extends FlinkPipelineTranslator {
this.batchContext = new FlinkBatchTranslationContext(env, options);
}
+ @Override
+ @SuppressWarnings("rawtypes, unchecked")
+ public void translate(Pipeline pipeline) {
+ super.translate(pipeline);
+
+ // terminate dangling DataSets
+ for (DataSet<?> dataSet: batchContext.getDanglingDataSets().values()) {
+ dataSet.output(new DiscardingOutputFormat());
+ }
+ }
+
// --------------------------------------------------------------------------------------------
// Pipeline Visitor Methods
// --------------------------------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/26fa0b21/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java
index 71950cf..501b1ea 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java
@@ -43,6 +43,13 @@ public class FlinkBatchTranslationContext {
private final Map<PValue, DataSet<?>> dataSets;
private final Map<PCollectionView<?>, DataSet<?>> broadcastDataSets;
+ /**
+ * For keeping track of which DataSets don't have a successor. We
+ * need to terminate these with a discarding sink because the Beam
+ * model allows dangling operations.
+ */
+ private final Map<PValue, DataSet<?>> danglingDataSets;
+
private final ExecutionEnvironment env;
private final PipelineOptions options;
@@ -55,10 +62,16 @@ public class FlinkBatchTranslationContext {
this.options = options;
this.dataSets = new HashMap<>();
this.broadcastDataSets = new HashMap<>();
+
+ this.danglingDataSets = new HashMap<>();
}
// ------------------------------------------------------------------------
-
+
+ public Map<PValue, DataSet<?>> getDanglingDataSets() {
+ return danglingDataSets;
+ }
+
public ExecutionEnvironment getExecutionEnvironment() {
return env;
}
@@ -69,12 +82,15 @@ public class FlinkBatchTranslationContext {
@SuppressWarnings("unchecked")
public <T> DataSet<T> getInputDataSet(PValue value) {
+ // assume that the DataSet is used as an input if retrieved here
+ danglingDataSets.remove(value);
return (DataSet<T>) dataSets.get(value);
}
public void setOutputDataSet(PValue value, DataSet<?> set) {
if (!dataSets.containsKey(value)) {
dataSets.put(value, set);
+ danglingDataSets.put(value, set);
}
}
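
A self-contained sketch of the failure mode this commit fixes: in Flink's batch API a DataSet without a sink is never executed (and a plan with no sinks at all is rejected), while the Beam model permits dangling transforms. The pipeline below is hypothetical; the fix is the same DiscardingOutputFormat termination the translator now applies.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;

public class DanglingDataSetExample {

  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // A DataSet with no successor, mirroring a dangling Beam transform.
    DataSet<String> dangling = env.fromElements("a", "b", "c")
        .map(new MapFunction<String, String>() {
          @Override
          public String map(String s) {
            return s.toUpperCase();
          }
        });

    // The termination applied by FlinkBatchPipelineTranslator: cap the
    // DataSet with a discarding sink so the Flink plan stays valid.
    dangling.output(new DiscardingOutputFormat<String>());

    env.execute("dangling-dataset-example");
  }
}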
[04/14] incubator-beam git commit: [BEAM-270] Support
Timestamps/Windows in Flink Batch
Posted by al...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
index 3566f7e..89243a3 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
@@ -18,173 +18,85 @@
package org.apache.beam.runners.flink.translation.functions;
import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.runners.flink.translation.wrappers.SerializableFnAggregatorWrapper;
-import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.apache.beam.sdk.transforms.Combine;
import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.TimerInternals;
import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingInternals;
-import org.apache.beam.sdk.util.state.StateInternals;
+import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.TupleTag;
-
-import com.google.common.collect.ImmutableList;
import org.apache.flink.api.common.functions.RichMapPartitionFunction;
import org.apache.flink.util.Collector;
-import org.joda.time.Instant;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
+import java.util.Map;
/**
* Encapsulates a {@link org.apache.beam.sdk.transforms.DoFn}
* inside a Flink {@link org.apache.flink.api.common.functions.RichMapPartitionFunction}.
*/
-public class FlinkDoFnFunction<IN, OUT> extends RichMapPartitionFunction<IN, OUT> {
+public class FlinkDoFnFunction<InputT, OutputT>
+ extends RichMapPartitionFunction<WindowedValue<InputT>, WindowedValue<OutputT>> {
- private final DoFn<IN, OUT> doFn;
+ private final DoFn<InputT, OutputT> doFn;
private final SerializedPipelineOptions serializedOptions;
- public FlinkDoFnFunction(DoFn<IN, OUT> doFn, PipelineOptions options) {
- this.doFn = doFn;
- this.serializedOptions = new SerializedPipelineOptions(options);
- }
-
- @Override
- public void mapPartition(Iterable<IN> values, Collector<OUT> out) throws Exception {
- ProcessContext context = new ProcessContext(doFn, out);
- this.doFn.startBundle(context);
- for (IN value : values) {
- context.inValue = value;
- doFn.processElement(context);
- }
- this.doFn.finishBundle(context);
- }
-
- private class ProcessContext extends DoFn<IN, OUT>.ProcessContext {
-
- IN inValue;
- Collector<OUT> outCollector;
-
- public ProcessContext(DoFn<IN, OUT> fn, Collector<OUT> outCollector) {
- fn.super();
- super.setupDelegateAggregators();
- this.outCollector = outCollector;
- }
-
- @Override
- public IN element() {
- return this.inValue;
- }
-
+ private final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
- @Override
- public Instant timestamp() {
- return Instant.now();
- }
+ private final boolean requiresWindowAccess;
+ private final boolean hasSideInputs;
- @Override
- public BoundedWindow window() {
- return GlobalWindow.INSTANCE;
- }
-
- @Override
- public PaneInfo pane() {
- return PaneInfo.NO_FIRING;
- }
+ private final WindowingStrategy<?, ?> windowingStrategy;
- @Override
- public WindowingInternals<IN, OUT> windowingInternals() {
- return new WindowingInternals<IN, OUT>() {
- @Override
- public StateInternals stateInternals() {
- return null;
- }
-
- @Override
- public void outputWindowedValue(OUT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
-
- }
-
- @Override
- public TimerInternals timerInternals() {
- return null;
- }
+ public FlinkDoFnFunction(
+ DoFn<InputT, OutputT> doFn,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions options) {
+ this.doFn = doFn;
+ this.sideInputs = sideInputs;
+ this.serializedOptions = new SerializedPipelineOptions(options);
+ this.windowingStrategy = windowingStrategy;
- @Override
- public Collection<? extends BoundedWindow> windows() {
- return ImmutableList.of(GlobalWindow.INSTANCE);
- }
+ this.requiresWindowAccess = doFn instanceof DoFn.RequiresWindowAccess;
+ this.hasSideInputs = !sideInputs.isEmpty();
+ }
- @Override
- public PaneInfo pane() {
- return PaneInfo.NO_FIRING;
- }
+ @Override
+ public void mapPartition(
+ Iterable<WindowedValue<InputT>> values,
+ Collector<WindowedValue<OutputT>> out) throws Exception {
+
+ FlinkProcessContext<InputT, OutputT> context = new FlinkProcessContext<>(
+ serializedOptions.getPipelineOptions(),
+ getRuntimeContext(),
+ doFn,
+ windowingStrategy,
+ out,
+ sideInputs);
- @Override
- public <T> void writePCollectionViewData(TupleTag<?> tag, Iterable<WindowedValue<T>> data, Coder<T> elemCoder) throws IOException {
- }
+ this.doFn.startBundle(context);
- @Override
- public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
- throw new RuntimeException("sideInput() not implemented.");
+ if (!requiresWindowAccess || hasSideInputs) {
+ // we don't need to explode the windows
+ for (WindowedValue<InputT> value : values) {
+ context = context.forWindowedValue(value);
+ doFn.processElement(context);
+ }
+ } else {
+ // we need to explode the windows because window access
+ // only works if an element is in exactly one window
+ for (WindowedValue<InputT> value : values) {
+ for (WindowedValue<InputT> explodedValue : value.explodeWindows()) {
+ context = context.forWindowedValue(explodedValue);
+ doFn.processElement(context);
}
- };
- }
-
- @Override
- public PipelineOptions getPipelineOptions() {
- return serializedOptions.getPipelineOptions();
- }
-
- @Override
- public <T> T sideInput(PCollectionView<T> view) {
- List<T> sideInput = getRuntimeContext().getBroadcastVariable(view.getTagInternal().getId());
- List<WindowedValue<?>> windowedValueList = new ArrayList<>(sideInput.size());
- for (T input : sideInput) {
- windowedValueList.add(WindowedValue.of(input, Instant.now(), ImmutableList.of(GlobalWindow.INSTANCE), pane()));
}
- return view.fromIterableInternal(windowedValueList);
}
- @Override
- public void output(OUT output) {
- outCollector.collect(output);
- }
-
- @Override
- public void outputWithTimestamp(OUT output, Instant timestamp) {
- // not FLink's way, just output normally
- output(output);
- }
-
- @Override
- public <T> void sideOutput(TupleTag<T> tag, T output) {
- // ignore the side output, this can happen when a user does not register
- // side outputs but then outputs using a freshly created TupleTag.
- }
-
- @Override
- public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
- sideOutput(tag, output);
- }
-
- @Override
- protected <AggInputT, AggOutputT> Aggregator<AggInputT, AggOutputT> createAggregatorInternal(String name, Combine.CombineFn<AggInputT, ?, AggOutputT> combiner) {
- SerializableFnAggregatorWrapper<AggInputT, AggOutputT> wrapper = new SerializableFnAggregatorWrapper<>(combiner);
- getRuntimeContext().addAccumulator(name, wrapper);
- return wrapper;
- }
-
-
+ // set the windowed value to null so that the logic
+ // for outputting in finishBundle kicks in
+ context = context.forWindowedValue(null);
+ this.doFn.finishBundle(context);
}
+
}
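
To make the explode step concrete, a minimal sketch of explodeWindows(): a single WindowedValue assigned to several windows becomes one single-window copy per window, which is what unambiguous window() access requires. The windows and values below are illustrative.

import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;

import com.google.common.collect.ImmutableList;

import org.joda.time.Instant;

public class ExplodeWindowsExample {

  public static void main(String[] args) {
    Instant ts = new Instant(0);
    IntervalWindow w1 = new IntervalWindow(ts, ts.plus(1000));
    IntervalWindow w2 = new IntervalWindow(ts.plus(500), ts.plus(1500));

    // One element sitting in two overlapping windows at once.
    WindowedValue<String> multi =
        WindowedValue.of("hello", ts, ImmutableList.of(w1, w2), PaneInfo.NO_FIRING);

    // explodeWindows() yields one copy per window; each copy reports
    // exactly one window, so per-window processing is unambiguous.
    for (WindowedValue<String> single : multi.explodeWindows()) {
      System.out.println(single.getWindows());
    }
  }
}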
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkKeyedListAggregationFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkKeyedListAggregationFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkKeyedListAggregationFunction.java
deleted file mode 100644
index 7c7084d..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkKeyedListAggregationFunction.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import org.apache.beam.sdk.values.KV;
-
-import org.apache.flink.api.common.functions.GroupReduceFunction;
-import org.apache.flink.util.Collector;
-
-import java.util.Iterator;
-
-/**
- * Flink {@link org.apache.flink.api.common.functions.GroupReduceFunction} for executing a
- * {@link org.apache.beam.sdk.transforms.GroupByKey} operation. This reads the input
- * {@link org.apache.beam.sdk.values.KV} elements, extracts the key and collects
- * the values in a {@code List}.
- */
-public class FlinkKeyedListAggregationFunction<K,V> implements GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> {
-
- @Override
- public void reduce(Iterable<KV<K, V>> values, Collector<KV<K, Iterable<V>>> out) throws Exception {
- Iterator<KV<K, V>> it = values.iterator();
- KV<K, V> first = it.next();
- Iterable<V> passThrough = new PassThroughIterable<>(first, it);
- out.collect(KV.of(first.getKey(), passThrough));
- }
-
- private static class PassThroughIterable<K, V> implements Iterable<V>, Iterator<V> {
- private KV<K, V> first;
- private Iterator<KV<K, V>> iterator;
-
- public PassThroughIterable(KV<K, V> first, Iterator<KV<K, V>> iterator) {
- this.first = first;
- this.iterator = iterator;
- }
-
- @Override
- public Iterator<V> iterator() {
- return this;
- }
-
- @Override
- public boolean hasNext() {
- return first != null || iterator.hasNext();
- }
-
- @Override
- public V next() {
- if (first != null) {
- V result = first.getValue();
- first = null;
- return result;
- } else {
- return iterator.next().getValue();
- }
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException("Cannot remove elements from input.");
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java
new file mode 100644
index 0000000..9074d72
--- /dev/null
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.CombineFnBase;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.PerKeyCombineFnRunner;
+import org.apache.beam.sdk.util.PerKeyCombineFnRunners;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+
+import org.apache.flink.api.common.functions.RichGroupReduceFunction;
+import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Special version of {@link FlinkReduceFunction} that supports merging windows. This
+ * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the
+ * same behaviour as {@code MergeOverlappingIntervalWindows}.
+ *
+ * <p>This is different from the pair of functions for the non-merging windows case
+ * in that we cannot do combining before the shuffle because elements would not
+ * yet be in their correct windows for side-input access.
+ */
+public class FlinkMergingNonShuffleReduceFunction<
+ K, InputT, AccumT, OutputT, W extends IntervalWindow>
+ extends RichGroupReduceFunction<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, OutputT>>> {
+
+ private final CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> combineFn;
+
+ private final DoFn<KV<K, InputT>, KV<K, OutputT>> doFn;
+
+ private final WindowingStrategy<?, W> windowingStrategy;
+
+ private final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
+
+ private final SerializedPipelineOptions serializedOptions;
+
+ public FlinkMergingNonShuffleReduceFunction(
+ CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> keyedCombineFn,
+ WindowingStrategy<?, W> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions pipelineOptions) {
+
+ this.combineFn = keyedCombineFn;
+
+ this.windowingStrategy = windowingStrategy;
+ this.sideInputs = sideInputs;
+
+ this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
+
+ // dummy DoFn because we need one for ProcessContext
+ this.doFn = new DoFn<KV<K, InputT>, KV<K, OutputT>>() {
+ @Override
+ public void processElement(ProcessContext c) throws Exception {
+
+ }
+ };
+ }
+
+ @Override
+ public void reduce(
+ Iterable<WindowedValue<KV<K, InputT>>> elements,
+ Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
+
+ FlinkProcessContext<KV<K, InputT>, KV<K, OutputT>> processContext =
+ new FlinkProcessContext<>(
+ serializedOptions.getPipelineOptions(),
+ getRuntimeContext(),
+ doFn,
+ windowingStrategy,
+ out,
+ sideInputs);
+
+ PerKeyCombineFnRunner<K, InputT, AccumT, OutputT> combineFnRunner =
+ PerKeyCombineFnRunners.create(combineFn);
+
+ @SuppressWarnings("unchecked")
+ OutputTimeFn<? super BoundedWindow> outputTimeFn =
+ (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
+
+ // get all elements so that we can sort them; they have to fit into
+ // memory
+ // this seems very imprudent, but it is correct, for now
+ List<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
+ for (WindowedValue<KV<K, InputT>> inputValue: elements) {
+ for (WindowedValue<KV<K, InputT>> exploded: inputValue.explodeWindows()) {
+ sortedInput.add(exploded);
+ }
+ }
+ Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, InputT>>>() {
+ @Override
+ public int compare(
+ WindowedValue<KV<K, InputT>> o1,
+ WindowedValue<KV<K, InputT>> o2) {
+ return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
+ .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
+ }
+ });
+
+ // merge windows, we have to do it in an extra pre-processing step and
+ // can't do it as we go since the window of early elements would not
+ // be correct when calling the CombineFn
+ mergeWindow(sortedInput);
+
+ // iterate over the elements that are sorted by window timestamp
+ final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();
+
+ // create an accumulator using the first element's key
+ WindowedValue<KV<K, InputT>> currentValue = iterator.next();
+ K key = currentValue.getValue().getKey();
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows());
+ InputT firstValue = currentValue.getValue().getValue();
+ processContext = processContext.forWindowedValue(currentValue);
+ AccumT accumulator = combineFnRunner.createAccumulator(key, processContext);
+ accumulator = combineFnRunner.addInput(key, accumulator, firstValue, processContext);
+
+ // we use this to keep track of the timestamps assigned by the OutputTimeFn
+ Instant windowTimestamp =
+ outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow);
+
+ while (iterator.hasNext()) {
+ WindowedValue<KV<K, InputT>> nextValue = iterator.next();
+ IntervalWindow nextWindow = (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+
+ if (currentWindow.equals(nextWindow)) {
+ // continue accumulating and merge windows
+
+ InputT value = nextValue.getValue().getValue();
+ processContext = processContext.forWindowedValue(nextValue);
+ accumulator = combineFnRunner.addInput(key, accumulator, value, processContext);
+
+ windowTimestamp = outputTimeFn.combine(
+ windowTimestamp,
+ outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow));
+
+ } else {
+ // emit the value that we currently have
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
+
+ currentWindow = nextWindow;
+ InputT value = nextValue.getValue().getValue();
+ processContext = processContext.forWindowedValue(nextValue);
+ accumulator = combineFnRunner.createAccumulator(key, processContext);
+ accumulator = combineFnRunner.addInput(key, accumulator, value, processContext);
+ windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow);
+ }
+ }
+
+ // emit the final accumulator
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
+ }
+
+ /**
+ * Merges overlapping windows in place. This assumes that the list of elements is
+ * sorted by window-end timestamp and replaces the windows in the input list.
+ */
+ private void mergeWindow(List<WindowedValue<KV<K, InputT>>> elements) {
+ int currentStart = 0;
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows());
+
+ for (int i = 1; i < elements.size(); i++) {
+ WindowedValue<KV<K, InputT>> nextValue = elements.get(i);
+ IntervalWindow nextWindow =
+ (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+ if (currentWindow.intersects(nextWindow)) {
+ // we continue
+ currentWindow = currentWindow.span(nextWindow);
+ } else {
+ // retrofit the merged window to all windows up to "currentStart"
+ for (int j = i - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, InputT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ currentStart = i;
+ currentWindow = nextWindow;
+ }
+ }
+ if (currentStart < elements.size() - 1) {
+ // we have to retrofit the last batch
+ for (int j = elements.size() - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, InputT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ }
+ }
+
+}
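For reference, the window-merging behaviour described in the Javadoc above can be
shown in isolation. A minimal sketch, assuming only the SDK's IntervalWindow (the
class and method names here are illustrative, not part of the runner):

import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.joda.time.Instant;

import java.util.ArrayList;
import java.util.List;

class MergeSketch {
  // Merges a list of IntervalWindows that is sorted by end timestamp,
  // mirroring what mergeWindow() does to the windows of the sorted input.
  static List<IntervalWindow> mergeSorted(List<IntervalWindow> sorted) {
    List<IntervalWindow> merged = new ArrayList<>();
    IntervalWindow current = sorted.get(0);
    for (int i = 1; i < sorted.size(); i++) {
      IntervalWindow next = sorted.get(i);
      if (current.intersects(next)) {
        // overlapping windows grow the current merged window
        current = current.span(next);
      } else {
        merged.add(current);
        current = next;
      }
    }
    merged.add(current);
    return merged;
  }

  public static void main(String[] args) {
    List<IntervalWindow> windows = new ArrayList<>();
    windows.add(new IntervalWindow(new Instant(0), new Instant(10)));
    windows.add(new IntervalWindow(new Instant(5), new Instant(15)));
    windows.add(new IntervalWindow(new Instant(20), new Instant(30)));
    // [0, 10) and [5, 15) overlap and merge to [0, 15); [20, 30) stays.
    System.out.println(mergeSorted(windows));
  }
}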
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java
new file mode 100644
index 0000000..c12e420
--- /dev/null
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.CombineFnBase;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.PerKeyCombineFnRunner;
+import org.apache.beam.sdk.util.PerKeyCombineFnRunners;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+
+import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Special version of {@link FlinkPartialReduceFunction} that supports merging windows. This
+ * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the
+ * same behaviour as {@code MergeOverlappingIntervalWindows}.
+ */
+public class FlinkMergingPartialReduceFunction<K, InputT, AccumT, W extends IntervalWindow>
+ extends FlinkPartialReduceFunction<K, InputT, AccumT, W> {
+
+ public FlinkMergingPartialReduceFunction(
+ CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, ?> combineFn,
+ WindowingStrategy<?, W> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions pipelineOptions) {
+ super(combineFn, windowingStrategy, sideInputs, pipelineOptions);
+ }
+
+ @Override
+ public void combine(
+ Iterable<WindowedValue<KV<K, InputT>>> elements,
+ Collector<WindowedValue<KV<K, AccumT>>> out) throws Exception {
+
+ FlinkProcessContext<KV<K, InputT>, KV<K, AccumT>> processContext =
+ new FlinkProcessContext<>(
+ serializedOptions.getPipelineOptions(),
+ getRuntimeContext(),
+ doFn,
+ windowingStrategy,
+ out,
+ sideInputs);
+
+ PerKeyCombineFnRunner<K, InputT, AccumT, ?> combineFnRunner =
+ PerKeyCombineFnRunners.create(combineFn);
+
+ @SuppressWarnings("unchecked")
+ OutputTimeFn<? super BoundedWindow> outputTimeFn =
+ (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
+
+ // get all elements so that we can sort them; they have to fit into
+ // memory
+ // this seems very imprudent, but it is correct, for now
+ List<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
+ for (WindowedValue<KV<K, InputT>> inputValue: elements) {
+ for (WindowedValue<KV<K, InputT>> exploded: inputValue.explodeWindows()) {
+ sortedInput.add(exploded);
+ }
+ }
+ Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, InputT>>>() {
+ @Override
+ public int compare(
+ WindowedValue<KV<K, InputT>> o1,
+ WindowedValue<KV<K, InputT>> o2) {
+ return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
+ .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
+ }
+ });
+
+ // merge windows, we have to do it in an extra pre-processing step and
+ // can't do it as we go since the window of early elements would not
+ // be correct when calling the CombineFn
+ mergeWindow(sortedInput);
+
+ // iterate over the elements that are sorted by window timestamp
+ final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();
+
+ // create an accumulator using the first element's key
+ WindowedValue<KV<K, InputT>> currentValue = iterator.next();
+ K key = currentValue.getValue().getKey();
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows());
+ InputT firstValue = currentValue.getValue().getValue();
+ processContext = processContext.forWindowedValue(currentValue);
+ AccumT accumulator = combineFnRunner.createAccumulator(key, processContext);
+ accumulator = combineFnRunner.addInput(key, accumulator, firstValue, processContext);
+
+ // we use this to keep track of the timestamps assigned by the OutputTimeFn
+ Instant windowTimestamp =
+ outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow);
+
+ while (iterator.hasNext()) {
+ WindowedValue<KV<K, InputT>> nextValue = iterator.next();
+ IntervalWindow nextWindow = (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+
+ if (currentWindow.equals(nextWindow)) {
+ // continue accumulating and merge windows
+
+ InputT value = nextValue.getValue().getValue();
+ processContext = processContext.forWindowedValue(nextValue);
+ accumulator = combineFnRunner.addInput(key, accumulator, value, processContext);
+
+ windowTimestamp = outputTimeFn.combine(
+ windowTimestamp,
+ outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow));
+
+ } else {
+ // emit the value that we currently have
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, accumulator),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
+
+ currentWindow = nextWindow;
+ InputT value = nextValue.getValue().getValue();
+ processContext = processContext.forWindowedValue(nextValue);
+ accumulator = combineFnRunner.createAccumulator(key, processContext);
+ accumulator = combineFnRunner.addInput(key, accumulator, value, processContext);
+ windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow);
+ }
+ }
+
+ // emit the final accumulator
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, accumulator),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
+ }
+
+ /**
+ * Merges overlapping windows in place. This assumes that the list of elements is
+ * sorted by window-end timestamp and replaces the windows in the input list.
+ */
+ private void mergeWindow(List<WindowedValue<KV<K, InputT>>> elements) {
+ int currentStart = 0;
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows());
+
+ for (int i = 1; i < elements.size(); i++) {
+ WindowedValue<KV<K, InputT>> nextValue = elements.get(i);
+ IntervalWindow nextWindow =
+ (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+ if (currentWindow.intersects(nextWindow)) {
+ // we continue
+ currentWindow = currentWindow.span(nextWindow);
+ } else {
+ // retrofit the merged window to all windows up to "currentStart"
+ for (int j = i - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, InputT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ currentStart = i;
+ currentWindow = nextWindow;
+ }
+ }
+ if (currentStart < elements.size() - 1) {
+ // we have to retrofit the last batch
+ for (int j = elements.size() - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, InputT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ }
+ }
+}
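A minimal sketch of how the OutputTimeFn calls above fold element timestamps into
a single window timestamp, assuming the SDK's OutputTimeFns factory (the concrete
instants are illustrative):

import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
import org.apache.beam.sdk.transforms.windowing.OutputTimeFns;
import org.joda.time.Instant;

class OutputTimeSketch {
  public static void main(String[] args) {
    OutputTimeFn<? super BoundedWindow> fn =
        OutputTimeFns.outputAtEarliestInputTimestamp();
    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));

    // assign a per-element output time, then fold further elements in,
    // just as the combine() loop maintains windowTimestamp
    Instant timestamp = fn.assignOutputTime(new Instant(7), window);
    timestamp = fn.combine(timestamp, fn.assignOutputTime(new Instant(3), window));

    // prints the earlier of the two timestamps, 3ms past the epoch
    System.out.println(timestamp);
  }
}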
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java
new file mode 100644
index 0000000..07d1c97
--- /dev/null
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.CombineFnBase;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.PerKeyCombineFnRunner;
+import org.apache.beam.sdk.util.PerKeyCombineFnRunners;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+
+import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Special version of {@link FlinkReduceFunction} that supports merging windows. This
+ * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the
+ * same behaviour as {@code MergeOverlappingIntervalWindows}.
+ */
+public class FlinkMergingReduceFunction<K, AccumT, OutputT, W extends IntervalWindow>
+ extends FlinkReduceFunction<K, AccumT, OutputT, W> {
+
+ public FlinkMergingReduceFunction(
+ CombineFnBase.PerKeyCombineFn<K, ?, AccumT, OutputT> keyedCombineFn,
+ WindowingStrategy<?, W> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions pipelineOptions) {
+ super(keyedCombineFn, windowingStrategy, sideInputs, pipelineOptions);
+ }
+
+ @Override
+ public void reduce(
+ Iterable<WindowedValue<KV<K, AccumT>>> elements,
+ Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
+
+ FlinkProcessContext<KV<K, AccumT>, KV<K, OutputT>> processContext =
+ new FlinkProcessContext<>(
+ serializedOptions.getPipelineOptions(),
+ getRuntimeContext(),
+ doFn,
+ windowingStrategy,
+ out,
+ sideInputs);
+
+ PerKeyCombineFnRunner<K, ?, AccumT, OutputT> combineFnRunner =
+ PerKeyCombineFnRunners.create(combineFn);
+
+ @SuppressWarnings("unchecked")
+ OutputTimeFn<? super BoundedWindow> outputTimeFn =
+ (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
+
+
+ // get all elements so that we can sort them; they have to fit into
+ // memory
+ // this seems very imprudent, but it is correct, for now
+ ArrayList<WindowedValue<KV<K, AccumT>>> sortedInput = Lists.newArrayList();
+ for (WindowedValue<KV<K, AccumT>> inputValue: elements) {
+ for (WindowedValue<KV<K, AccumT>> exploded: inputValue.explodeWindows()) {
+ sortedInput.add(exploded);
+ }
+ }
+ Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, AccumT>>>() {
+ @Override
+ public int compare(
+ WindowedValue<KV<K, AccumT>> o1,
+ WindowedValue<KV<K, AccumT>> o2) {
+ return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
+ .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
+ }
+ });
+
+ // merge windows, we have to do it in an extra pre-processing step and
+ // can't do it as we go since the window of early elements would not
+ // be correct when calling the CombineFn
+ mergeWindow(sortedInput);
+
+ // iterate over the elements that are sorted by window timestamp
+ final Iterator<WindowedValue<KV<K, AccumT>>> iterator = sortedInput.iterator();
+
+ // get the first accumulator
+ WindowedValue<KV<K, AccumT>> currentValue = iterator.next();
+ K key = currentValue.getValue().getKey();
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows());
+ AccumT accumulator = currentValue.getValue().getValue();
+
+ // we use this to keep track of the timestamps assigned by the OutputTimeFn;
+ // FlinkPartialReduceFunction already combined the timestamps assigned to
+ // individual elements, here we only merge those per-window timestamps
+ List<Instant> windowTimestamps = new ArrayList<>();
+ windowTimestamps.add(currentValue.getTimestamp());
+
+ while (iterator.hasNext()) {
+ WindowedValue<KV<K, AccumT>> nextValue = iterator.next();
+ IntervalWindow nextWindow =
+ (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+
+ if (nextWindow.equals(currentWindow)) {
+ // continue accumulating and merge windows
+
+ processContext = processContext.forWindowedValue(nextValue);
+
+ accumulator = combineFnRunner.mergeAccumulators(
+ key, ImmutableList.of(accumulator, nextValue.getValue().getValue()), processContext);
+
+ windowTimestamps.add(nextValue.getTimestamp());
+ } else {
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)),
+ outputTimeFn.merge(currentWindow, windowTimestamps),
+ currentWindow,
+ PaneInfo.NO_FIRING));
+
+ windowTimestamps.clear();
+
+ processContext = processContext.forWindowedValue(nextValue);
+
+ currentWindow = nextWindow;
+ accumulator = nextValue.getValue().getValue();
+ windowTimestamps.add(nextValue.getTimestamp());
+ }
+ }
+
+ // emit the final accumulator
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)),
+ outputTimeFn.merge(currentWindow, windowTimestamps),
+ currentWindow,
+ PaneInfo.NO_FIRING));
+ }
+
+ /**
+ * Merges overlapping windows in place. This assumes that the list of elements is
+ * sorted by window-end timestamp and replaces the windows in the input list.
+ */
+ private void mergeWindow(List<WindowedValue<KV<K, AccumT>>> elements) {
+ int currentStart = 0;
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows());
+
+ for (int i = 1; i < elements.size(); i++) {
+ WindowedValue<KV<K, AccumT>> nextValue = elements.get(i);
+ IntervalWindow nextWindow =
+ (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+ if (currentWindow.intersects(nextWindow)) {
+ // we continue
+ currentWindow = currentWindow.span(nextWindow);
+ } else {
+ // retrofit the merged window to all windows up to "currentStart"
+ for (int j = i - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, AccumT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ currentStart = i;
+ currentWindow = nextWindow;
+ }
+ }
+ if (currentStart < elements.size() - 1) {
+ // we have to retrofit the last batch
+ for (int j = elements.size() - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, AccumT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputDoFnFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputDoFnFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputDoFnFunction.java
index 476dc5e..f92e76f 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputDoFnFunction.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputDoFnFunction.java
@@ -18,28 +18,17 @@
package org.apache.beam.runners.flink.translation.functions;
import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.runners.flink.translation.wrappers.SerializableFnAggregatorWrapper;
import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.apache.beam.sdk.transforms.Combine;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.join.RawUnionValue;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingInternals;
+import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.TupleTag;
-import com.google.common.collect.ImmutableList;
-
import org.apache.flink.api.common.functions.RichMapPartitionFunction;
import org.apache.flink.util.Collector;
-import org.joda.time.Instant;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Map;
/**
@@ -50,112 +39,72 @@ import java.util.Map;
 * and must tag all outputs with the output number. Afterwards, a filter removes
 * those elements that do not belong in a specific output.
*/
-public class FlinkMultiOutputDoFnFunction<IN, OUT> extends RichMapPartitionFunction<IN, RawUnionValue> {
-
- private final DoFn<IN, OUT> doFn;
- private final SerializedPipelineOptions serializedPipelineOptions;
- private final Map<TupleTag<?>, Integer> outputMap;
-
- public FlinkMultiOutputDoFnFunction(DoFn<IN, OUT> doFn, PipelineOptions options, Map<TupleTag<?>, Integer> outputMap) {
- this.doFn = doFn;
- this.serializedPipelineOptions = new SerializedPipelineOptions(options);
- this.outputMap = outputMap;
- }
-
- @Override
- public void mapPartition(Iterable<IN> values, Collector<RawUnionValue> out) throws Exception {
- ProcessContext context = new ProcessContext(doFn, out);
- this.doFn.startBundle(context);
- for (IN value : values) {
- context.inValue = value;
- doFn.processElement(context);
- }
- this.doFn.finishBundle(context);
- }
+public class FlinkMultiOutputDoFnFunction<InputT, OutputT>
+ extends RichMapPartitionFunction<WindowedValue<InputT>, WindowedValue<RawUnionValue>> {
- private class ProcessContext extends DoFn<IN, OUT>.ProcessContext {
+ private final DoFn<InputT, OutputT> doFn;
+ private final SerializedPipelineOptions serializedOptions;
- IN inValue;
- Collector<RawUnionValue> outCollector;
+ private final Map<TupleTag<?>, Integer> outputMap;
- public ProcessContext(DoFn<IN, OUT> fn, Collector<RawUnionValue> outCollector) {
- fn.super();
- this.outCollector = outCollector;
- }
+ private final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
- @Override
- public IN element() {
- return this.inValue;
- }
+ private final boolean requiresWindowAccess;
+ private final boolean hasSideInputs;
- @Override
- public Instant timestamp() {
- return Instant.now();
- }
+ private final WindowingStrategy<?, ?> windowingStrategy;
- @Override
- public BoundedWindow window() {
- return GlobalWindow.INSTANCE;
- }
+ public FlinkMultiOutputDoFnFunction(
+ DoFn<InputT, OutputT> doFn,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions options,
+ Map<TupleTag<?>, Integer> outputMap) {
+ this.doFn = doFn;
+ this.serializedOptions = new SerializedPipelineOptions(options);
+ this.outputMap = outputMap;
- @Override
- public PaneInfo pane() {
- return PaneInfo.NO_FIRING;
- }
+ this.requiresWindowAccess = doFn instanceof DoFn.RequiresWindowAccess;
+ this.hasSideInputs = !sideInputs.isEmpty();
+ this.windowingStrategy = windowingStrategy;
+ this.sideInputs = sideInputs;
+ }
- @Override
- public WindowingInternals<IN, OUT> windowingInternals() {
- return null;
- }
+ @Override
+ public void mapPartition(
+ Iterable<WindowedValue<InputT>> values,
+ Collector<WindowedValue<RawUnionValue>> out) throws Exception {
+
+ FlinkProcessContext<InputT, OutputT> context = new FlinkMultiOutputProcessContext<>(
+ serializedOptions.getPipelineOptions(),
+ getRuntimeContext(),
+ doFn,
+ windowingStrategy,
+ out,
+ outputMap,
+ sideInputs);
- @Override
- public PipelineOptions getPipelineOptions() {
- return serializedPipelineOptions.getPipelineOptions();
- }
+ this.doFn.startBundle(context);
- @Override
- public <T> T sideInput(PCollectionView<T> view) {
- List<T> sideInput = getRuntimeContext().getBroadcastVariable(view.getTagInternal()
- .getId());
- List<WindowedValue<?>> windowedValueList = new ArrayList<>(sideInput.size());
- for (T input : sideInput) {
- windowedValueList.add(WindowedValue.of(input, Instant.now(), ImmutableList.of(GlobalWindow.INSTANCE), pane()));
+ if (!requiresWindowAccess && !hasSideInputs) {
+ // we don't need to explode the windows
+ for (WindowedValue<InputT> value : values) {
+ context = context.forWindowedValue(value);
+ doFn.processElement(context);
}
- return view.fromIterableInternal(windowedValueList);
- }
-
- @Override
- public void output(OUT value) {
- // assume that index 0 is the default output
- outCollector.collect(new RawUnionValue(0, value));
- }
-
- @Override
- public void outputWithTimestamp(OUT output, Instant timestamp) {
- // not FLink's way, just output normally
- output(output);
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public <T> void sideOutput(TupleTag<T> tag, T value) {
- Integer index = outputMap.get(tag);
- if (index != null) {
- outCollector.collect(new RawUnionValue(index, value));
+ } else {
+ // we need to explode the windows because we have per-window
+ // side inputs, and window access only works when an element
+ // is in exactly one window
+ for (WindowedValue<InputT> value : values) {
+ for (WindowedValue<InputT> explodedValue: value.explodeWindows()) {
+ context = context.forWindowedValue(explodedValue);
+ doFn.processElement(context);
+ }
}
}
- @Override
- public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
- sideOutput(tag, output);
- }
-
- @Override
- protected <AggInputT, AggOutputT> Aggregator<AggInputT, AggOutputT> createAggregatorInternal(String name, Combine.CombineFn<AggInputT, ?, AggOutputT> combiner) {
- SerializableFnAggregatorWrapper<AggInputT, AggOutputT> wrapper = new SerializableFnAggregatorWrapper<>(combiner);
- getRuntimeContext().addAccumulator(name, wrapper);
- return null;
- }
+ this.doFn.finishBundle(context);
}
}
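For context, this is the translation target for multi-output ParDos as a user
would write them against this SDK. A hedged sketch (the tags and names are
illustrative):

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;

class MultiOutputSketch {
  // the main output receives the evens, the side output tag the odds
  static final TupleTag<Integer> EVENS = new TupleTag<Integer>() {};
  static final TupleTag<Integer> ODDS = new TupleTag<Integer>() {};

  static PCollectionTuple split(PCollection<Integer> input) {
    return input.apply(
        ParDo.of(new DoFn<Integer, Integer>() {
          @Override
          public void processElement(ProcessContext c) {
            if (c.element() % 2 == 0) {
              c.output(c.element());           // routed to EVENS
            } else {
              c.sideOutput(ODDS, c.element()); // routed to ODDS
            }
          }
        }).withOutputTags(EVENS, TupleTagList.of(ODDS)));
  }
}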
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputProcessContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputProcessContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputProcessContext.java
new file mode 100644
index 0000000..71b6d27
--- /dev/null
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputProcessContext.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.join.RawUnionValue;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+
+import org.apache.flink.api.common.functions.RuntimeContext;
+import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+
+import java.util.Collection;
+import java.util.Map;
+
+/**
+ * {@link DoFn.ProcessContext} for {@link FlinkMultiOutputDoFnFunction} that supports
+ * side outputs.
+ */
+class FlinkMultiOutputProcessContext<InputT, OutputT>
+ extends FlinkProcessContext<InputT, OutputT> {
+
+ // we need a different Collector from the base class
+ private final Collector<WindowedValue<RawUnionValue>> collector;
+
+ private final Map<TupleTag<?>, Integer> outputMap;
+
+
+ FlinkMultiOutputProcessContext(
+ PipelineOptions pipelineOptions,
+ RuntimeContext runtimeContext,
+ DoFn<InputT, OutputT> doFn,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Collector<WindowedValue<RawUnionValue>> collector,
+ Map<TupleTag<?>, Integer> outputMap,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs) {
+ super(
+ pipelineOptions,
+ runtimeContext,
+ doFn,
+ windowingStrategy,
+ new Collector<WindowedValue<OutputT>>() {
+ @Override
+ public void collect(WindowedValue<OutputT> outputTWindowedValue) {
+
+ }
+
+ @Override
+ public void close() {
+
+ }
+ },
+ sideInputs);
+
+ this.collector = collector;
+ this.outputMap = outputMap;
+ }
+
+ @Override
+ public FlinkProcessContext<InputT, OutputT> forWindowedValue(
+ WindowedValue<InputT> windowedValue) {
+ this.windowedValue = windowedValue;
+ return this;
+ }
+
+ @Override
+ public void outputWithTimestamp(OutputT value, Instant timestamp) {
+ if (windowedValue == null) {
+ // we are in startBundle() or finishBundle()
+
+ try {
+ Collection windows = windowingStrategy.getWindowFn().assignWindows(
+ new FlinkNoElementAssignContext(
+ windowingStrategy.getWindowFn(),
+ value,
+ timestamp));
+
+ collector.collect(
+ WindowedValue.of(
+ new RawUnionValue(0, value),
+ timestamp != null ? timestamp : new Instant(Long.MIN_VALUE),
+ windows,
+ PaneInfo.NO_FIRING));
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ } else {
+ collector.collect(
+ WindowedValue.of(
+ new RawUnionValue(0, value),
+ windowedValue.getTimestamp(),
+ windowedValue.getWindows(),
+ windowedValue.getPane()));
+ }
+ }
+
+ @Override
+ protected void outputWithTimestampAndWindow(
+ OutputT value,
+ Instant timestamp,
+ Collection<? extends BoundedWindow> windows,
+ PaneInfo pane) {
+ collector.collect(
+ WindowedValue.of(
+ new RawUnionValue(0, value), timestamp, windows, pane));
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public <T> void sideOutput(TupleTag<T> tag, T value) {
+ if (windowedValue != null) {
+ sideOutputWithTimestamp(tag, value, windowedValue.getTimestamp());
+ } else {
+ sideOutputWithTimestamp(tag, value, null);
+ }
+ }
+
+ @Override
+ public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T value, Instant timestamp) {
+ Integer index = outputMap.get(tag);
+
+ if (index == null) {
+ throw new IllegalArgumentException("Unknown side output tag: " + tag);
+ }
+
+ if (windowedValue == null) {
+ // we are in startBundle() or finishBundle()
+
+ try {
+ Collection windows = windowingStrategy.getWindowFn().assignWindows(
+ new FlinkNoElementAssignContext(
+ windowingStrategy.getWindowFn(),
+ value,
+ timestamp));
+
+ collector.collect(
+ WindowedValue.of(
+ new RawUnionValue(index, value),
+ timestamp != null ? timestamp : new Instant(Long.MIN_VALUE),
+ windows,
+ PaneInfo.NO_FIRING));
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ } else {
+ collector.collect(
+ WindowedValue.of(
+ new RawUnionValue(index, value),
+ windowedValue.getTimestamp(),
+ windowedValue.getWindows(),
+ windowedValue.getPane()));
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java
index 58a36b2..9205a55 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java
@@ -18,27 +18,34 @@
package org.apache.beam.runners.flink.translation.functions;
import org.apache.beam.sdk.transforms.join.RawUnionValue;
+import org.apache.beam.sdk.util.WindowedValue;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;
/**
- * A FlatMap function that filters out those elements that don't belong in this output. We need
- * this to implement MultiOutput ParDo functions.
+ * A {@link FlatMapFunction} that filters out those elements that don't belong in this
+ * output. We need this to implement multi-output ParDo functions in combination with
+ * {@link FlinkMultiOutputDoFnFunction}.
*/
-public class FlinkMultiOutputPruningFunction<T> implements FlatMapFunction<RawUnionValue, T> {
+public class FlinkMultiOutputPruningFunction<T>
+ implements FlatMapFunction<WindowedValue<RawUnionValue>, WindowedValue<T>> {
- private final int outputTag;
+ private final int ourOutputTag;
- public FlinkMultiOutputPruningFunction(int outputTag) {
- this.outputTag = outputTag;
+ public FlinkMultiOutputPruningFunction(int ourOutputTag) {
+ this.ourOutputTag = ourOutputTag;
}
@Override
@SuppressWarnings("unchecked")
- public void flatMap(RawUnionValue rawUnionValue, Collector<T> collector) throws Exception {
- if (rawUnionValue.getUnionTag() == outputTag) {
- collector.collect((T) rawUnionValue.getValue());
+ public void flatMap(
+ WindowedValue<RawUnionValue> windowedValue,
+ Collector<WindowedValue<T>> collector) throws Exception {
+ int unionTag = windowedValue.getValue().getUnionTag();
+ if (unionTag == ourOutputTag) {
+ collector.collect(
+ (WindowedValue<T>) windowedValue.withValue(windowedValue.getValue().getValue()));
}
}
}
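A minimal sketch of the tag-then-prune round trip that this function completes,
assuming only RawUnionValue from the SDK's join library (the tag values are
illustrative):

import org.apache.beam.sdk.transforms.join.RawUnionValue;

class PruneSketch {
  public static void main(String[] args) {
    // the DoFn function tags each output with its union index ...
    RawUnionValue tagged = new RawUnionValue(1, "hello");

    // ... and the pruning function for output 1 keeps only matching tags
    int ourOutputTag = 1;
    if (tagged.getUnionTag() == ourOutputTag) {
      System.out.println(tagged.getValue()); // prints "hello"
    }
  }
}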
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoElementAssignContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoElementAssignContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoElementAssignContext.java
new file mode 100644
index 0000000..892f7a1
--- /dev/null
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoElementAssignContext.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
+
+import org.joda.time.Instant;
+
+import java.util.Collection;
+
+/**
+ * {@link WindowFn.AssignContext} for calling a {@link WindowFn} for elements emitted from
+ * {@link org.apache.beam.sdk.transforms.DoFn#startBundle(DoFn.Context)}
+ * or {@link DoFn#finishBundle(DoFn.Context)}.
+ *
+ * <p>In those cases the {@code WindowFn} is not allowed to access any element information.
+ */
+class FlinkNoElementAssignContext<InputT, W extends BoundedWindow>
+ extends WindowFn<InputT, W>.AssignContext {
+
+ private final InputT element;
+ private final Instant timestamp;
+
+ FlinkNoElementAssignContext(
+ WindowFn<InputT, W> fn,
+ InputT element,
+ Instant timestamp) {
+ fn.super();
+
+ this.element = element;
+ // the timestamp can be null; in that case, output was called
+ // without a timestamp
+ this.timestamp = timestamp;
+ }
+
+ @Override
+ public InputT element() {
+ return element;
+ }
+
+ @Override
+ public Instant timestamp() {
+ if (timestamp != null) {
+ return timestamp;
+ } else {
+ throw new UnsupportedOperationException("No timestamp available.");
+ }
+ }
+
+ @Override
+ public Collection<? extends BoundedWindow> windows() {
+ throw new UnsupportedOperationException("No windows available.");
+ }
+}
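To illustrate why assignment can work without element access: a partitioning
WindowFn such as FixedWindows derives the window from the timestamp alone. A
hedged sketch (the window size is illustrative):

import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.joda.time.Duration;
import org.joda.time.Instant;

class AssignSketch {
  public static void main(String[] args) {
    // FixedWindows partitions time, so a timestamp alone determines the
    // window; element() and windows() are never consulted
    FixedWindows fn = FixedWindows.of(Duration.standardMinutes(1));
    IntervalWindow window = fn.assignWindow(new Instant(90000));
    // prints the minute-long window containing the 90-second mark
    System.out.println(window);
  }
}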
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java
index a2bab2b..c29e1df 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java
@@ -17,45 +17,170 @@
*/
package org.apache.beam.runners.flink.translation.functions;
-import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.CombineFnBase;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.PerKeyCombineFnRunner;
+import org.apache.beam.sdk.util.PerKeyCombineFnRunners;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.flink.api.common.functions.GroupCombineFunction;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+
+import org.apache.flink.api.common.functions.RichGroupCombineFunction;
import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
import java.util.Iterator;
+import java.util.Map;
/**
- * Flink {@link org.apache.flink.api.common.functions.GroupCombineFunction} for executing a
- * {@link org.apache.beam.sdk.transforms.Combine.PerKey} operation. This reads the input
- * {@link org.apache.beam.sdk.values.KV} elements VI, extracts the key and emits accumulated
- * values which have the intermediate format VA.
+ * This is the first step for executing a {@link org.apache.beam.sdk.transforms.Combine.PerKey}
+ * on Flink. The second part is {@link FlinkReduceFunction}. This function performs a local
+ * combine step before shuffling, while the latter does the final combination after a shuffle.
+ *
+ * <p>The input to {@link #combine(Iterable, Collector)} are elements of the same key but
+ * for different windows. We have to ensure that we only combine elements of matching
+ * windows.
*/
-public class FlinkPartialReduceFunction<K, VI, VA> implements GroupCombineFunction<KV<K, VI>, KV<K, VA>> {
+public class FlinkPartialReduceFunction<K, InputT, AccumT, W extends BoundedWindow>
+ extends RichGroupCombineFunction<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, AccumT>>> {
+
+ protected final CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, ?> combineFn;
+
+ protected final DoFn<KV<K, InputT>, KV<K, AccumT>> doFn;
+
+ protected final WindowingStrategy<?, W> windowingStrategy;
+
+ protected final SerializedPipelineOptions serializedOptions;
- private final Combine.KeyedCombineFn<K, VI, VA, ?> keyedCombineFn;
+ protected final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
- public FlinkPartialReduceFunction(Combine.KeyedCombineFn<K, VI, VA, ?>
- keyedCombineFn) {
- this.keyedCombineFn = keyedCombineFn;
+ public FlinkPartialReduceFunction(
+ CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, ?> combineFn,
+ WindowingStrategy<?, W> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions pipelineOptions) {
+
+ this.combineFn = combineFn;
+ this.windowingStrategy = windowingStrategy;
+ this.sideInputs = sideInputs;
+ this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
+
+ // dummy DoFn because we need one for ProcessContext
+ this.doFn = new DoFn<KV<K, InputT>, KV<K, AccumT>>() {
+ @Override
+ public void processElement(ProcessContext c) throws Exception {
+
+ }
+ };
}
@Override
- public void combine(Iterable<KV<K, VI>> elements, Collector<KV<K, VA>> out) throws Exception {
+ public void combine(
+ Iterable<WindowedValue<KV<K, InputT>>> elements,
+ Collector<WindowedValue<KV<K, AccumT>>> out) throws Exception {
+
+ FlinkProcessContext<KV<K, InputT>, KV<K, AccumT>> processContext =
+ new FlinkProcessContext<>(
+ serializedOptions.getPipelineOptions(),
+ getRuntimeContext(),
+ doFn,
+ windowingStrategy,
+ out,
+ sideInputs);
+
+ PerKeyCombineFnRunner<K, InputT, AccumT, ?> combineFnRunner =
+ PerKeyCombineFnRunners.create(combineFn);
+
+ @SuppressWarnings("unchecked")
+ OutputTimeFn<? super BoundedWindow> outputTimeFn =
+ (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
+
+ // get all elements so that we can sort them; they have to fit into
+ // memory
+ // this seems very imprudent, but it is correct, for now
+ ArrayList<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
+ for (WindowedValue<KV<K, InputT>> inputValue: elements) {
+ for (WindowedValue<KV<K, InputT>> exploded: inputValue.explodeWindows()) {
+ sortedInput.add(exploded);
+ }
+ }
+ Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, InputT>>>() {
+ @Override
+ public int compare(
+ WindowedValue<KV<K, InputT>> o1,
+ WindowedValue<KV<K, InputT>> o2) {
+ return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
+ .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
+ }
+ });
+
+ // iterate over the elements that are sorted by window timestamp
+ final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();
- final Iterator<KV<K, VI>> iterator = elements.iterator();
// create an accumulator using the first element's key
- KV<K, VI> first = iterator.next();
- K key = first.getKey();
- VI value = first.getValue();
- VA accumulator = keyedCombineFn.createAccumulator(key);
- accumulator = keyedCombineFn.addInput(key, accumulator, value);
-
- while(iterator.hasNext()) {
- value = iterator.next().getValue();
- accumulator = keyedCombineFn.addInput(key, accumulator, value);
+ WindowedValue<KV<K, InputT>> currentValue = iterator.next();
+ K key = currentValue.getValue().getKey();
+ BoundedWindow currentWindow = Iterables.getFirst(currentValue.getWindows(), null);
+ InputT firstValue = currentValue.getValue().getValue();
+ processContext = processContext.forWindowedValue(currentValue);
+ AccumT accumulator = combineFnRunner.createAccumulator(key, processContext);
+ accumulator = combineFnRunner.addInput(key, accumulator, firstValue, processContext);
+
+ // we use this to keep track of the timestamps assigned by the OutputTimeFn
+ Instant windowTimestamp =
+ outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow);
+
+ while (iterator.hasNext()) {
+ WindowedValue<KV<K, InputT>> nextValue = iterator.next();
+ BoundedWindow nextWindow = Iterables.getOnlyElement(nextValue.getWindows());
+
+ if (nextWindow.equals(currentWindow)) {
+ // continue accumulating
+ InputT value = nextValue.getValue().getValue();
+ processContext = processContext.forWindowedValue(nextValue);
+ accumulator = combineFnRunner.addInput(key, accumulator, value, processContext);
+
+ windowTimestamp = outputTimeFn.combine(
+ windowTimestamp,
+ outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow));
+
+ } else {
+ // emit the value that we currently have
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, accumulator),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
+
+ currentWindow = nextWindow;
+ InputT value = nextValue.getValue().getValue();
+ processContext = processContext.forWindowedValue(nextValue);
+ accumulator = combineFnRunner.createAccumulator(key, processContext);
+ accumulator = combineFnRunner.addInput(key, accumulator, value, processContext);
+ windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow);
+ }
}
- out.collect(KV.of(key, accumulator));
+ // emit the final accumulator
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, accumulator),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
}
}
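A hedged sketch of the two-step combine that the Javadoc above describes, using a
hand-rolled Sum-like Combine.CombineFn (purely illustrative, not the runner's
code path):

import org.apache.beam.sdk.transforms.Combine;

import java.util.Arrays;

class TwoPhaseSketch {
  // a Sum-like CombineFn, spelled out to make the two phases visible
  static class SumFn extends Combine.CombineFn<Integer, Integer, Integer> {
    @Override public Integer createAccumulator() { return 0; }
    @Override public Integer addInput(Integer acc, Integer in) { return acc + in; }
    @Override public Integer mergeAccumulators(Iterable<Integer> accs) {
      int sum = 0;
      for (int acc : accs) {
        sum += acc;
      }
      return sum;
    }
    @Override public Integer extractOutput(Integer acc) { return acc; }
  }

  public static void main(String[] args) {
    SumFn fn = new SumFn();

    // phase 1, before the shuffle (FlinkPartialReduceFunction):
    // fold raw inputs into per-worker accumulators
    Integer acc1 = fn.addInput(fn.addInput(fn.createAccumulator(), 1), 2); // 3
    Integer acc2 = fn.addInput(fn.createAccumulator(), 4);                 // 4

    // phase 2, after the shuffle (FlinkReduceFunction): merge the
    // accumulators for a key and extract the final output
    Integer out = fn.extractOutput(fn.mergeAccumulators(Arrays.asList(acc1, acc2)));
    System.out.println(out); // prints 7
  }
}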
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/24bfca23/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkProcessContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkProcessContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkProcessContext.java
new file mode 100644
index 0000000..0f1885c
--- /dev/null
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkProcessContext.java
@@ -0,0 +1,324 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.beam.runners.flink.translation.wrappers.SerializableFnAggregatorWrapper;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.Aggregator;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.TimerInternals;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingInternals;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.util.state.StateInternals;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
+
+import org.apache.flink.api.common.functions.RuntimeContext;
+import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * {@link org.apache.beam.sdk.transforms.DoFn.ProcessContext} for our Flink wrappers.
+ */
+class FlinkProcessContext<InputT, OutputT>
+ extends DoFn<InputT, OutputT>.ProcessContext {
+
+ private final PipelineOptions pipelineOptions;
+ private final RuntimeContext runtimeContext;
+ private Collector<WindowedValue<OutputT>> collector;
+ private final boolean requiresWindowAccess;
+
+ protected WindowedValue<InputT> windowedValue;
+
+ protected WindowingStrategy<?, ?> windowingStrategy;
+
+ private final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
+
+ FlinkProcessContext(
+ PipelineOptions pipelineOptions,
+ RuntimeContext runtimeContext,
+ DoFn<InputT, OutputT> doFn,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Collector<WindowedValue<OutputT>> collector,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs) {
+ doFn.super();
+ Preconditions.checkNotNull(pipelineOptions);
+ Preconditions.checkNotNull(runtimeContext);
+ Preconditions.checkNotNull(doFn);
+ Preconditions.checkNotNull(collector);
+
+ this.pipelineOptions = pipelineOptions;
+ this.runtimeContext = runtimeContext;
+ this.collector = collector;
+ this.requiresWindowAccess = doFn instanceof DoFn.RequiresWindowAccess;
+ this.windowingStrategy = windowingStrategy;
+ this.sideInputs = sideInputs;
+
+ super.setupDelegateAggregators();
+ }
+
+ FlinkProcessContext(
+ PipelineOptions pipelineOptions,
+ RuntimeContext runtimeContext,
+ DoFn<InputT, OutputT> doFn,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs) {
+ doFn.super();
+ Preconditions.checkNotNull(pipelineOptions);
+ Preconditions.checkNotNull(runtimeContext);
+ Preconditions.checkNotNull(doFn);
+
+ this.pipelineOptions = pipelineOptions;
+ this.runtimeContext = runtimeContext;
+ this.collector = null;
+ this.requiresWindowAccess = doFn instanceof DoFn.RequiresWindowAccess;
+ this.windowingStrategy = windowingStrategy;
+ this.sideInputs = sideInputs;
+
+ super.setupDelegateAggregators();
+ }
+
+ public FlinkProcessContext<InputT, OutputT> forOutput(
+ Collector<WindowedValue<OutputT>> collector) {
+ this.collector = collector;
+
+ // for now, return ourselves, to be easy on the GC
+ return this;
+ }
+
+ public FlinkProcessContext<InputT, OutputT> forWindowedValue(
+ WindowedValue<InputT> windowedValue) {
+ this.windowedValue = windowedValue;
+
+ // for now, return ourselves, to be easy on the GC
+ return this;
+ }
+
+ @Override
+ public InputT element() {
+ return this.windowedValue.getValue();
+ }
+
+
+ @Override
+ public Instant timestamp() {
+ return windowedValue.getTimestamp();
+ }
+
+ @Override
+ public BoundedWindow window() {
+ if (!requiresWindowAccess) {
+ throw new UnsupportedOperationException(
+ "window() is only available in the context of a DoFn marked as RequiresWindow.");
+ }
+ return Iterables.getOnlyElement(windowedValue.getWindows());
+ }
+
+ @Override
+ public PaneInfo pane() {
+ return windowedValue.getPane();
+ }
+
+ @Override
+ public WindowingInternals<InputT, OutputT> windowingInternals() {
+
+ return new WindowingInternals<InputT, OutputT>() {
+
+ @Override
+ public StateInternals stateInternals() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void outputWindowedValue(
+ OutputT value,
+ Instant timestamp,
+ Collection<? extends BoundedWindow> windows,
+ PaneInfo pane) {
+ // delegating (rather than also collecting directly) avoids emitting
+ // every windowed value twice
+ outputWithTimestampAndWindow(value, timestamp, windows, pane);
+ }
+
+ @Override
+ public TimerInternals timerInternals() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Collection<? extends BoundedWindow> windows() {
+ return windowedValue.getWindows();
+ }
+
+ @Override
+ public PaneInfo pane() {
+ return windowedValue.getPane();
+ }
+
+ @Override
+ public <T> void writePCollectionViewData(TupleTag<?> tag,
+ Iterable<WindowedValue<T>> data, Coder<T> elemCoder) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public <ViewT> ViewT sideInput(
+ PCollectionView<ViewT> view,
+ BoundedWindow mainInputWindow) {
+
+ Preconditions.checkNotNull(view, "View passed to sideInput cannot be null");
+ Preconditions.checkNotNull(
+ sideInputs.get(view),
+ "Side input for " + view + " not available.");
+
+ // get the side input strategy for mapping the window
+ WindowingStrategy<?, ?> windowingStrategy = sideInputs.get(view);
+
+ BoundedWindow sideInputWindow =
+ windowingStrategy.getWindowFn().getSideInputWindow(mainInputWindow);
+
+ Map<BoundedWindow, ViewT> sideInputs =
+ runtimeContext.getBroadcastVariableWithInitializer(
+ view.getTagInternal().getId(), new SideInputInitializer<>(view));
+ return sideInputs.get(sideInputWindow);
+ }
+ };
+ }
+
+ @Override
+ public PipelineOptions getPipelineOptions() {
+ return pipelineOptions;
+ }
+
+ @Override
+ public <ViewT> ViewT sideInput(PCollectionView<ViewT> view) {
+ Preconditions.checkNotNull(view, "View passed to sideInput cannot be null");
+ Preconditions.checkNotNull(sideInputs.get(view), "Side input for " + view + " not available.");
+ Iterator<? extends BoundedWindow> windowIter = windowedValue.getWindows().iterator();
+ BoundedWindow window;
+ if (!windowIter.hasNext()) {
+ throw new IllegalStateException(
+ "sideInput called when main input element is not in any windows");
+ } else {
+ window = windowIter.next();
+ if (windowIter.hasNext()) {
+ throw new IllegalStateException(
+ "sideInput called when main input element is in multiple windows");
+ }
+ }
+
+ // get the side input strategy for mapping the window
+ WindowingStrategy<?, ?> windowingStrategy = sideInputs.get(view);
+
+ BoundedWindow sideInputWindow =
+ windowingStrategy.getWindowFn().getSideInputWindow(window);
+
+ Map<BoundedWindow, ViewT> sideInputs =
+ runtimeContext.getBroadcastVariableWithInitializer(
+ view.getTagInternal().getId(), new SideInputInitializer<>(view));
+ ViewT result = sideInputs.get(sideInputWindow);
+ if (result == null) {
+ result = view.fromIterableInternal(Collections.<WindowedValue<?>>emptyList());
+ }
+ return result;
+ }
+
+ @Override
+ public void output(OutputT value) {
+ if (windowedValue != null) {
+ outputWithTimestamp(value, windowedValue.getTimestamp());
+ } else {
+ outputWithTimestamp(value, null);
+ }
+ }
+
+ @Override
+ public void outputWithTimestamp(OutputT value, Instant timestamp) {
+ if (windowedValue == null) {
+ // we are in startBundle() or finishBundle()
+
+ try {
+ Collection windows = windowingStrategy.getWindowFn().assignWindows(
+ new FlinkNoElementAssignContext(
+ windowingStrategy.getWindowFn(),
+ value,
+ timestamp));
+
+ collector.collect(
+ WindowedValue.of(
+ value,
+ timestamp != null ? timestamp : new Instant(Long.MIN_VALUE),
+ windows,
+ PaneInfo.NO_FIRING));
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ } else {
+ collector.collect(
+ WindowedValue.of(
+ value,
+ timestamp,
+ windowedValue.getWindows(),
+ windowedValue.getPane()));
+ }
+ }
+
+ protected void outputWithTimestampAndWindow(
+ OutputT value,
+ Instant timestamp,
+ Collection<? extends BoundedWindow> windows,
+ PaneInfo pane) {
+ collector.collect(
+ WindowedValue.of(
+ value, timestamp, windows, pane));
+ }
+
+ @Override
+ public <T> void sideOutput(TupleTag<T> tag, T output) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public <T> void sideOutputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
+ sideOutput(tag, output);
+ }
+
+ @Override
+ protected <AggInputT, AggOutputT> Aggregator<AggInputT, AggOutputT>
+ createAggregatorInternal(String name, Combine.CombineFn<AggInputT, ?, AggOutputT> combiner) {
+ SerializableFnAggregatorWrapper<AggInputT, AggOutputT> wrapper =
+ new SerializableFnAggregatorWrapper<>(combiner);
+ runtimeContext.addAccumulator(name, wrapper);
+ return wrapper;
+ }
+}
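For orientation, the fluent forWindowedValue/forOutput setters above let a
single context instance be reused for every element a Flink task processes.
A minimal sketch of a call site, with assumed field names (the real call
sites live in FlinkDoFnFunction and related classes, not in this excerpt):
    // Hypothetical call-site sketch; pipelineOptions, runtimeContext, doFn,
    // windowingStrategy and sideInputs are assumed fields of the enclosing
    // Flink function, and "out" is the Flink Collector of the operator.
    void processAll(
        Iterable<WindowedValue<InputT>> elements,
        Collector<WindowedValue<OutputT>> out) throws Exception {
      FlinkProcessContext<InputT, OutputT> context = new FlinkProcessContext<>(
          pipelineOptions, runtimeContext, doFn, windowingStrategy, sideInputs);
      for (WindowedValue<InputT> element : elements) {
        // both setters mutate and return the same context object, trading
        // immutability for fewer per-element allocations
        doFn.processElement(context.forWindowedValue(element).forOutput(out));
      }
    }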
[08/14] incubator-beam git commit: Configure RunnableOnService tests
for Flink in batch mode
Posted by al...@apache.org.
Configure RunnableOnService tests for Flink in batch mode
Today Flink batch supports only global windows, and our build needs to
tolerate such partial support, eventually via JUnit category filtering.
For now, all test classes that use non-global windows are excluded
wholesale via Maven configuration; in the future the exclusions should be
applied per test method. (A sketch of a category-tagged test follows the
diff below.)
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/bfc1a2ba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/bfc1a2ba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/bfc1a2ba
Branch: refs/heads/master
Commit: bfc1a2ba041c1b8b0033f886266321e5ee53cf6c
Parents: 58d66a3
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon May 2 14:04:20 2016 -0700
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri May 20 08:08:24 2016 +0200
----------------------------------------------------------------------
runners/flink/runner/pom.xml | 106 ++++++++++++++++++++++++++++----------
1 file changed, 79 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/bfc1a2ba/runners/flink/runner/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/runner/pom.xml b/runners/flink/runner/pom.xml
index a53a386..cde9108 100644
--- a/runners/flink/runner/pom.xml
+++ b/runners/flink/runner/pom.xml
@@ -34,31 +34,6 @@
<packaging>jar</packaging>
- <profiles>
- <profile>
- <id>disable-runnable-on-service-tests</id>
- <activation>
- <activeByDefault>true</activeByDefault>
- </activation>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <executions>
- <execution>
- <id>runnable-on-service-tests</id>
- <configuration>
- <skip>true</skip>
- </configuration>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
- </profile>
- </profiles>
-
<dependencies>
<!-- Flink dependencies -->
<dependency>
@@ -87,7 +62,8 @@
<artifactId>flink-avro_2.10</artifactId>
<version>${flink.version}</version>
</dependency>
- <!-- Beam -->
+
+ <!-- Beam -->
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>java-sdk-all</artifactId>
@@ -111,6 +87,21 @@
</dependency>
<!-- Test scoped -->
+
+ <!-- Depend on test jar to scan for RunnableOnService tests -->
+ <dependency>
+ <groupId>org.apache.beam</groupId>
+ <artifactId>java-sdk-all</artifactId>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-jdk14</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>java-examples-all</artifactId>
@@ -168,10 +159,71 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>runnable-on-service-tests</id>
+ <phase>integration-test</phase>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ <configuration>
+ <groups>org.apache.beam.sdk.testing.RunnableOnService</groups>
+ <parallel>all</parallel>
+ <threadCount>4</threadCount>
+ <failIfNoTests>true</failIfNoTests>
+ <dependenciesToScan>
+ <dependency>org.apache.beam:java-sdk-all</dependency>
+ </dependenciesToScan>
+ <systemPropertyVariables>
+ <beamTestPipelineOptions>
+ [
+ "--runner=org.apache.beam.runners.flink.TestFlinkPipelineRunner",
+ "--streaming=false"
+ ]
+ </beamTestPipelineOptions>
+ </systemPropertyVariables>
+ <excludes>
+ <!-- Tests that use unsupported windowing -->
+ <exclude>**/org/apache/beam/sdk/transforms/CombineTest.java</exclude>
+ <exclude>**/org/apache/beam/sdk/transforms/GroupByKeyTest.java</exclude>
+ <exclude>**/org/apache/beam/sdk/transforms/ViewTest.java</exclude>
+ <exclude>**/org/apache/beam/sdk/transforms/join/CoGroupByKeyTest.java</exclude>
+ <exclude>**/org/apache/beam/sdk/transforms/windowing/WindowTest.java</exclude>
+ <exclude>**/org/apache/beam/sdk/transforms/windowing/WindowingTest.java</exclude>
+ <exclude>**/org/apache/beam/sdk/util/ReshuffleTest.java</exclude>
+ </excludes>
+ </configuration>
+ </execution>
+ <execution>
+ <id>streaming-runnable-on-service-tests</id>
+ <phase>integration-test</phase>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ <configuration>
+ <groups>org.apache.beam.sdk.testing.RunnableOnService</groups>
+ <parallel>all</parallel>
+ <threadCount>4</threadCount>
+ <failIfNoTests>true</failIfNoTests>
+ <dependenciesToScan>
+ <dependency>org.apache.beam:java-sdk-all</dependency>
+ </dependenciesToScan>
+ <systemPropertyVariables>
+ <beamTestPipelineOptions>
+ [
+ "--runner=org.apache.beam.runners.flink.TestFlinkPipelineRunner",
+ "--streaming=true"
+ ]
+ </beamTestPipelineOptions>
+ </systemPropertyVariables>
+ <excludes>
+ </excludes>
+ </configuration>
+ </execution>
+ </executions>
</plugin>
</plugins>
-
</build>
</project>
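For reference, the two executions above discover tests by the
org.apache.beam.sdk.testing.RunnableOnService JUnit category and hand them
the runner through the beamTestPipelineOptions system property, which
TestPipeline reads when constructing the pipeline. A sketch of such a test
(class and test names are illustrative, not part of this commit):
    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.testing.PAssert;
    import org.apache.beam.sdk.testing.RunnableOnService;
    import org.apache.beam.sdk.testing.TestPipeline;
    import org.apache.beam.sdk.transforms.Count;
    import org.apache.beam.sdk.transforms.Create;
    import org.apache.beam.sdk.values.PCollection;
    import org.junit.Test;
    import org.junit.experimental.categories.Category;

    public class ExampleRunnableOnServiceTest {
      @Test
      @Category(RunnableOnService.class)
      public void testGloballyWindowedCount() {
        // TestPipeline.create() picks up beamTestPipelineOptions, so this
        // test runs on whichever runner the surefire execution configured
        Pipeline p = TestPipeline.create();
        PCollection<Long> count = p
            .apply(Create.of("a", "b", "c"))
            .apply(Count.<String>globally());
        PAssert.thatSingleton(count).isEqualTo(3L);
        p.run();
      }
    }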
[06/14] incubator-beam git commit: Special casing job exec
AssertionError in TestFlinkPipelineRunner
Posted by al...@apache.org.
Special casing job exec AssertionError in TestFlinkPipelineRunner
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/af8e9887
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/af8e9887
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/af8e9887
Branch: refs/heads/master
Commit: af8e98878bbc8678e33a4c00548ccabf6cf55a17
Parents: 2d71af7
Author: Kenneth Knowles <kl...@google.com>
Authored: Fri May 6 12:49:55 2016 -0700
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri May 20 08:08:24 2016 +0200
----------------------------------------------------------------------
.../beam/runners/flink/TestFlinkPipelineRunner.java | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/af8e9887/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java
index 24883c8..139aebf 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java
@@ -26,6 +26,8 @@ import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.values.PInput;
import org.apache.beam.sdk.values.POutput;
+import org.apache.flink.runtime.client.JobExecutionException;
+
public class TestFlinkPipelineRunner extends PipelineRunner<FlinkRunnerResult> {
private FlinkPipelineRunner delegate;
@@ -55,7 +57,19 @@ public class TestFlinkPipelineRunner extends PipelineRunner<FlinkRunnerResult> {
@Override
public FlinkRunnerResult run(Pipeline pipeline) {
- return delegate.run(pipeline);
+ try {
+ return delegate.run(pipeline);
+ } catch (RuntimeException e) {
+ // Special case hack to pull out assertion errors from PAssert; instead there should
+ // probably be a better story along the lines of UserCodeException.
+ if (e.getCause() != null
+ && e.getCause() instanceof JobExecutionException
+ && e.getCause().getCause() instanceof AssertionError) {
+ throw (AssertionError) e.getCause().getCause();
+ } else {
+ throw e;
+ }
+ }
}
public PipelineOptions getPipelineOptions() {
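The practical effect: a failing PAssert in a test executed on
TestFlinkPipelineRunner now fails JUnit with the original AssertionError
rather than a RuntimeException wrapping a JobExecutionException. An
illustrative fragment (the deliberately wrong expectation is hypothetical,
not from this commit):
    PCollection<Long> count = p
        .apply(Create.of(1, 2, 3))
        .apply(Count.<Integer>globally());
    PAssert.thatSingleton(count).isEqualTo(4L); // deliberately wrong
    p.run(); // the AssertionError raised by PAssert now propagates from here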
[14/14] incubator-beam git commit: This closes #343
Posted by al...@apache.org.
This closes #343
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/af8f5935
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/af8f5935
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/af8f5935
Branch: refs/heads/master
Commit: af8f5935ca1866012ceb102b9472c8b1ef102d73
Parents: dc98211 23ba976
Author: Aljoscha Krettek <al...@gmail.com>
Authored: Fri May 20 08:08:38 2016 +0200
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri May 20 08:08:38 2016 +0200
----------------------------------------------------------------------
examples/java8/pom.xml | 12 +
runners/flink/runner/pom.xml | 117 ++-
.../beam/runners/flink/FlinkPipelineRunner.java | 16 +-
.../runners/flink/FlinkRunnerRegistrar.java | 4 +-
.../runners/flink/TestFlinkPipelineRunner.java | 80 ++
.../apache/beam/runners/flink/io/ConsoleIO.java | 82 --
.../FlinkBatchPipelineTranslator.java | 18 +-
.../FlinkBatchTransformTranslators.java | 868 ++++++++++++-------
.../FlinkBatchTranslationContext.java | 72 +-
.../FlinkStreamingTransformTranslators.java | 22 +-
.../FlinkStreamingTranslationContext.java | 29 +-
.../functions/FlinkAssignContext.java | 56 ++
.../functions/FlinkAssignWindows.java | 51 ++
.../FlinkCoGroupKeyedListAggregator.java | 61 --
.../functions/FlinkCreateFunction.java | 63 --
.../functions/FlinkDoFnFunction.java | 194 ++---
.../FlinkKeyedListAggregationFunction.java | 78 --
.../FlinkMergingNonShuffleReduceFunction.java | 238 +++++
.../FlinkMergingPartialReduceFunction.java | 205 +++++
.../functions/FlinkMergingReduceFunction.java | 207 +++++
.../functions/FlinkMultiOutputDoFnFunction.java | 157 ++--
.../FlinkMultiOutputProcessContext.java | 176 ++++
.../FlinkMultiOutputPruningFunction.java | 25 +-
.../functions/FlinkNoElementAssignContext.java | 71 ++
.../functions/FlinkPartialReduceFunction.java | 171 +++-
.../functions/FlinkProcessContext.java | 324 +++++++
.../functions/FlinkReduceFunction.java | 174 +++-
.../functions/SideInputInitializer.java | 75 ++
.../flink/translation/functions/UnionCoder.java | 152 ----
.../translation/types/CoderTypeInformation.java | 21 +-
.../translation/types/CoderTypeSerializer.java | 14 +-
.../translation/types/KvCoderComperator.java | 102 +--
.../types/KvCoderTypeInformation.java | 63 +-
.../types/VoidCoderTypeSerializer.java | 112 ---
.../wrappers/CombineFnAggregatorWrapper.java | 94 --
.../SerializableFnAggregatorWrapper.java | 31 +-
.../translation/wrappers/SinkOutputFormat.java | 10 +-
.../translation/wrappers/SourceInputFormat.java | 18 +-
.../streaming/FlinkGroupByKeyWrapper.java | 10 +-
.../io/FlinkStreamingCreateFunction.java | 9 +-
.../apache/beam/runners/flink/AvroITCase.java | 129 ---
.../beam/runners/flink/FlattenizeITCase.java | 76 --
.../beam/runners/flink/FlinkTestPipeline.java | 2 +-
.../beam/runners/flink/JoinExamplesITCase.java | 102 ---
.../runners/flink/MaybeEmptyTestITCase.java | 66 --
.../runners/flink/ParDoMultiOutputITCase.java | 102 ---
.../beam/runners/flink/ReadSourceITCase.java | 14 +-
.../flink/RemoveDuplicatesEmptyITCase.java | 72 --
.../runners/flink/RemoveDuplicatesITCase.java | 73 --
.../beam/runners/flink/SideInputITCase.java | 70 --
.../apache/beam/runners/flink/TfIdfITCase.java | 80 --
.../beam/runners/flink/WordCountITCase.java | 77 --
.../runners/flink/WordCountJoin2ITCase.java | 140 ---
.../runners/flink/WordCountJoin3ITCase.java | 158 ----
.../flink/streaming/GroupAlsoByWindowTest.java | 3 +-
.../beam/runners/flink/util/JoinExamples.java | 161 ----
.../beam/sdk/transforms/join/UnionCoder.java | 2 +-
57 files changed, 2836 insertions(+), 2773 deletions(-)
----------------------------------------------------------------------
[11/14] incubator-beam git commit: Fix faulty Flink Flatten when
PCollectionList is empty
Posted by al...@apache.org.
Fix faulty Flink Flatten when PCollectionList is empty
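For reference, the broken case is a Flatten applied to an empty
PCollectionList, which previously left the translated DataSet null. A
minimal driver sketch that exercises the new code path (scaffolding assumed,
not from this commit):
    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.coders.StringUtf8Coder;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;
    import org.apache.beam.sdk.transforms.Flatten;
    import org.apache.beam.sdk.values.PCollection;
    import org.apache.beam.sdk.values.PCollectionList;

    public class EmptyFlattenRepro {
      public static void main(String[] args) {
        Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
        PCollection<String> empty = PCollectionList.<String>empty(p)
            .apply(Flatten.<String>pCollections())
            // with no inputs there is nothing to infer a coder from
            .setCoder(StringUtf8Coder.of());
        p.run();
      }
    }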
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/4e60a497
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/4e60a497
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/4e60a497
Branch: refs/heads/master
Commit: 4e60a497b313414aa2b2968b8def6c6f753908fe
Parents: 26fa0b2
Author: Aljoscha Krettek <al...@gmail.com>
Authored: Fri May 13 14:17:50 2016 +0200
Committer: Aljoscha Krettek <al...@gmail.com>
Committed: Fri May 20 08:08:24 2016 +0200
----------------------------------------------------------------------
.../FlinkBatchTransformTranslators.java | 32 +++++++++++++++-----
1 file changed, 25 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/4e60a497/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
index a03352e..07785aa 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java
@@ -34,6 +34,7 @@ import org.apache.beam.runners.flink.translation.wrappers.SourceInputFormat;
import org.apache.beam.sdk.coders.CannotProvideCoderException;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.VoidCoder;
import org.apache.beam.sdk.io.AvroIO;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.Read;
@@ -61,6 +62,7 @@ import org.apache.beam.sdk.values.TupleTag;
import com.google.api.client.util.Maps;
import com.google.common.collect.Lists;
+import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.operators.Keys;
import org.apache.flink.api.common.typeinfo.TypeInformation;
@@ -78,6 +80,7 @@ import org.apache.flink.api.java.operators.Grouping;
import org.apache.flink.api.java.operators.MapPartitionOperator;
import org.apache.flink.api.java.operators.UnsortedGrouping;
import org.apache.flink.core.fs.Path;
+import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -91,7 +94,7 @@ import java.util.Map;
/**
* Translators for transforming
* Dataflow {@link org.apache.beam.sdk.transforms.PTransform}s to
- * Flink {@link org.apache.flink.api.java.DataSet}s
+ * Flink {@link org.apache.flink.api.java.DataSet}s.
*/
public class FlinkBatchTransformTranslators {
@@ -465,15 +468,30 @@ public class FlinkBatchTransformTranslators {
private static class FlattenPCollectionTranslatorBatch<T> implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Flatten.FlattenPCollectionList<T>> {
@Override
+ @SuppressWarnings("unchecked")
public void translateNode(Flatten.FlattenPCollectionList<T> transform, FlinkBatchTranslationContext context) {
List<PCollection<T>> allInputs = context.getInput(transform).getAll();
DataSet<T> result = null;
- for(PCollection<T> collection : allInputs) {
- DataSet<T> current = context.getInputDataSet(collection);
- if (result == null) {
- result = current;
- } else {
- result = result.union(current);
+ if (allInputs.isEmpty()) {
+ // create an empty dummy source to satisfy downstream operations;
+ // Flink cannot express a truly empty source, so we emit one dummy
+ // element and drop it again in a flatMap that never forwards anything
+ DataSource<String> dummySource =
+ context.getExecutionEnvironment().fromElements("dummy");
+ result = dummySource.flatMap(new FlatMapFunction<String, T>() {
+ @Override
+ public void flatMap(String s, Collector<T> collector) throws Exception {
+ // never return anything
+ }
+ }).returns(new CoderTypeInformation<>((Coder<T>) VoidCoder.of()));
+ } else {
+ for (PCollection<T> collection : allInputs) {
+ DataSet<T> current = context.getInputDataSet(collection);
+ if (result == null) {
+ result = current;
+ } else {
+ result = result.union(current);
+ }
}
}
context.setOutputDataSet(context.getOutput(transform), result);