You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by gr...@apache.org on 2017/07/12 23:44:06 UTC
[06/22] flink git commit: [FLINK-6731] [tests] Activate strict
checkstyle for flink-tests
http://git-wip-us.apache.org/repos/asf/flink/blob/9bd491e0/flink-tests/src/test/java/org/apache/flink/test/operators/ReduceITCase.java
----------------------------------------------------------------------
diff --git a/flink-tests/src/test/java/org/apache/flink/test/operators/ReduceITCase.java b/flink-tests/src/test/java/org/apache/flink/test/operators/ReduceITCase.java
new file mode 100644
index 0000000..2d6897b
--- /dev/null
+++ b/flink-tests/src/test/java/org/apache/flink/test/operators/ReduceITCase.java
@@ -0,0 +1,515 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.operators;
+
+import org.apache.flink.api.common.functions.GroupReduceFunction;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.functions.ReduceFunction;
+import org.apache.flink.api.common.functions.RichReduceFunction;
+import org.apache.flink.api.common.operators.base.ReduceOperatorBase.CombineHint;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.functions.KeySelector;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.api.java.tuple.Tuple3;
+import org.apache.flink.api.java.tuple.Tuple5;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.test.operators.util.CollectionDataSets;
+import org.apache.flink.test.operators.util.CollectionDataSets.CustomType;
+import org.apache.flink.test.operators.util.CollectionDataSets.PojoWithDateAndEnum;
+import org.apache.flink.test.util.MultipleProgramsTestBase;
+import org.apache.flink.util.Collector;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.Collection;
+import java.util.Date;
+import java.util.List;
+
+/**
+ * Integration tests for {@link ReduceFunction} and {@link RichReduceFunction}.
+ */
+@SuppressWarnings("serial")
+@RunWith(Parameterized.class)
+public class ReduceITCase extends MultipleProgramsTestBase {
+
+ /**
+ * Creates the test case for a single execution mode; the mode is forwarded unchanged to the base class.
+ */
+ public ReduceITCase(TestExecutionMode mode){
+ super(mode);
+ }
+
+ /**
+  * Groups the 3-tuple data set on tuple position 1 and reduces each group with
+  * {@code Tuple3Reduce}, which replaces the string field by {@code "B-)"}.
+  */
+ @Test
+ public void testReduceOnTuplesWithKeyFieldSelector() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
+
+     final List<Tuple3<Integer, Long, String>> actual = input
+         .groupBy(1)
+         .reduce(new Tuple3Reduce("B-)"))
+         .collect();
+
+     final String expected = "1,1,Hi\n" +
+         "5,2,B-)\n" +
+         "15,3,B-)\n" +
+         "34,4,B-)\n" +
+         "65,5,B-)\n" +
+         "111,6,B-)\n";
+
+     compareResultAsTuples(actual, expected);
+ }
+
+ /**
+  * Groups the 5-tuple data set on the two tuple positions 4 and 0 and reduces each group
+  * with {@code Tuple5Reduce}.
+  */
+ @Test
+ public void testReduceOnTupleWithMultipleKeyFieldSelectors() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     final DataSet<Tuple5<Integer, Long, Integer, String, Long>> input = CollectionDataSets.get5TupleDataSet(env);
+
+     final List<Tuple5<Integer, Long, Integer, String, Long>> actual = input
+         .groupBy(4, 0)
+         .reduce(new Tuple5Reduce())
+         .collect();
+
+     final String expected = "1,1,0,Hallo,1\n" +
+         "2,3,2,Hallo Welt wie,1\n" +
+         "2,2,1,Hallo Welt,2\n" +
+         "3,9,0,P-),2\n" +
+         "3,6,5,BCD,3\n" +
+         "4,17,0,P-),1\n" +
+         "4,17,0,P-),2\n" +
+         "5,11,10,GHI,1\n" +
+         "5,29,0,P-),2\n" +
+         "5,25,0,P-),3\n";
+
+     compareResultAsTuples(actual, expected);
+ }
+
+ /**
+  * Groups the 3-tuple data set with the key extractor {@code KeySelector1} and reduces each
+  * group with {@code Tuple3Reduce}.
+  */
+ @Test
+ public void testReduceOnTuplesWithKeyExtractor() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
+
+     final List<Tuple3<Integer, Long, String>> actual = input
+         .groupBy(new KeySelector1())
+         .reduce(new Tuple3Reduce("B-)"))
+         .collect();
+
+     final String expected = "1,1,Hi\n" +
+         "5,2,B-)\n" +
+         "15,3,B-)\n" +
+         "34,4,B-)\n" +
+         "65,5,B-)\n" +
+         "111,6,B-)\n";
+
+     compareResultAsTuples(actual, expected);
+ }
+
+ /**
+  * Key extractor that uses field {@code f1} of a 3-tuple as the grouping key.
+  */
+ private static class KeySelector1 implements KeySelector<Tuple3<Integer, Long, String>, Long> {
+     private static final long serialVersionUID = 1L;
+
+     @Override
+     public Long getKey(Tuple3<Integer, Long, String> tuple) {
+         final Long key = tuple.f1;
+         return key;
+     }
+ }
+
+ /**
+  * Groups the custom-type data set with the key extractor {@code KeySelector2} and reduces
+  * each group with {@code CustomTypeReduce}.
+  */
+ @Test
+ public void testReduceOnCustomTypeWithKeyExtractor() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     final DataSet<CustomType> input = CollectionDataSets.getCustomTypeDataSet(env);
+
+     final List<CustomType> actual = input
+         .groupBy(new KeySelector2())
+         .reduce(new CustomTypeReduce())
+         .collect();
+
+     final String expected = "1,0,Hi\n" +
+         "2,3,Hello!\n" +
+         "3,12,Hello!\n" +
+         "4,30,Hello!\n" +
+         "5,60,Hello!\n" +
+         "6,105,Hello!\n";
+
+     compareResultAsText(actual, expected);
+ }
+
+ /**
+  * Key extractor that uses the {@code myInt} field of a {@code CustomType} as the grouping key.
+  */
+ private static class KeySelector2 implements KeySelector<CustomType, Integer> {
+     private static final long serialVersionUID = 1L;
+
+     @Override
+     public Integer getKey(CustomType element) {
+         return element.myInt;
+     }
+ }
+
+ /**
+  * Reduces the whole, ungrouped 3-tuple data set to a single tuple with
+  * {@code AllAddingTuple3Reduce}.
+  */
+ @Test
+ public void testAllReduceForTuple() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
+
+     final List<Tuple3<Integer, Long, String>> actual = input
+         .reduce(new AllAddingTuple3Reduce())
+         .collect();
+
+     compareResultAsTuples(actual, "231,91,Hello World\n");
+ }
+
+ /**
+  * Reduces the whole, ungrouped custom-type data set to a single element with
+  * {@code AllAddingCustomTypeReduce}.
+  */
+ @Test
+ public void testAllReduceForCustomTypes() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     final DataSet<CustomType> input = CollectionDataSets.getCustomTypeDataSet(env);
+
+     final List<CustomType> actual = input
+         .reduce(new AllAddingCustomTypeReduce())
+         .collect();
+
+     compareResultAsText(actual, "91,210,Hello!");
+ }
+
+ /**
+  * Groups the 3-tuple data set on tuple position 1 and reduces each group with
+  * {@code BCTuple3Reduce}, which reads the integer data set registered as broadcast set
+  * {@code "ints"}.
+  */
+ @Test
+ public void testReduceWithBroadcastSet() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+     final DataSet<Integer> broadcastInts = CollectionDataSets.getIntegerDataSet(env);
+     final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
+
+     final List<Tuple3<Integer, Long, String>> actual = input
+         .groupBy(1)
+         .reduce(new BCTuple3Reduce())
+         .withBroadcastSet(broadcastInts, "ints")
+         .collect();
+
+     final String expected = "1,1,Hi\n" +
+         "5,2,55\n" +
+         "15,3,55\n" +
+         "34,4,55\n" +
+         "65,5,55\n" +
+         "111,6,55\n";
+
+     compareResultAsTuples(actual, expected);
+ }
+
+ /**
+  * Groups the 5-tuple data set with {@code KeySelector3}, a key extractor that returns a
+  * tuple, and reduces each group with {@code Tuple5Reduce}.
+  */
+ @Test
+ public void testReduceATupleReturningKeySelector() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     final DataSet<Tuple5<Integer, Long, Integer, String, Long>> input = CollectionDataSets.get5TupleDataSet(env);
+
+     final List<Tuple5<Integer, Long, Integer, String, Long>> actual = input
+         .groupBy(new KeySelector3())
+         .reduce(new Tuple5Reduce())
+         .collect();
+
+     final String expected = "1,1,0,Hallo,1\n" +
+         "2,3,2,Hallo Welt wie,1\n" +
+         "2,2,1,Hallo Welt,2\n" +
+         "3,9,0,P-),2\n" +
+         "3,6,5,BCD,3\n" +
+         "4,17,0,P-),1\n" +
+         "4,17,0,P-),2\n" +
+         "5,11,10,GHI,1\n" +
+         "5,29,0,P-),2\n" +
+         "5,25,0,P-),3\n";
+
+     compareResultAsTuples(actual, expected);
+ }
+
+ /**
+  * Key extractor that builds a composite key from fields {@code f0} and {@code f4} of a 5-tuple.
+  */
+ private static class KeySelector3 implements KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>> {
+     private static final long serialVersionUID = 1L;
+
+     @Override
+     public Tuple2<Integer, Long> getKey(Tuple5<Integer, Long, Integer, String, Long> tuple) {
+         final Integer first = tuple.f0;
+         final Long second = tuple.f4;
+         return new Tuple2<>(first, second);
+     }
+ }
+
+ /**
+  * Groups the 5-tuple data set on the string field expressions {@code "f4"} and {@code "f0"}
+  * and reduces each group with {@code Tuple5Reduce}.
+  */
+ @Test
+ public void testReduceOnTupleWithMultipleKeyExpressions() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     final DataSet<Tuple5<Integer, Long, Integer, String, Long>> input = CollectionDataSets.get5TupleDataSet(env);
+
+     final List<Tuple5<Integer, Long, Integer, String, Long>> actual = input
+         .groupBy("f4", "f0")
+         .reduce(new Tuple5Reduce())
+         .collect();
+
+     final String expected = "1,1,0,Hallo,1\n" +
+         "2,3,2,Hallo Welt wie,1\n" +
+         "2,2,1,Hallo Welt,2\n" +
+         "3,9,0,P-),2\n" +
+         "3,6,5,BCD,3\n" +
+         "4,17,0,P-),1\n" +
+         "4,17,0,P-),2\n" +
+         "5,11,10,GHI,1\n" +
+         "5,29,0,P-),2\n" +
+         "5,25,0,P-),3\n";
+
+     compareResultAsTuples(actual, expected);
+ }
+
+ /**
+  * Groups the 5-tuple data set on the string field expressions {@code "f4"} and {@code "f0"}
+  * and reduces each group with {@code Tuple5Reduce}, with the combine hint set to
+  * {@code CombineHint.HASH}.
+  */
+ @Test
+ public void testReduceOnTupleWithMultipleKeyExpressionsWithHashHint() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     final DataSet<Tuple5<Integer, Long, Integer, String, Long>> input = CollectionDataSets.get5TupleDataSet(env);
+
+     final List<Tuple5<Integer, Long, Integer, String, Long>> actual = input
+         .groupBy("f4", "f0")
+         .reduce(new Tuple5Reduce())
+         .setCombineHint(CombineHint.HASH)
+         .collect();
+
+     final String expected = "1,1,0,Hallo,1\n" +
+         "2,3,2,Hallo Welt wie,1\n" +
+         "2,2,1,Hallo Welt,2\n" +
+         "3,9,0,P-),2\n" +
+         "3,6,5,BCD,3\n" +
+         "4,17,0,P-),1\n" +
+         "4,17,0,P-),2\n" +
+         "5,11,10,GHI,1\n" +
+         "5,29,0,P-),2\n" +
+         "5,25,0,P-),3\n";
+
+     compareResultAsTuples(actual, expected);
+ }
+
+ /**
+  * Tests support for Date and enum serialization: POJOs produced by {@code Mapper1} are
+  * unioned with the POJO collection data set, grouped on {@code "group"} and checked
+  * group by group in {@code GroupReducer1}.
+  */
+ @Test
+ public void testSupportForDataAndEnumSerialization() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+     final DataSet<PojoWithDateAndEnum> generated = env.generateSequence(0, 2).map(new Mapper1());
+     final DataSet<PojoWithDateAndEnum> combined = generated.union(CollectionDataSets.getPojoWithDateAndEnum(env));
+
+     final List<String> actual = combined
+         .groupBy("group")
+         .reduceGroup(new GroupReducer1())
+         .collect();
+
+     compareResultAsText(actual, "ok\nok");
+ }
+
+ /**
+  * Maps the values 0 and 1 to a POJO of group {@code "a"} / {@code CAT_A} and the value 2 to a
+  * POJO of group {@code "b"} / {@code CAT_B}; any other value raises a {@link RuntimeException}.
+  */
+ private static class Mapper1 implements MapFunction<Long, PojoWithDateAndEnum> {
+     @Override
+     public PojoWithDateAndEnum map(Long value) throws Exception {
+         final int l = value.intValue();
+         switch (l) {
+             case 0:
+             case 1:
+                 return createPojo("a", CollectionDataSets.Category.CAT_A);
+             case 2:
+                 return createPojo("b", CollectionDataSets.Category.CAT_B);
+             default:
+                 throw new RuntimeException("Unexpected value for l=" + l);
+         }
+     }
+
+     /** Builds a POJO with the given group and category and the fixed date {@code new Date(666)}. */
+     private static PojoWithDateAndEnum createPojo(String group, CollectionDataSets.Category category) {
+         final PojoWithDateAndEnum pojo = new PojoWithDateAndEnum();
+         pojo.group = group;
+         pojo.date = new Date(666);
+         pojo.cat = category;
+         return pojo;
+     }
+ }
+
+ /**
+  * Verifies every POJO of a group (category matches group name, date is 666 ms) and then emits
+  * a single {@code "ok"} for the group.
+  */
+ private static class GroupReducer1 implements GroupReduceFunction<CollectionDataSets.PojoWithDateAndEnum, String> {
+     private static final long serialVersionUID = 1L;
+
+     @Override
+     public void reduce(Iterable<PojoWithDateAndEnum> values,
+             Collector<String> out) throws Exception {
+         for (PojoWithDateAndEnum pojo : values) {
+             final boolean isCatA = pojo.cat == CollectionDataSets.Category.CAT_A;
+             final boolean isCatB = pojo.cat == CollectionDataSets.Category.CAT_B;
+             if (!isCatA && !isCatB) {
+                 // Any other category (including null) is a test failure.
+                 Assert.fail("error. Cat = " + pojo.cat);
+             }
+             Assert.assertEquals(isCatA ? "a" : "b", pojo.group);
+             Assert.assertEquals(666, pojo.date.getTime());
+         }
+         out.collect("ok");
+     }
+ }
+
+ /**
+  * Reduce function that sums field {@code f0}, keeps {@code f1} of the first input and either
+  * keeps {@code f2} of the first input or, if a replacement was given, emits that replacement.
+  * The same output tuple instance is reused for every call.
+  */
+ private static class Tuple3Reduce implements ReduceFunction<Tuple3<Integer, Long, String>> {
+     private static final long serialVersionUID = 1L;
+     private final Tuple3<Integer, Long, String> out = new Tuple3<>();
+     private final String f2Replace;
+
+     public Tuple3Reduce() {
+         this(null);
+     }
+
+     public Tuple3Reduce(String f2Replace) {
+         this.f2Replace = f2Replace;
+     }
+
+     @Override
+     public Tuple3<Integer, Long, String> reduce(
+             Tuple3<Integer, Long, String> in1,
+             Tuple3<Integer, Long, String> in2) throws Exception {
+
+         final String f2 = (f2Replace != null) ? f2Replace : in1.f2;
+         out.setFields(in1.f0 + in2.f0, in1.f1, f2);
+         return out;
+     }
+ }
+
+ /**
+  * Reduce function that keeps {@code f0} and {@code f4} of the first input, sums {@code f1},
+  * and sets {@code f2} to 0 and {@code f3} to {@code "P-)"}. The output tuple instance is reused.
+  */
+ private static class Tuple5Reduce implements ReduceFunction<Tuple5<Integer, Long, Integer, String, Long>> {
+     private static final long serialVersionUID = 1L;
+     private final Tuple5<Integer, Long, Integer, String, Long> out = new Tuple5<>();
+
+     @Override
+     public Tuple5<Integer, Long, Integer, String, Long> reduce(
+             Tuple5<Integer, Long, Integer, String, Long> first,
+             Tuple5<Integer, Long, Integer, String, Long> second)
+             throws Exception {
+
+         final long f1Sum = first.f1 + second.f1;
+         out.setFields(first.f0, f1Sum, 0, "P-)", first.f4);
+         return out;
+     }
+ }
+
+ /**
+  * Reduce function that keeps {@code myInt} of the first input, sums {@code myLong} and sets
+  * {@code myString} to {@code "Hello!"}. The output object is reused for every call.
+  */
+ private static class CustomTypeReduce implements ReduceFunction<CustomType> {
+     private static final long serialVersionUID = 1L;
+     private final CustomType out = new CustomType();
+
+     @Override
+     public CustomType reduce(CustomType first, CustomType second)
+             throws Exception {
+
+         out.myInt = first.myInt;
+         out.myLong = first.myLong + second.myLong;
+         out.myString = "Hello!";
+         return out;
+     }
+ }
+
+ /**
+  * Reduce function that sums {@code f0} and {@code f1} of both inputs and sets {@code f2} to
+  * {@code "Hello World"}. The output tuple instance is reused for every call.
+  */
+ private static class AllAddingTuple3Reduce implements ReduceFunction<Tuple3<Integer, Long, String>> {
+     private static final long serialVersionUID = 1L;
+     private final Tuple3<Integer, Long, String> out = new Tuple3<>();
+
+     @Override
+     public Tuple3<Integer, Long, String> reduce(
+             Tuple3<Integer, Long, String> first,
+             Tuple3<Integer, Long, String> second) throws Exception {
+
+         final int f0Sum = first.f0 + second.f0;
+         final long f1Sum = first.f1 + second.f1;
+         out.setFields(f0Sum, f1Sum, "Hello World");
+         return out;
+     }
+ }
+
+ /**
+  * Reduce function that sums {@code myInt} and {@code myLong} of both inputs and sets
+  * {@code myString} to {@code "Hello!"}. The output object is reused for every call.
+  */
+ private static class AllAddingCustomTypeReduce implements ReduceFunction<CustomType> {
+     private static final long serialVersionUID = 1L;
+     private final CustomType out = new CustomType();
+
+     @Override
+     public CustomType reduce(CustomType first, CustomType second)
+             throws Exception {
+
+         out.myInt = first.myInt + second.myInt;
+         out.myLong = first.myLong + second.myLong;
+         out.myString = "Hello!";
+         return out;
+     }
+ }
+
+ /**
+  * Rich reduce function that, on {@code open}, sums the integers of the broadcast variable
+  * {@code "ints"} and afterwards emits that sum (as a string) in field {@code f2} of every
+  * reduced tuple. Field {@code f0} is summed, {@code f1} is taken from the first input.
+  */
+ private static class BCTuple3Reduce extends RichReduceFunction<Tuple3<Integer, Long, String>> {
+     private static final long serialVersionUID = 1L;
+     private final Tuple3<Integer, Long, String> out = new Tuple3<>();
+     private String f2Replace = "";
+
+     @Override
+     public void open(Configuration config) {
+         final Collection<Integer> broadcastInts = getRuntimeContext().getBroadcastVariable("ints");
+
+         int total = 0;
+         for (Integer value : broadcastInts) {
+             total += value;
+         }
+         f2Replace = String.valueOf(total);
+     }
+
+     @Override
+     public Tuple3<Integer, Long, String> reduce(
+             Tuple3<Integer, Long, String> first,
+             Tuple3<Integer, Long, String> second) throws Exception {
+
+         final int f0Sum = first.f0 + second.f0;
+         out.setFields(f0Sum, first.f1, f2Replace);
+         return out;
+     }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/flink/blob/9bd491e0/flink-tests/src/test/java/org/apache/flink/test/operators/ReduceWithCombinerITCase.java
----------------------------------------------------------------------
diff --git a/flink-tests/src/test/java/org/apache/flink/test/operators/ReduceWithCombinerITCase.java b/flink-tests/src/test/java/org/apache/flink/test/operators/ReduceWithCombinerITCase.java
new file mode 100644
index 0000000..c6d340a
--- /dev/null
+++ b/flink-tests/src/test/java/org/apache/flink/test/operators/ReduceWithCombinerITCase.java
@@ -0,0 +1,317 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.operators;
+
+import org.apache.flink.api.common.functions.CombineFunction;
+import org.apache.flink.api.common.functions.GroupCombineFunction;
+import org.apache.flink.api.common.functions.GroupReduceFunction;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.functions.KeySelector;
+import org.apache.flink.api.java.operators.UnsortedGrouping;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.api.java.tuple.Tuple3;
+import org.apache.flink.test.util.MultipleProgramsTestBase;
+import org.apache.flink.util.Collector;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Integration tests for group reductions whose functions also implement a combiner
+ * ({@link CombineFunction} or {@link GroupCombineFunction}).
+ */
+@SuppressWarnings("serial")
+@RunWith(Parameterized.class)
+public class ReduceWithCombinerITCase extends MultipleProgramsTestBase {
+
+ /**
+ * Always runs the tests in {@code TestExecutionMode.CLUSTER}; the {@code mode} argument
+ * supplied by the parameterized runner is not used.
+ * NOTE(review): presumably deliberate because the combiner tests need a cluster run; if the
+ * base class supplies several modes, each of them repeats the same cluster run - confirm.
+ */
+ public ReduceWithCombinerITCase(TestExecutionMode mode) {
+ super(TestExecutionMode.CLUSTER);
+ }
+
+ /**
+  * Runs a combinable group reduce over the whole non-keyed input with parallelism 4.
+  */
+ @Test
+ public void testReduceOnNonKeyedDataset() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     env.setParallelism(4);
+
+     // the input is rebalanced across the downstream tasks by createNonKeyedInput
+     final DataSet<Tuple2<Integer, Boolean>> input = createNonKeyedInput(env);
+     final DataSet<Tuple2<Integer, Boolean>> reduced = input.reduceGroup(new NonKeyedCombReducer());
+
+     final List<Tuple2<Integer, Boolean>> actual = reduced.collect();
+
+     compareResultAsTuples(actual, "10,true\n");
+ }
+
+ /**
+  * Feeds the same non-keyed input into two different combinable group reduces and checks the
+  * union of both results.
+  */
+ @Test
+ public void testForkingReduceOnNonKeyedDataset() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     env.setParallelism(4);
+
+     // the input is rebalanced across the downstream tasks by createNonKeyedInput
+     final DataSet<Tuple2<Integer, Boolean>> input = createNonKeyedInput(env);
+
+     final DataSet<Tuple2<Integer, Boolean>> viaCombine = input.reduceGroup(new NonKeyedCombReducer());
+     final DataSet<Tuple2<Integer, Boolean>> viaGroupCombine = input.reduceGroup(new NonKeyedGroupCombReducer());
+
+     final List<Tuple2<Integer, Boolean>> actual = viaCombine.union(viaGroupCombine).collect();
+
+     compareResultAsTuples(actual, "10,true\n10,true\n");
+ }
+
+ /**
+  * Groups the keyed input on tuple position 0 and runs a combinable group reduce per key.
+  */
+ @Test
+ public void testReduceOnKeyedDataset() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     env.setParallelism(4);
+
+     // the input is rebalanced across the downstream tasks by createKeyedInput
+     final DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env);
+     final DataSet<Tuple3<String, Integer, Boolean>> reduced = input
+         .groupBy(0)
+         .reduceGroup(new KeyedCombReducer());
+
+     final List<Tuple3<String, Integer, Boolean>> actual = reduced.collect();
+
+     compareResultAsTuples(actual, "k1,6,true\nk2,4,true\n");
+ }
+
+ /**
+  * Groups the keyed input with the key extractor {@code KeySelectorX} and runs a combinable
+  * group reduce per key.
+  */
+ @Test
+ public void testReduceOnKeyedDatasetWithSelector() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     env.setParallelism(4);
+
+     // the input is rebalanced across the downstream tasks by createKeyedInput
+     final DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env);
+     final DataSet<Tuple3<String, Integer, Boolean>> reduced = input
+         .groupBy(new KeySelectorX())
+         .reduceGroup(new KeyedCombReducer());
+
+     final List<Tuple3<String, Integer, Boolean>> actual = reduced.collect();
+
+     compareResultAsTuples(actual, "k1,6,true\nk2,4,true\n");
+ }
+
+ /**
+  * Applies two different combinable group reduces to the same grouping (tuple position 0) and
+  * checks the union of both results.
+  */
+ @Test
+ public void testForkingReduceOnKeyedDataset() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     env.setParallelism(4);
+
+     // the input is rebalanced across the downstream tasks by createKeyedInput
+     final DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env);
+     final UnsortedGrouping<Tuple3<String, Integer, Boolean>> grouped = input.groupBy(0);
+
+     final DataSet<Tuple3<String, Integer, Boolean>> viaCombine = grouped.reduceGroup(new KeyedCombReducer());
+     final DataSet<Tuple3<String, Integer, Boolean>> viaGroupCombine = grouped.reduceGroup(new KeyedGroupCombReducer());
+
+     final List<Tuple3<String, Integer, Boolean>> actual = viaCombine.union(viaGroupCombine).collect();
+
+     final String expected = "k1,6,true\n" +
+         "k2,4,true\n" +
+         "k1,6,true\n" +
+         "k2,4,true\n";
+
+     compareResultAsTuples(actual, expected);
+ }
+
+ /**
+  * Applies two different combinable group reduces to the same grouping (key extractor
+  * {@code KeySelectorX}) and checks the union of both results.
+  */
+ @Test
+ public void testForkingReduceOnKeyedDatasetWithSelection() throws Exception {
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+     env.setParallelism(4);
+
+     // the input is rebalanced across the downstream tasks by createKeyedInput
+     final DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env);
+     final UnsortedGrouping<Tuple3<String, Integer, Boolean>> grouped = input.groupBy(new KeySelectorX());
+
+     final DataSet<Tuple3<String, Integer, Boolean>> viaCombine = grouped.reduceGroup(new KeyedCombReducer());
+     final DataSet<Tuple3<String, Integer, Boolean>> viaGroupCombine = grouped.reduceGroup(new KeyedGroupCombReducer());
+
+     final List<Tuple3<String, Integer, Boolean>> actual = viaCombine.union(viaGroupCombine).collect();
+
+     final String expected = "k1,6,true\n" +
+         "k2,4,true\n" +
+         "k1,6,true\n" +
+         "k2,4,true\n";
+
+     compareResultAsTuples(actual, expected);
+ }
+
+ private DataSet<Tuple2<Integer, Boolean>> createNonKeyedInput(ExecutionEnvironment env) {
+ return env.fromCollection(Arrays.asList(
+ new Tuple2<>(1, false),
+ new Tuple2<>(1, false),
+ new Tuple2<>(1, false),
+ new Tuple2<>(1, false),
+ new Tuple2<>(1, false),
+ new Tuple2<>(1, false),
+ new Tuple2<>(1, false),
+ new Tuple2<>(1, false),
+ new Tuple2<>(1, false),
+ new Tuple2<>(1, false))
+ ).rebalance();
+ }
+
+ /**
+  * Combinable group reducer that sums field {@code f0}. The combine step emits {@code true}
+  * only if every incoming flag is {@code false}; the reduce step emits {@code true} only if
+  * every incoming flag is {@code true}. With the all-{@code false} test input, the reducer can
+  * therefore only produce {@code true} for records that went through the combiner first.
+  */
+ private static class NonKeyedCombReducer implements CombineFunction<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>>,
+         GroupReduceFunction<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>> {
+
+     @Override
+     public Tuple2<Integer, Boolean> combine(Iterable<Tuple2<Integer, Boolean>> values) throws Exception {
+         int total = 0;
+         boolean noFlagSet = true;
+
+         for (Tuple2<Integer, Boolean> value : values) {
+             total += value.f0;
+             if (value.f1) {
+                 noFlagSet = false;
+             }
+         }
+         return new Tuple2<>(total, noFlagSet);
+     }
+
+     @Override
+     public void reduce(Iterable<Tuple2<Integer, Boolean>> values, Collector<Tuple2<Integer, Boolean>> out) throws Exception {
+         int total = 0;
+         boolean allFlagsSet = true;
+
+         for (Tuple2<Integer, Boolean> value : values) {
+             total += value.f0;
+             if (!value.f1) {
+                 allFlagsSet = false;
+             }
+         }
+         out.collect(new Tuple2<>(total, allFlagsSet));
+     }
+ }
+
+ /**
+  * Group-combinable group reducer that sums field {@code f0}. The combine step emits
+  * {@code true} only if every incoming flag is {@code false}; the reduce step emits
+  * {@code true} only if every incoming flag is {@code true}.
+  */
+ private static class NonKeyedGroupCombReducer implements GroupCombineFunction<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>>,
+         GroupReduceFunction<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>> {
+
+     @Override
+     public void reduce(Iterable<Tuple2<Integer, Boolean>> values, Collector<Tuple2<Integer, Boolean>> out) throws Exception {
+         int total = 0;
+         boolean allFlagsSet = true;
+
+         for (Tuple2<Integer, Boolean> value : values) {
+             total += value.f0;
+             if (!value.f1) {
+                 allFlagsSet = false;
+             }
+         }
+         out.collect(new Tuple2<>(total, allFlagsSet));
+     }
+
+     @Override
+     public void combine(Iterable<Tuple2<Integer, Boolean>> values, Collector<Tuple2<Integer, Boolean>> out) throws Exception {
+         int total = 0;
+         boolean noFlagSet = true;
+
+         for (Tuple2<Integer, Boolean> value : values) {
+             total += value.f0;
+             if (value.f1) {
+                 noFlagSet = false;
+             }
+         }
+         out.collect(new Tuple2<>(total, noFlagSet));
+     }
+ }
+
+ private DataSet<Tuple3<String, Integer, Boolean>> createKeyedInput(ExecutionEnvironment env) {
+ return env.fromCollection(Arrays.asList(
+ new Tuple3<>("k1", 1, false),
+ new Tuple3<>("k1", 1, false),
+ new Tuple3<>("k1", 1, false),
+ new Tuple3<>("k2", 1, false),
+ new Tuple3<>("k1", 1, false),
+ new Tuple3<>("k1", 1, false),
+ new Tuple3<>("k2", 1, false),
+ new Tuple3<>("k2", 1, false),
+ new Tuple3<>("k1", 1, false),
+ new Tuple3<>("k2", 1, false))
+ ).rebalance();
+ }
+
+ /**
+  * Key extractor that uses the string field {@code f0} of a 3-tuple as the grouping key.
+  */
+ private static class KeySelectorX implements KeySelector<Tuple3<String, Integer, Boolean>, String> {
+     private static final long serialVersionUID = 1L;
+
+     @Override
+     public String getKey(Tuple3<String, Integer, Boolean> tuple) {
+         final String key = tuple.f0;
+         return key;
+     }
+ }
+
+ /**
+  * Combinable group reducer for keyed input. Both steps take the key from the first tuple and
+  * sum field {@code f1}. The combine step emits {@code true} only if every incoming flag is
+  * {@code false}; the reduce step emits {@code true} only if every incoming flag is {@code true}.
+  *
+  * <p>Declared {@code static} like the other functions of this test: it uses no state of the
+  * enclosing test instance and should not hold a reference to it.
+  */
+ private static class KeyedCombReducer implements CombineFunction<Tuple3<String, Integer, Boolean>, Tuple3<String, Integer, Boolean>>,
+         GroupReduceFunction<Tuple3<String, Integer, Boolean>, Tuple3<String, Integer, Boolean>> {
+
+     @Override
+     public Tuple3<String, Integer, Boolean> combine(Iterable<Tuple3<String, Integer, Boolean>> values) throws Exception {
+         String key = null;
+         int sum = 0;
+         boolean flag = true;
+
+         for (Tuple3<String, Integer, Boolean> tuple : values) {
+             // the key of the first tuple wins
+             key = (key == null) ? tuple.f0 : key;
+             sum += tuple.f1;
+             flag &= !tuple.f2;
+         }
+         return new Tuple3<>(key, sum, flag);
+     }
+
+     @Override
+     public void reduce(Iterable<Tuple3<String, Integer, Boolean>> values, Collector<Tuple3<String, Integer, Boolean>> out) throws Exception {
+         String key = null;
+         int sum = 0;
+         boolean flag = true;
+
+         for (Tuple3<String, Integer, Boolean> tuple : values) {
+             // the key of the first tuple wins
+             key = (key == null) ? tuple.f0 : key;
+             sum += tuple.f1;
+             flag &= tuple.f2;
+         }
+         out.collect(new Tuple3<>(key, sum, flag));
+     }
+ }
+
+ /**
+  * Group-combinable group reducer for keyed input. Both steps take the key from the first
+  * tuple and sum field {@code f1}. The combine step emits {@code true} only if every incoming
+  * flag is {@code false}; the reduce step emits {@code true} only if every incoming flag is
+  * {@code true}.
+  *
+  * <p>Declared {@code static} like the other functions of this test: it uses no state of the
+  * enclosing test instance and should not hold a reference to it.
+  */
+ private static class KeyedGroupCombReducer implements GroupCombineFunction<Tuple3<String, Integer, Boolean>, Tuple3<String, Integer, Boolean>>,
+         GroupReduceFunction<Tuple3<String, Integer, Boolean>, Tuple3<String, Integer, Boolean>> {
+
+     @Override
+     public void combine(Iterable<Tuple3<String, Integer, Boolean>> values, Collector<Tuple3<String, Integer, Boolean>> out) throws Exception {
+         String key = null;
+         int sum = 0;
+         boolean flag = true;
+
+         for (Tuple3<String, Integer, Boolean> tuple : values) {
+             // the key of the first tuple wins
+             key = (key == null) ? tuple.f0 : key;
+             sum += tuple.f1;
+             flag &= !tuple.f2;
+         }
+         out.collect(new Tuple3<>(key, sum, flag));
+     }
+
+     @Override
+     public void reduce(Iterable<Tuple3<String, Integer, Boolean>> values, Collector<Tuple3<String, Integer, Boolean>> out) throws Exception {
+         String key = null;
+         int sum = 0;
+         boolean flag = true;
+
+         for (Tuple3<String, Integer, Boolean> tuple : values) {
+             // the key of the first tuple wins
+             key = (key == null) ? tuple.f0 : key;
+             sum += tuple.f1;
+             flag &= tuple.f2;
+         }
+         out.collect(new Tuple3<>(key, sum, flag));
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/flink/blob/9bd491e0/flink-tests/src/test/java/org/apache/flink/test/operators/RemoteEnvironmentITCase.java
----------------------------------------------------------------------
diff --git a/flink-tests/src/test/java/org/apache/flink/test/operators/RemoteEnvironmentITCase.java b/flink-tests/src/test/java/org/apache/flink/test/operators/RemoteEnvironmentITCase.java
new file mode 100644
index 0000000..36eded6
--- /dev/null
+++ b/flink-tests/src/test/java/org/apache/flink/test/operators/RemoteEnvironmentITCase.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.operators;
+
+import org.apache.flink.api.common.functions.RichMapPartitionFunction;
+import org.apache.flink.api.common.io.GenericInputFormat;
+import org.apache.flink.api.common.operators.util.TestNonRichInputFormat;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.io.LocalCollectionOutputFormat;
+import org.apache.flink.client.program.ProgramInvocationException;
+import org.apache.flink.configuration.AkkaOptions;
+import org.apache.flink.configuration.ConfigConstants;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.core.io.GenericInputSplit;
+import org.apache.flink.runtime.minicluster.StandaloneMiniCluster;
+import org.apache.flink.util.Collector;
+import org.apache.flink.util.FlinkException;
+import org.apache.flink.util.TestLogger;
+
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Integration tests for {@link org.apache.flink.api.java.RemoteEnvironment}.
+ */
+@SuppressWarnings("serial")
+public class RemoteEnvironmentITCase extends TestLogger {
+
+ // Number of task slots written into the mini cluster configuration in setupCluster().
+ private static final int TM_SLOTS = 4;
+
+ // Parallelism set on the environment in testUserSpecificParallelism(); also the expected
+ // number of input splits and of collected results.
+ private static final int USER_DOP = 2;
+
+ // Akka startup timeout used by testInvalidAkkaConfiguration(), which expects execution to fail.
+ private static final String INVALID_STARTUP_TIMEOUT = "0.001 ms";
+
+ // Akka startup timeout used by testUserSpecificParallelism().
+ private static final String VALID_STARTUP_TIMEOUT = "100 s";
+
+ // Configuration the shared mini cluster is created with.
+ private static Configuration configuration;
+
+ // Mini cluster shared by all tests; created in setupCluster(), closed in tearDownCluster().
+ private static StandaloneMiniCluster cluster;
+
+ /**
+ * Creates the shared {@link StandaloneMiniCluster} once for all tests, configured with
+ * {@code TM_SLOTS} task slots.
+ */
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ configuration = new Configuration();
+
+ configuration.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, TM_SLOTS);
+
+ cluster = new StandaloneMiniCluster(configuration);
+ }
+
+ /**
+  * Closes the shared mini cluster. The null check covers the case where
+  * {@code setupCluster()} failed before the cluster was created, so teardown does not add a
+  * {@link NullPointerException} on top of the original failure.
+  */
+ @AfterClass
+ public static void tearDownCluster() throws Exception {
+     if (cluster != null) {
+         cluster.close();
+     }
+ }
+
+ /**
+  * Ensures that Akka configuration parameters passed to the remote environment are applied:
+  * with the invalid startup timeout the execution must fail, and the cause of the resulting
+  * {@link ProgramInvocationException} is expected to be a {@link FlinkException}.
+  */
+ @Test(expected = FlinkException.class)
+ public void testInvalidAkkaConfiguration() throws Throwable {
+     final Configuration akkaConfig = new Configuration();
+     akkaConfig.setString(AkkaOptions.STARTUP_TIMEOUT, INVALID_STARTUP_TIMEOUT);
+
+     final ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment(
+         cluster.getHostname(),
+         cluster.getPort(),
+         akkaConfig);
+     env.getConfig().disableSysoutLogging();
+
+     final DataSet<String> source = env.createInput(new TestNonRichInputFormat());
+     source.output(new LocalCollectionOutputFormat<>(new ArrayList<String>()));
+
+     try {
+         env.execute();
+         Assert.fail("Program should not run successfully, cause of invalid akka settings.");
+     } catch (ProgramInvocationException invocationFailure) {
+         // rethrow the cause so that the 'expected' attribute of @Test can match it
+         throw invocationFailure.getCause();
+     }
+ }
+
+ /**
+  * Ensures that the parallelism set on the environment is used even though a configuration
+  * is supplied when the remote environment is created.
+  */
+ @Test
+ public void testUserSpecificParallelism() throws Exception {
+     final Configuration akkaConfig = new Configuration();
+     akkaConfig.setString(AkkaOptions.STARTUP_TIMEOUT, VALID_STARTUP_TIMEOUT);
+
+     final ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment(
+         cluster.getHostname(),
+         cluster.getPort(),
+         akkaConfig);
+     env.setParallelism(USER_DOP);
+     env.getConfig().disableSysoutLogging();
+
+     // every partition emits the index of the subtask that processed it
+     final DataSet<Integer> subtaskIndices = env.createInput(new ParallelismDependentInputFormat())
+         .rebalance()
+         .mapPartition(new RichMapPartitionFunction<Integer, Integer>() {
+             @Override
+             public void mapPartition(Iterable<Integer> values, Collector<Integer> out) throws Exception {
+                 final int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
+                 out.collect(subtaskIndex);
+             }
+         });
+
+     final List<Integer> collected = subtaskIndices.collect();
+     assertEquals(USER_DOP, collected.size());
+ }
+
+ /**
+  * Input format that asserts that the requested number of splits equals {@code USER_DOP}.
+  * {@code nextRecord} returns {@code 1} on its first call and {@code null} afterwards.
+  */
+ private static class ParallelismDependentInputFormat extends GenericInputFormat<Integer> {
+
+     private transient boolean emitted;
+
+     @Override
+     public GenericInputSplit[] createInputSplits(int numSplits) throws IOException {
+         assertEquals(USER_DOP, numSplits);
+         return super.createInputSplits(numSplits);
+     }
+
+     @Override
+     public boolean reachedEnd() {
+         return emitted;
+     }
+
+     @Override
+     public Integer nextRecord(Integer reuse) {
+         if (!emitted) {
+             emitted = true;
+             return 1;
+         }
+         return null;
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/flink/blob/9bd491e0/flink-tests/src/test/java/org/apache/flink/test/operators/ReplicatingDataSourceITCase.java
----------------------------------------------------------------------
diff --git a/flink-tests/src/test/java/org/apache/flink/test/operators/ReplicatingDataSourceITCase.java b/flink-tests/src/test/java/org/apache/flink/test/operators/ReplicatingDataSourceITCase.java
new file mode 100644
index 0000000..c023cf4
--- /dev/null
+++ b/flink-tests/src/test/java/org/apache/flink/test/operators/ReplicatingDataSourceITCase.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.operators;
+
+import org.apache.flink.api.common.functions.FilterFunction;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.io.ReplicatingInputFormat;
+import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.io.ParallelIteratorInputFormat;
+import org.apache.flink.api.java.tuple.Tuple;
+import org.apache.flink.api.java.tuple.Tuple1;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.core.io.GenericInputSplit;
+import org.apache.flink.test.util.MultipleProgramsTestBase;
+import org.apache.flink.util.NumberSequenceIterator;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.List;
+
+/**
+ * Tests for replicating DataSources.
+ */
+@RunWith(Parameterized.class)
+public class ReplicatingDataSourceITCase extends MultipleProgramsTestBase {
+
+	/**
+	 * Creates the test case for one execution mode, which is passed on to the test base.
+	 *
+	 * @param mode the execution mode supplied by the parameterized runner
+	 */
+	public ReplicatingDataSourceITCase(TestExecutionMode mode) {
+		super(mode);
+	}
+
+	/**
+	 * Joins a replicated source with a regular sequence source on the single tuple field and
+	 * compares the sum over the projected first input against the expected text.
+	 */
+	@Test
+	public void testReplicatedSourceToJoin() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+		// The number sequence 0..1000 is read through a ReplicatingInputFormat.
+		final ReplicatingInputFormat<Long, GenericInputSplit> replicatedFormat =
+			new ReplicatingInputFormat<Long, GenericInputSplit>(
+				new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L)));
+
+		final DataSet<Tuple1<Long>> replicated = env
+			.createInput(replicatedFormat, BasicTypeInfo.LONG_TYPE_INFO)
+			.map(new ToTuple());
+		final DataSet<Tuple1<Long>> sequence = env.generateSequence(0L, 1000L).map(new ToTuple());
+
+		final DataSet<Tuple> summedKeys = replicated
+			.join(sequence).where(0).equalTo(0)
+			.projectFirst(0)
+			.sum(0);
+
+		final List<Tuple> result = summedKeys.collect();
+
+		// 0 + 1 + ... + 1000 = 500500
+		compareResultAsText(result, "(500500)");
+	}
+
+	/**
+	 * Crosses a replicated source with a regular sequence source, keeps only the pairs whose values
+	 * are equal, and compares the sum over the first element of each pair against the expected text.
+	 */
+	@Test
+	public void testReplicatedSourceToCross() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+		// The number sequence 0..1000 is read through a ReplicatingInputFormat.
+		final ReplicatingInputFormat<Long, GenericInputSplit> replicatedFormat =
+			new ReplicatingInputFormat<Long, GenericInputSplit>(
+				new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L)));
+
+		final DataSet<Tuple1<Long>> replicated = env
+			.createInput(replicatedFormat, BasicTypeInfo.LONG_TYPE_INFO)
+			.map(new ToTuple());
+		final DataSet<Tuple1<Long>> sequence = env.generateSequence(0L, 1000L).map(new ToTuple());
+
+		// Keeps a crossed pair only if both sides carry the same value.
+		final FilterFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>> sameValue =
+			new FilterFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>>() {
+				@Override
+				public boolean filter(Tuple2<Tuple1<Long>, Tuple1<Long>> value) throws Exception {
+					final Long left = value.f0.f0;
+					final Long right = value.f1.f0;
+					return left.equals(right);
+				}
+			};
+
+		// Reduces a crossed pair to its first element.
+		final MapFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>, Tuple1<Long>> keepFirst =
+			new MapFunction<Tuple2<Tuple1<Long>, Tuple1<Long>>, Tuple1<Long>>() {
+				@Override
+				public Tuple1<Long> map(Tuple2<Tuple1<Long>, Tuple1<Long>> value) throws Exception {
+					return value.f0;
+				}
+			};
+
+		final DataSet<Tuple1<Long>> summed = replicated
+			.cross(sequence)
+			.filter(sameValue)
+			.map(keepFirst)
+			.sum(0);
+
+		final List<Tuple1<Long>> result = summed.collect();
+
+		// 0 + 1 + ... + 1000 = 500500
+		compareResultAsText(result, "(500500)");
+	}
+
+	/**
+	 * Wraps each {@code Long} into a {@code Tuple1<Long>}.
+	 */
+	private static class ToTuple implements MapFunction<Long, Tuple1<Long>> {
+
+		@Override
+		public Tuple1<Long> map(Long value) throws Exception {
+			final Tuple1<Long> wrapped = new Tuple1<>(value);
+			return wrapped;
+		}
+	}
+
+}
http://git-wip-us.apache.org/repos/asf/flink/blob/9bd491e0/flink-tests/src/test/java/org/apache/flink/test/operators/SampleITCase.java
----------------------------------------------------------------------
diff --git a/flink-tests/src/test/java/org/apache/flink/test/operators/SampleITCase.java b/flink-tests/src/test/java/org/apache/flink/test/operators/SampleITCase.java
new file mode 100644
index 0000000..c0cc62a
--- /dev/null
+++ b/flink-tests/src/test/java/org/apache/flink/test/operators/SampleITCase.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.operators;
+
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.operators.FlatMapOperator;
+import org.apache.flink.api.java.operators.MapPartitionOperator;
+import org.apache.flink.api.java.tuple.Tuple3;
+import org.apache.flink.api.java.utils.DataSetUtils;
+import org.apache.flink.test.operators.util.CollectionDataSets;
+import org.apache.flink.test.util.MultipleProgramsTestBase;
+import org.apache.flink.util.Collector;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.List;
+import java.util.Random;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Integration tests for {@link DataSetUtils#sample}.
+ */
+@SuppressWarnings("serial")
+@RunWith(Parameterized.class)
+public class SampleITCase extends MultipleProgramsTestBase {
+
+ private static final Random RNG = new Random();
+
+	/**
+	 * Creates the test case for one execution mode, which is passed on to the test base.
+	 *
+	 * @param mode the execution mode supplied by the parameterized runner
+	 */
+	public SampleITCase(TestExecutionMode mode) {
+		super(mode);
+	}
+
+	/**
+	 * Runs before each test: obtains an execution environment and sets its parallelism to 5.
+	 *
+	 * <p>NOTE(review): the environment obtained here is not retained. Presumably the setting is
+	 * meant to apply to the environments the tests obtain later; confirm that it carries over.
+	 */
+	@Before
+	public void initiate() {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setParallelism(5);
+	}
+
+	/**
+	 * Samples without replacement for the fractions 0, 0.2 and 1.0, in that order.
+	 */
+	@Test
+	public void testSamplerWithFractionWithoutReplacement() throws Exception {
+		final double[] fractions = {0d, 0.2d, 1.0d};
+		for (double fraction : fractions) {
+			verifySamplerWithFractionWithoutReplacement(fraction);
+		}
+	}
+
+	/**
+	 * Samples with replacement for the fractions 0, 0.2, 1.0 and 2.0, in that order.
+	 */
+	@Test
+	public void testSamplerWithFractionWithReplacement() throws Exception {
+		final double[] fractions = {0d, 0.2d, 1.0d, 2.0d};
+		for (double fraction : fractions) {
+			verifySamplerWithFractionWithReplacement(fraction);
+		}
+	}
+
+	/**
+	 * Draws fixed-size samples without replacement for the sizes 0, 2 and 21, in that order.
+	 */
+	@Test
+	public void testSamplerWithSizeWithoutReplacement() throws Exception {
+		final int[] sampleSizes = {0, 2, 21};
+		for (int sampleSize : sampleSizes) {
+			verifySamplerWithFixedSizeWithoutReplacement(sampleSize);
+		}
+	}
+
+	/**
+	 * Draws fixed-size samples with replacement for the sizes 0, 2 and 21, in that order.
+	 */
+	@Test
+	public void testSamplerWithSizeWithReplacement() throws Exception {
+		final int[] sampleSizes = {0, 2, 21};
+		for (int sampleSize : sampleSizes) {
+			verifySamplerWithFixedSizeWithReplacement(sampleSize);
+		}
+	}
+
+ private void verifySamplerWithFractionWithoutReplacement(double fraction) throws Exception {
+ verifySamplerWithFractionWithoutReplacement(fraction, RNG.nextLong());
+ }
+
+ private void verifySamplerWithFractionWithoutReplacement(double fraction, long seed) throws Exception {
+ verifySamplerWithFraction(false, fraction, seed);
+ }
+
+ private void verifySamplerWithFractionWithReplacement(double fraction) throws Exception {
+ verifySamplerWithFractionWithReplacement(fraction, RNG.nextLong());
+ }
+
+ private void verifySamplerWithFractionWithReplacement(double fraction, long seed) throws Exception {
+ verifySamplerWithFraction(true, fraction, seed);
+ }
+
+ private void verifySamplerWithFraction(boolean withReplacement, double fraction, long seed) throws Exception {
+ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+ FlatMapOperator<Tuple3<Integer, Long, String>, String> ds = getSourceDataSet(env);
+ MapPartitionOperator<String, String> sampled = DataSetUtils.sample(ds, withReplacement, fraction, seed);
+ List<String> result = sampled.collect();
+ containsResultAsText(result, getSourceStrings());
+ }
+
+ private void verifySamplerWithFixedSizeWithoutReplacement(int numSamples) throws Exception {
+ verifySamplerWithFixedSizeWithoutReplacement(numSamples, RNG.nextLong());
+ }
+
+ private void verifySamplerWithFixedSizeWithoutReplacement(int numSamples, long seed) throws Exception {
+ verifySamplerWithFixedSize(false, numSamples, seed);
+ }
+
+ private void verifySamplerWithFixedSizeWithReplacement(int numSamples) throws Exception {
+ verifySamplerWithFixedSizeWithReplacement(numSamples, RNG.nextLong());
+ }
+
+ private void verifySamplerWithFixedSizeWithReplacement(int numSamples, long seed) throws Exception {
+ verifySamplerWithFixedSize(true, numSamples, seed);
+ }
+
+ private void verifySamplerWithFixedSize(boolean withReplacement, int numSamples, long seed) throws Exception {
+ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+ FlatMapOperator<Tuple3<Integer, Long, String>, String> ds = getSourceDataSet(env);
+ DataSet<String> sampled = DataSetUtils.sampleWithSize(ds, withReplacement, numSamples, seed);
+ List<String> result = sampled.collect();
+ assertEquals(numSamples, result.size());
+ containsResultAsText(result, getSourceStrings());
+ }
+
+ private FlatMapOperator<Tuple3<Integer, Long, String>, String> getSourceDataSet(ExecutionEnvironment env) {
+ return CollectionDataSets.get3TupleDataSet(env).flatMap(
+ new FlatMapFunction<Tuple3<Integer, Long, String>, String>() {
+ @Override
+ public void flatMap(Tuple3<Integer, Long, String> value, Collector<String> out) throws Exception {
+ out.collect(value.f2);
+ }
+ });
+ }
+
+ private String getSourceStrings() {
+ return "Hi\n" +
+ "Hello\n" +
+ "Hello world\n" +
+ "Hello world, how are you?\n" +
+ "I am fine.\n" +
+ "Luke Skywalker\n" +
+ "Comment#1\n" +
+ "Comment#2\n" +
+ "Comment#3\n" +
+ "Comment#4\n" +
+ "Comment#5\n" +
+ "Comment#6\n" +
+ "Comment#7\n" +
+ "Comment#8\n" +
+ "Comment#9\n" +
+ "Comment#10\n" +
+ "Comment#11\n" +
+ "Comment#12\n" +
+ "Comment#13\n" +
+ "Comment#14\n" +
+ "Comment#15\n";
+ }
+}
http://git-wip-us.apache.org/repos/asf/flink/blob/9bd491e0/flink-tests/src/test/java/org/apache/flink/test/operators/SortPartitionITCase.java
----------------------------------------------------------------------
diff --git a/flink-tests/src/test/java/org/apache/flink/test/operators/SortPartitionITCase.java b/flink-tests/src/test/java/org/apache/flink/test/operators/SortPartitionITCase.java
new file mode 100644
index 0000000..a44f28c
--- /dev/null
+++ b/flink-tests/src/test/java/org/apache/flink/test/operators/SortPartitionITCase.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.operators;
+
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.functions.MapPartitionFunction;
+import org.apache.flink.api.common.operators.Order;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.functions.KeySelector;
+import org.apache.flink.api.java.tuple.Tuple1;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.api.java.tuple.Tuple3;
+import org.apache.flink.api.java.tuple.Tuple5;
+import org.apache.flink.test.operators.util.CollectionDataSets;
+import org.apache.flink.test.operators.util.CollectionDataSets.POJO;
+import org.apache.flink.test.util.MultipleProgramsTestBase;
+import org.apache.flink.util.Collector;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.io.Serializable;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Tests for {@link DataSet#sortPartition}.
+ */
+@RunWith(Parameterized.class)
+public class SortPartitionITCase extends MultipleProgramsTestBase {
+
+	/**
+	 * Creates the test case for one execution mode, which is passed on to the test base.
+	 *
+	 * @param mode the execution mode supplied by the parameterized runner
+	 */
+	public SortPartitionITCase(TestExecutionMode mode) {
+		super(mode);
+	}
+
+	/**
+	 * Sorts each partition on tuple position 1 in descending order and verifies the order.
+	 */
+	@Test
+	public void testSortPartitionByKeyField() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setParallelism(4);
+
+		// Identity map at parallelism 4 to parallelize the input.
+		final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env)
+			.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4);
+
+		final DataSet<Tuple1<Boolean>> orderChecks = input
+			.sortPartition(1, Order.DESCENDING)
+			.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()));
+
+		final List<Tuple1<Boolean>> result = orderChecks.distinct().collect();
+
+		// Only "true" may remain after de-duplicating the per-partition flags.
+		compareResultAsText(result, "(true)\n");
+	}
+
+	/**
+	 * Sorts each partition on position 4 (ascending) and then position 2 (descending) and verifies
+	 * the order.
+	 */
+	@Test
+	public void testSortPartitionByTwoKeyFields() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setParallelism(2);
+
+		// Identity map at parallelism 2 to parallelize the input.
+		final DataSet<Tuple5<Integer, Long, Integer, String, Long>> input = CollectionDataSets.get5TupleDataSet(env)
+			.map(new IdMapper<Tuple5<Integer, Long, Integer, String, Long>>()).setParallelism(2);
+
+		final DataSet<Tuple1<Boolean>> orderChecks = input
+			.sortPartition(4, Order.ASCENDING)
+			.sortPartition(2, Order.DESCENDING)
+			.mapPartition(new OrderCheckMapper<>(new Tuple5Checker()));
+
+		final List<Tuple1<Boolean>> result = orderChecks.distinct().collect();
+
+		// Only "true" may remain after de-duplicating the per-partition flags.
+		compareResultAsText(result, "(true)\n");
+	}
+
+	/**
+	 * Sorts each partition on the field expression {@code "f1"} in descending order and verifies
+	 * the order.
+	 */
+	@Test
+	public void testSortPartitionByFieldExpression() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setParallelism(4);
+
+		DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
+		List<Tuple1<Boolean>> result = ds
+			// The mapper is parameterized like in the sibling tests, so no raw types are
+			// involved and no rawtypes/unchecked suppression is needed.
+			.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4) // parallelize input
+			.sortPartition("f1", Order.DESCENDING)
+			.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
+			.distinct().collect();
+
+		// Only "true" may remain after de-duplicating the per-partition flags.
+		String expected = "(true)\n";
+
+		compareResultAsText(result, expected);
+	}
+
+	/**
+	 * Sorts each partition on the field expressions {@code "f4"} (ascending) and then {@code "f2"}
+	 * (descending) and verifies the order.
+	 */
+	@Test
+	public void testSortPartitionByTwoFieldExpressions() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setParallelism(2);
+
+		// Identity map at parallelism 2 to parallelize the input.
+		final DataSet<Tuple5<Integer, Long, Integer, String, Long>> input = CollectionDataSets.get5TupleDataSet(env)
+			.map(new IdMapper<Tuple5<Integer, Long, Integer, String, Long>>()).setParallelism(2);
+
+		final DataSet<Tuple1<Boolean>> orderChecks = input
+			.sortPartition("f4", Order.ASCENDING)
+			.sortPartition("f2", Order.DESCENDING)
+			.mapPartition(new OrderCheckMapper<>(new Tuple5Checker()));
+
+		final List<Tuple1<Boolean>> result = orderChecks.distinct().collect();
+
+		// Only "true" may remain after de-duplicating the per-partition flags.
+		compareResultAsText(result, "(true)\n");
+	}
+
+	/**
+	 * Sorts each partition on the nested field expression {@code "f0.f1"} (ascending) and then on
+	 * {@code "f1"} (descending) and verifies the order.
+	 */
+	@Test
+	public void testSortPartitionByNestedFieldExpression() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setParallelism(3);
+
+		// Identity map at parallelism 3 to parallelize the input.
+		final DataSet<Tuple2<Tuple2<Integer, Integer>, String>> input =
+			CollectionDataSets.getGroupSortedNestedTupleDataSet(env)
+				.map(new IdMapper<Tuple2<Tuple2<Integer, Integer>, String>>()).setParallelism(3);
+
+		final DataSet<Tuple1<Boolean>> orderChecks = input
+			.sortPartition("f0.f1", Order.ASCENDING)
+			.sortPartition("f1", Order.DESCENDING)
+			.mapPartition(new OrderCheckMapper<>(new NestedTupleChecker()));
+
+		final List<Tuple1<Boolean>> result = orderChecks.distinct().collect();
+
+		// Only "true" may remain after de-duplicating the per-partition flags.
+		compareResultAsText(result, "(true)\n");
+	}
+
+	/**
+	 * Sorts each partition of a POJO data set on the nested field expression
+	 * {@code "nestedTupleWithCustom.f1.myString"} (ascending) and then on {@code "number"}
+	 * (descending) and verifies the order.
+	 */
+	@Test
+	public void testSortPartitionPojoByNestedFieldExpression() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setParallelism(3);
+
+		// NOTE(review): the sibling tests run this identity map at the environment parallelism to
+		// parallelize the input; here it runs at parallelism 1 - confirm this is intended.
+		final DataSet<POJO> input = CollectionDataSets.getMixedPojoDataSet(env)
+			.map(new IdMapper<POJO>()).setParallelism(1);
+
+		final DataSet<Tuple1<Boolean>> orderChecks = input
+			.sortPartition("nestedTupleWithCustom.f1.myString", Order.ASCENDING)
+			.sortPartition("number", Order.DESCENDING)
+			.mapPartition(new OrderCheckMapper<>(new PojoChecker()));
+
+		final List<Tuple1<Boolean>> result = orderChecks.distinct().collect();
+
+		// Only "true" may remain after de-duplicating the per-partition flags.
+		compareResultAsText(result, "(true)\n");
+	}
+
+	/**
+	 * Applies the partition sort directly to the source with an explicit parallelism of 3 on the
+	 * sort operator and verifies the descending order on position 1.
+	 */
+	@Test
+	public void testSortPartitionParallelismChange() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setParallelism(3);
+
+		// No identity map here: the parallelism is set on the sort operator itself.
+		final DataSet<Tuple3<Integer, Long, String>> sorted = CollectionDataSets.get3TupleDataSet(env)
+			.sortPartition(1, Order.DESCENDING).setParallelism(3);
+
+		final DataSet<Tuple1<Boolean>> orderChecks =
+			sorted.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()));
+
+		final List<Tuple1<Boolean>> result = orderChecks.distinct().collect();
+
+		// Only "true" may remain after de-duplicating the per-partition flags.
+		compareResultAsText(result, "(true)\n");
+	}
+
+	/**
+	 * Sorts each partition in ascending order on a key extracted by a {@link KeySelector} (field
+	 * {@code f1}) and verifies the order.
+	 */
+	@Test
+	public void testSortPartitionWithKeySelector1() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setParallelism(4);
+
+		// Extracts the Long field as sort key.
+		final KeySelector<Tuple3<Integer, Long, String>, Long> longFieldSelector =
+			new KeySelector<Tuple3<Integer, Long, String>, Long>() {
+				@Override
+				public Long getKey(Tuple3<Integer, Long, String> value) throws Exception {
+					return value.f1;
+				}
+			};
+
+		// Identity map at parallelism 4 to parallelize the input.
+		final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env)
+			.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4);
+
+		final DataSet<Tuple1<Boolean>> orderChecks = input
+			.sortPartition(longFieldSelector, Order.ASCENDING)
+			.mapPartition(new OrderCheckMapper<>(new Tuple3AscendingChecker()));
+
+		final List<Tuple1<Boolean>> result = orderChecks.distinct().collect();
+
+		// Only "true" may remain after de-duplicating the per-partition flags.
+		compareResultAsText(result, "(true)\n");
+	}
+
+	/**
+	 * Sorts each partition in descending order on a composite key extracted by a
+	 * {@link KeySelector} (fields {@code f0} and {@code f1}) and verifies the order on {@code f1}.
+	 */
+	@Test
+	public void testSortPartitionWithKeySelector2() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setParallelism(4);
+
+		// Extracts the pair (f0, f1) as sort key.
+		final KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>> pairSelector =
+			new KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>>() {
+				@Override
+				public Tuple2<Integer, Long> getKey(Tuple3<Integer, Long, String> value) throws Exception {
+					return new Tuple2<>(value.f0, value.f1);
+				}
+			};
+
+		// Identity map at parallelism 4 to parallelize the input.
+		final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env)
+			.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4);
+
+		final DataSet<Tuple1<Boolean>> orderChecks = input
+			.sortPartition(pairSelector, Order.DESCENDING)
+			.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()));
+
+		final List<Tuple1<Boolean>> result = orderChecks.distinct().collect();
+
+		// Only "true" may remain after de-duplicating the per-partition flags.
+		compareResultAsText(result, "(true)\n");
+	}
+
+	/**
+	 * Serializable predicate that decides whether two consecutive elements are in the expected
+	 * order.
+	 *
+	 * @param <T> the element type
+	 */
+	private interface OrderChecker<T> extends Serializable {
+
+		/**
+		 * @param t1 the earlier element
+		 * @param t2 the element that follows {@code t1}
+		 * @return {@code true} if {@code t1} may precede {@code t2}
+		 */
+		boolean inOrder(T t1, T t2);
+	}
+
+	/**
+	 * Accepts pairs whose {@code f1} values are non-increasing (descending order).
+	 */
+	@SuppressWarnings("serial")
+	private static class Tuple3Checker implements OrderChecker<Tuple3<Integer, Long, String>> {
+		@Override
+		public boolean inOrder(Tuple3<Integer, Long, String> t1, Tuple3<Integer, Long, String> t2) {
+			final long earlier = t1.f1;
+			final long later = t2.f1;
+			return earlier >= later;
+		}
+	}
+
+	/**
+	 * Accepts pairs whose {@code f1} values are non-decreasing (ascending order).
+	 */
+	@SuppressWarnings("serial")
+	private static class Tuple3AscendingChecker implements OrderChecker<Tuple3<Integer, Long, String>> {
+		@Override
+		public boolean inOrder(Tuple3<Integer, Long, String> t1, Tuple3<Integer, Long, String> t2) {
+			final long earlier = t1.f1;
+			final long later = t2.f1;
+			return earlier <= later;
+		}
+	}
+
+	/**
+	 * Accepts pairs ordered by {@code f4} ascending and, for equal {@code f4}, by {@code f2}
+	 * descending.
+	 */
+	@SuppressWarnings("serial")
+	private static class Tuple5Checker implements OrderChecker<Tuple5<Integer, Long, Integer, String, Long>> {
+		@Override
+		public boolean inOrder(Tuple5<Integer, Long, Integer, String, Long> t1,
+				Tuple5<Integer, Long, Integer, String, Long> t2) {
+			final long earlierPrimary = t1.f4;
+			final long laterPrimary = t2.f4;
+			if (earlierPrimary != laterPrimary) {
+				// The primary key alone decides.
+				return earlierPrimary < laterPrimary;
+			}
+			// Tie on the primary key: the secondary key must not increase.
+			return t1.f2 >= t2.f2;
+		}
+	}
+
+	/**
+	 * Accepts pairs ordered by the nested field {@code f0.f1} ascending and, for equal
+	 * {@code f0.f1}, by the string field {@code f1} descending.
+	 */
+	@SuppressWarnings("serial")
+	private static class NestedTupleChecker implements OrderChecker<Tuple2<Tuple2<Integer, Integer>, String>> {
+		@Override
+		public boolean inOrder(Tuple2<Tuple2<Integer, Integer>, String> t1,
+				Tuple2<Tuple2<Integer, Integer>, String> t2) {
+			final int earlierPrimary = t1.f0.f1;
+			final int laterPrimary = t2.f0.f1;
+			if (earlierPrimary != laterPrimary) {
+				// The nested integer alone decides.
+				return earlierPrimary < laterPrimary;
+			}
+			// Tie on the nested integer: the string must not increase.
+			return t1.f1.compareTo(t2.f1) >= 0;
+		}
+	}
+
+	/**
+	 * Accepts pairs ordered by {@code nestedTupleWithCustom.f1.myString} ascending and, for equal
+	 * strings, by {@code number} descending.
+	 */
+	@SuppressWarnings("serial")
+	private static class PojoChecker implements OrderChecker<POJO> {
+		@Override
+		public boolean inOrder(POJO t1, POJO t2) {
+			final int stringOrder =
+				t1.nestedTupleWithCustom.f1.myString.compareTo(t2.nestedTupleWithCustom.f1.myString);
+			if (stringOrder != 0) {
+				// The string comparison alone decides.
+				return stringOrder < 0;
+			}
+			// Equal strings: the number must not increase.
+			return t1.number >= t2.number;
+		}
+	}
+
+ @SuppressWarnings("unused, serial")
+ private static class OrderCheckMapper<T> implements MapPartitionFunction<T, Tuple1<Boolean>> {
+
+ OrderChecker<T> checker;
+
+ public OrderCheckMapper() {}
+
+ public OrderCheckMapper(OrderChecker<T> checker) {
+ this.checker = checker;
+ }
+
+ @Override
+ public void mapPartition(Iterable<T> values, Collector<Tuple1<Boolean>> out) throws Exception {
+
+ Iterator<T> it = values.iterator();
+ if (!it.hasNext()) {
+ out.collect(new Tuple1<>(true));
+ } else {
+ T last = it.next();
+
+ while (it.hasNext()) {
+ T next = it.next();
+ if (!checker.inOrder(last, next)) {
+ out.collect(new Tuple1<>(false));
+ return;
+ }
+ last = next;
+ }
+ out.collect(new Tuple1<>(true));
+ }
+ }
+ }
+
+	/**
+	 * Identity mapper: returns every element unchanged.
+	 *
+	 * @param <T> the element type
+	 */
+	@SuppressWarnings("serial")
+	private static class IdMapper<T> implements MapFunction<T, T> {
+
+		@Override
+		public T map(T value) throws Exception {
+			final T unchanged = value;
+			return unchanged;
+		}
+	}
+ }
+}
http://git-wip-us.apache.org/repos/asf/flink/blob/9bd491e0/flink-tests/src/test/java/org/apache/flink/test/operators/SumMinMaxITCase.java
----------------------------------------------------------------------
diff --git a/flink-tests/src/test/java/org/apache/flink/test/operators/SumMinMaxITCase.java b/flink-tests/src/test/java/org/apache/flink/test/operators/SumMinMaxITCase.java
new file mode 100644
index 0000000..ebec17b
--- /dev/null
+++ b/flink-tests/src/test/java/org/apache/flink/test/operators/SumMinMaxITCase.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.operators;
+
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.tuple.Tuple1;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.api.java.tuple.Tuple3;
+import org.apache.flink.test.operators.util.CollectionDataSets;
+import org.apache.flink.test.util.MultipleProgramsTestBase;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.List;
+
+/**
+ * Integration tests for the {@code sum}, {@code min} and {@code max} aggregations on a
+ * {@link DataSet} and on grouped data sets.
+ */
+@RunWith(Parameterized.class)
+public class SumMinMaxITCase extends MultipleProgramsTestBase {
+
+	/**
+	 * Creates the test case for one execution mode, which is passed on to the test base.
+	 *
+	 * @param mode the execution mode supplied by the parameterized runner
+	 */
+	public SumMinMaxITCase(TestExecutionMode mode) {
+		super(mode);
+	}
+
+	/**
+	 * Sums position 0 and takes the maximum of position 1 over the whole data set, then projects
+	 * both positions.
+	 */
+	@Test
+	public void testSumMaxAndProject() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+		final DataSet<Tuple3<Integer, Long, String>> aggregated = CollectionDataSets.get3TupleDataSet(env)
+			.sum(0)
+			.andMax(1);
+
+		// The typed assignment fixes the tuple type produced by the projection.
+		final DataSet<Tuple2<Integer, Long>> projected = aggregated.project(0, 1);
+
+		final List<Tuple2<Integer, Long>> result = projected.collect();
+
+		compareResultAsTuples(result, "231,6\n");
+	}
+
+	/**
+	 * Groups on position 1, sums position 0 per group, and projects (group key, sum).
+	 */
+	@Test
+	public void testGroupedAggregate() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+		final DataSet<Tuple3<Integer, Long, String>> summedPerGroup = CollectionDataSets.get3TupleDataSet(env)
+			.groupBy(1)
+			.sum(0);
+
+		// The typed assignment fixes the tuple type produced by the projection.
+		final DataSet<Tuple2<Long, Integer>> projected = summedPerGroup.project(1, 0);
+
+		final List<Tuple2<Long, Integer>> result = projected.collect();
+
+		final String expected = "1,1\n"
+			+ "2,5\n"
+			+ "3,15\n"
+			+ "4,34\n"
+			+ "5,65\n"
+			+ "6,111\n";
+
+		compareResultAsTuples(result, expected);
+	}
+
+	/**
+	 * Takes the minimum of position 0 per group (grouped on position 1), then the minimum of
+	 * position 0 over those results, and projects position 0.
+	 */
+	@Test
+	public void testNestedAggregate() throws Exception {
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+		final DataSet<Tuple3<Integer, Long, String>> minPerGroup = CollectionDataSets.get3TupleDataSet(env)
+			.groupBy(1)
+			.min(0);
+		final DataSet<Tuple3<Integer, Long, String>> overallMin = minPerGroup.min(0);
+
+		// The typed assignment fixes the tuple type produced by the projection.
+		final DataSet<Tuple1<Integer>> projected = overallMin.project(0);
+
+		final List<Tuple1<Integer>> result = projected.collect();
+
+		compareResultAsTuples(result, "1\n");
+	}
+}
http://git-wip-us.apache.org/repos/asf/flink/blob/9bd491e0/flink-tests/src/test/java/org/apache/flink/test/operators/TypeHintITCase.java
----------------------------------------------------------------------
diff --git a/flink-tests/src/test/java/org/apache/flink/test/operators/TypeHintITCase.java b/flink-tests/src/test/java/org/apache/flink/test/operators/TypeHintITCase.java
new file mode 100644
index 0000000..75bf8f0
--- /dev/null
+++ b/flink-tests/src/test/java/org/apache/flink/test/operators/TypeHintITCase.java
@@ -0,0 +1,330 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.operators;
+
+import org.apache.flink.api.common.functions.CoGroupFunction;
+import org.apache.flink.api.common.functions.FlatJoinFunction;
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.api.common.functions.GroupCombineFunction;
+import org.apache.flink.api.common.functions.GroupReduceFunction;
+import org.apache.flink.api.common.functions.JoinFunction;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.operators.Order;
+import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.tuple.Tuple3;
+import org.apache.flink.api.java.typeutils.TupleTypeInfo;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.test.operators.util.CollectionDataSets;
+import org.apache.flink.test.util.JavaProgramTestBase;
+import org.apache.flink.util.Collector;
+
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Integration tests for {@link org.apache.flink.api.common.typeinfo.TypeHint}.
+ *
+ * <p>Each program applies a generic user function and declares the result type explicitly
+ * via {@code returns(...)}. The test is parameterized over the program id, so every program
+ * runs as a separate test instance.
+ */
+@RunWith(Parameterized.class)
+public class TypeHintITCase extends JavaProgramTestBase {
+
+ /** Number of programs in {@link TypeHintProgs}; program ids run from 1 to this value. */
+ private static final int NUM_PROGRAMS = 9;
+
+ /** Expected output of the identity-map programs (1 and 2). */
+ private static final String EXPECTED_TUPLES = "(2,2,Hello)\n" +
+     "(3,2,Hello world)\n" +
+     "(1,1,Hi)\n";
+
+ /** Expected output of the programs that produce a {@code DataSet<Integer>} (3 to 9). */
+ private static final String EXPECTED_INTEGERS = "2\n" +
+     "3\n" +
+     "1\n";
+
+ /** Id of the program run by this instance, read from the "ProgramId" config entry (-1 if unset). */
+ private final int curProgId = config.getInteger("ProgramId", -1);
+
+ public TypeHintITCase(Configuration config) {
+     super(config);
+ }
+
+ @Override
+ protected void testProgram() throws Exception {
+     TypeHintProgs.runProgram(curProgId);
+ }
+
+ /**
+  * Builds one configuration per program id, from 1 to {@link #NUM_PROGRAMS}.
+  */
+ @Parameters
+ public static Collection<Object[]> getConfigurations() throws FileNotFoundException, IOException {
+     LinkedList<Configuration> tConfigs = new LinkedList<Configuration>();
+
+     for (int i = 1; i <= NUM_PROGRAMS; i++) {
+         Configuration config = new Configuration();
+         config.setInteger("ProgramId", i);
+         tConfigs.add(config);
+     }
+
+     return toParameterList(tConfigs);
+ }
+
+ /** Holds the test programs, selected by id. */
+ private static class TypeHintProgs {
+
+     /**
+      * Runs the program with the given id and compares its collected result with the expected text.
+      *
+      * @param progId id of the program to run (1 to {@link TypeHintITCase#NUM_PROGRAMS})
+      * @throws IllegalArgumentException if {@code progId} does not name a program
+      */
+     public static void runProgram(int progId) throws Exception {
+         switch (progId) {
+             // Test identity map with missing types and string type hint
+             case 1: {
+                 final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+                 DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Tuple3<Integer, Long, String>> identityMapDs = ds
+                     .map(new Mapper<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>())
+                     .returns("Tuple3<Integer, Long, String>");
+                 List<Tuple3<Integer, Long, String>> result = identityMapDs.collect();
+
+                 compareResultAsText(result, EXPECTED_TUPLES);
+                 break;
+             }
+             // Test identity map with missing types and type information type hint
+             case 2: {
+                 final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+                 DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Tuple3<Integer, Long, String>> identityMapDs = ds
+                     // all following generics get erased during compilation
+                     .map(new Mapper<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>())
+                     .returns(new TupleTypeInfo<Tuple3<Integer, Long, String>>(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO));
+                 List<Tuple3<Integer, Long, String>> result = identityMapDs.collect();
+
+                 compareResultAsText(result, EXPECTED_TUPLES);
+                 break;
+             }
+             // Test flat map with class type hint
+             case 3: {
+                 final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+                 DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Integer> resultDs = ds
+                     .flatMap(new FlatMapper<Tuple3<Integer, Long, String>, Integer>())
+                     .returns(Integer.class);
+                 List<Integer> result = resultDs.collect();
+
+                 compareResultAsText(result, EXPECTED_INTEGERS);
+                 break;
+             }
+             // Test join with type information type hint
+             case 4: {
+                 final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+                 DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Integer> resultDs = ds1
+                     .join(ds2)
+                     .where(0)
+                     .equalTo(0)
+                     .with(new Joiner<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>, Integer>())
+                     .returns(BasicTypeInfo.INT_TYPE_INFO);
+                 List<Integer> result = resultDs.collect();
+
+                 compareResultAsText(result, EXPECTED_INTEGERS);
+                 break;
+             }
+             // Test flat join with type information type hint
+             case 5: {
+                 final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+                 DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Integer> resultDs = ds1
+                     .join(ds2)
+                     .where(0)
+                     .equalTo(0)
+                     .with(new FlatJoiner<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>, Integer>())
+                     .returns(BasicTypeInfo.INT_TYPE_INFO);
+                 List<Integer> result = resultDs.collect();
+
+                 compareResultAsText(result, EXPECTED_INTEGERS);
+                 break;
+             }
+             // Test unsorted group reduce with type information type hint
+             case 6: {
+                 final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+                 DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Integer> resultDs = ds
+                     .groupBy(0)
+                     .reduceGroup(new GroupReducer<Tuple3<Integer, Long, String>, Integer>())
+                     .returns(BasicTypeInfo.INT_TYPE_INFO);
+                 List<Integer> result = resultDs.collect();
+
+                 compareResultAsText(result, EXPECTED_INTEGERS);
+                 break;
+             }
+             // Test sorted group reduce with type information type hint
+             case 7: {
+                 final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+                 DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Integer> resultDs = ds
+                     .groupBy(0)
+                     .sortGroup(0, Order.ASCENDING)
+                     .reduceGroup(new GroupReducer<Tuple3<Integer, Long, String>, Integer>())
+                     .returns(BasicTypeInfo.INT_TYPE_INFO);
+                 List<Integer> result = resultDs.collect();
+
+                 compareResultAsText(result, EXPECTED_INTEGERS);
+                 break;
+             }
+             // Test combine group with type information type hint
+             case 8: {
+                 final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+                 DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Integer> resultDs = ds
+                     .groupBy(0)
+                     .combineGroup(new GroupCombiner<Tuple3<Integer, Long, String>, Integer>())
+                     .returns(BasicTypeInfo.INT_TYPE_INFO);
+                 List<Integer> result = resultDs.collect();
+
+                 compareResultAsText(result, EXPECTED_INTEGERS);
+                 break;
+             }
+             // Test cogroup with type information type hint
+             case 9: {
+                 final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+                 DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.getSmall3TupleDataSet(env);
+                 DataSet<Integer> resultDs = ds1
+                     .coGroup(ds2)
+                     .where(0)
+                     .equalTo(0)
+                     .with(new CoGrouper<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>, Integer>())
+                     .returns(BasicTypeInfo.INT_TYPE_INFO);
+                 List<Integer> result = resultDs.collect();
+
+                 compareResultAsText(result, EXPECTED_INTEGERS);
+                 break;
+             }
+             default:
+                 throw new IllegalArgumentException("Invalid program id");
+         }
+     }
+ }
+
+ // --------------------------------------------------------------------------------------------
+
+ /** Identity map: returns the input value cast to the output type. */
+ private static class Mapper<T, V> implements MapFunction<T, V> {
+     private static final long serialVersionUID = 1L;
+
+     @SuppressWarnings("unchecked")
+     @Override
+     public V map(T value) throws Exception {
+         return (V) value;
+     }
+ }
+
+ /** Emits field {@code f0} of the input, which is cast to a raw {@code Tuple3}. */
+ private static class FlatMapper<T, V> implements FlatMapFunction<T, V> {
+     private static final long serialVersionUID = 1L;
+
+     @SuppressWarnings({ "unchecked", "rawtypes" })
+     @Override
+     public void flatMap(T value, Collector<V> out) throws Exception {
+         out.collect((V) ((Tuple3) value).f0);
+     }
+ }
+
+ /** Returns field {@code f0} of the first join input, which is cast to a raw {@code Tuple3}. */
+ private static class Joiner<IN1, IN2, OUT> implements JoinFunction<IN1, IN2, OUT> {
+     private static final long serialVersionUID = 1L;
+
+     @SuppressWarnings({ "unchecked", "rawtypes" })
+     @Override
+     public OUT join(IN1 first, IN2 second) throws Exception {
+         return (OUT) ((Tuple3) first).f0;
+     }
+ }
+
+ /** Emits field {@code f0} of the first join input, which is cast to a raw {@code Tuple3}. */
+ private static class FlatJoiner<IN1, IN2, OUT> implements FlatJoinFunction<IN1, IN2, OUT> {
+     private static final long serialVersionUID = 1L;
+
+     @SuppressWarnings({ "unchecked", "rawtypes" })
+     @Override
+     public void join(IN1 first, IN2 second, Collector<OUT> out) throws Exception {
+         out.collect((OUT) ((Tuple3) first).f0);
+     }
+ }
+
+ /** Emits field {@code f0} of the first element of each group, cast to a raw {@code Tuple3}. */
+ private static class GroupReducer<IN, OUT> implements GroupReduceFunction<IN, OUT> {
+     private static final long serialVersionUID = 1L;
+
+     @SuppressWarnings({ "unchecked", "rawtypes" })
+     @Override
+     public void reduce(Iterable<IN> values, Collector<OUT> out) throws Exception {
+         out.collect((OUT) ((Tuple3) values.iterator().next()).f0);
+     }
+ }
+
+ /** Emits field {@code f0} of the first element it is handed, cast to a raw {@code Tuple3}. */
+ private static class GroupCombiner<IN, OUT> implements GroupCombineFunction<IN, OUT> {
+     private static final long serialVersionUID = 1L;
+
+     @SuppressWarnings({ "unchecked", "rawtypes" })
+     @Override
+     public void combine(Iterable<IN> values, Collector<OUT> out) throws Exception {
+         out.collect((OUT) ((Tuple3) values.iterator().next()).f0);
+     }
+ }
+
+ /** Emits field {@code f0} of the first element of the first input, cast to a raw {@code Tuple3}. */
+ private static class CoGrouper<IN1, IN2, OUT> implements CoGroupFunction<IN1, IN2, OUT> {
+     private static final long serialVersionUID = 1L;
+
+     @SuppressWarnings({ "unchecked", "rawtypes" })
+     @Override
+     public void coGroup(Iterable<IN1> first, Iterable<IN2> second, Collector<OUT> out) throws Exception {
+         out.collect((OUT) ((Tuple3) first.iterator().next()).f0);
+     }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/flink/blob/9bd491e0/flink-tests/src/test/java/org/apache/flink/test/operators/UnionITCase.java
----------------------------------------------------------------------
diff --git a/flink-tests/src/test/java/org/apache/flink/test/operators/UnionITCase.java b/flink-tests/src/test/java/org/apache/flink/test/operators/UnionITCase.java
new file mode 100644
index 0000000..daa9cb1
--- /dev/null
+++ b/flink-tests/src/test/java/org/apache/flink/test/operators/UnionITCase.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.test.operators;
+
+import org.apache.flink.api.common.functions.RichFilterFunction;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.tuple.Tuple3;
+import org.apache.flink.test.operators.util.CollectionDataSets;
+import org.apache.flink.test.util.MultipleProgramsTestBase;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.List;
+
+/**
+ * Integration tests for {@link DataSet#union}.
+ *
+ * <p>Every test works on the 3-tuple collection data set; see {@code FULL_TUPLE_3_STRING}.
+ */
+@RunWith(Parameterized.class)
+public class UnionITCase extends MultipleProgramsTestBase {
+
+ /** Expected contents of one copy of the 3-tuple data set, one record per line. */
+ private static final String FULL_TUPLE_3_STRING =
+     "1,1,Hi\n"
+     + "2,2,Hello\n"
+     + "3,2,Hello world\n"
+     + "4,3,Hello world, how are you?\n"
+     + "5,3,I am fine.\n"
+     + "6,3,Luke Skywalker\n"
+     + "7,4,Comment#1\n"
+     + "8,4,Comment#2\n"
+     + "9,4,Comment#3\n"
+     + "10,4,Comment#4\n"
+     + "11,5,Comment#5\n"
+     + "12,5,Comment#6\n"
+     + "13,5,Comment#7\n"
+     + "14,5,Comment#8\n"
+     + "15,5,Comment#9\n"
+     + "16,6,Comment#10\n"
+     + "17,6,Comment#11\n"
+     + "18,6,Comment#12\n"
+     + "19,6,Comment#13\n"
+     + "20,6,Comment#14\n"
+     + "21,6,Comment#15\n";
+
+ public UnionITCase(TestExecutionMode mode) {
+     super(mode);
+ }
+
+ @Test
+ public void testUnion2IdenticalDataSets() throws Exception {
+     // The union of two identical data sets is expected to contain every record twice.
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+     DataSet<Tuple3<Integer, Long, String>> first = CollectionDataSets.get3TupleDataSet(env);
+     DataSet<Tuple3<Integer, Long, String>> second = CollectionDataSets.get3TupleDataSet(env);
+
+     List<Tuple3<Integer, Long, String>> actual = first.union(second).collect();
+
+     String expected = FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING;
+
+     compareResultAsTuples(actual, expected);
+ }
+
+ @Test
+ public void testUnion5IdenticalDataSets() throws Exception {
+     // Five identical data sets are combined with four chained unions; every record is
+     // expected to appear five times in the result.
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+     // Start from one copy and union four more onto it.
+     DataSet<Tuple3<Integer, Long, String>> unionDs = CollectionDataSets.get3TupleDataSet(env);
+     for (int i = 0; i < 4; i++) {
+         unionDs = unionDs.union(CollectionDataSets.get3TupleDataSet(env));
+     }
+
+     List<Tuple3<Integer, Long, String>> actual = unionDs.collect();
+
+     StringBuilder expected = new StringBuilder();
+     for (int i = 0; i < 5; i++) {
+         expected.append(FULL_TUPLE_3_STRING);
+     }
+
+     compareResultAsTuples(actual, expected.toString());
+ }
+
+ @Test
+ public void testUnionWithEmptyDataSet() throws Exception {
+     // The union with an empty data set is expected to return the other input unchanged.
+     final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+
+     // The empty input is produced by a filter that rejects every record.
+     DataSet<Tuple3<Integer, Long, String>> empty = CollectionDataSets.get3TupleDataSet(env)
+         .filter(new RichFilter1());
+
+     DataSet<Tuple3<Integer, Long, String>> unionDs = CollectionDataSets.get3TupleDataSet(env)
+         .union(empty);
+
+     List<Tuple3<Integer, Long, String>> actual = unionDs.collect();
+
+     String expected = FULL_TUPLE_3_STRING;
+
+     compareResultAsTuples(actual, expected);
+ }
+
+ /** Filter that rejects every record. */
+ private static class RichFilter1 extends RichFilterFunction<Tuple3<Integer, Long, String>> {
+     private static final long serialVersionUID = 1L;
+
+     @Override
+     public boolean filter(Tuple3<Integer, Long, String> value) throws Exception {
+         return false;
+     }
+ }
+
+}