You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2017/01/26 17:40:36 UTC
svn commit: r1780431 [2/2] - in /pig/trunk: ./
src/docs/src/documentation/content/xdocs/ src/org/apache/pig/
src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/
src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/
src/o...
Modified: pig/trunk/src/org/apache/pig/newplan/logical/relational/LogToPhyTranslationVisitor.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/relational/LogToPhyTranslationVisitor.java?rev=1780431&r1=1780430&r2=1780431&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/relational/LogToPhyTranslationVisitor.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/relational/LogToPhyTranslationVisitor.java Thu Jan 26 17:40:35 2017
@@ -1414,7 +1414,7 @@ public class LogToPhyTranslationVisitor
return;
}
- else if (loj.getJoinType() == LOJoin.JOINTYPE.HASH){
+ else if (loj.getJoinType() == LOJoin.JOINTYPE.HASH || loj.getJoinType() == LOJoin.JOINTYPE.BLOOM){
POPackage poPackage = compileToLR_GR_PackTrio(loj, loj.getCustomPartitioner(), innerFlags, loj.getExpressionPlans());
POForEach fe = compileFE4Flattening(innerFlags, scope, parallel, alias, location, inputs);
currentPlan.add(fe);
@@ -1425,7 +1425,20 @@ public class LogToPhyTranslationVisitor
e.getErrorCode(),e.getErrorSource(),e);
}
logToPhyMap.put(loj, fe);
- poPackage.getPkgr().setPackageType(PackageType.JOIN);
+ if (loj.getJoinType() == LOJoin.JOINTYPE.BLOOM) {
+ if (innerFlags.length == 2) {
+ if (innerFlags[0] == false && innerFlags[1] == false) {
+ throw new LogicalToPhysicalTranslatorException(
+ "Error at " + loj.getLocation() + " with alias "+ loj.getAlias() +
+ ". Bloom join cannot be used with a FULL OUTER join.",
+ 1109,
+ PigException.INPUT);
+ }
+ }
+ poPackage.getPkgr().setPackageType(PackageType.BLOOMJOIN);
+ } else {
+ poPackage.getPkgr().setPackageType(PackageType.JOIN);
+ }
}
translateSoftLinks(loj);
}
Modified: pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java?rev=1780431&r1=1780430&r2=1780431&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java (original)
+++ pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java Thu Jan 26 17:40:35 2017
@@ -1788,6 +1788,8 @@ public class LogicalPlanBuilder {
return JOINTYPE.REPLICATED;
} else if( modifier.equalsIgnoreCase( "hash" ) || modifier.equalsIgnoreCase( "default" ) ) {
return LOJoin.JOINTYPE.HASH;
+ } else if( modifier.equalsIgnoreCase( "bloom" ) ) {
+ return LOJoin.JOINTYPE.BLOOM;
} else if( modifier.equalsIgnoreCase( "skewed" ) ) {
return JOINTYPE.SKEWED;
} else if (modifier.equalsIgnoreCase("merge")) {
@@ -1796,7 +1798,7 @@ public class LogicalPlanBuilder {
return JOINTYPE.MERGESPARSE;
} else {
throw new ParserValidationException( intStream, loc,
- "Only REPL, REPLICATED, HASH, SKEWED, MERGE, and MERGE-SPARSE are vaild JOIN modifiers." );
+ "Only REPL, REPLICATED, HASH, BLOOM, SKEWED, MERGE, and MERGE-SPARSE are vaild JOIN modifiers." );
}
}
Modified: pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java?rev=1780431&r1=1780430&r2=1780431&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java (original)
+++ pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java Thu Jan 26 17:40:35 2017
@@ -133,6 +133,8 @@ public abstract class ScriptState {
MERGE_SPARSE_JOIN,
REPLICATED_JOIN,
SKEWED_JOIN,
+ BUILD_BLOOM,
+ FILTER_BLOOM,
HASH_JOIN,
COLLECTED_GROUP,
MERGE_COGROUP,
@@ -312,7 +314,7 @@ public abstract class ScriptState {
maxScriptSize = Integer.valueOf(prop);
}
}
-
+
this.truncatedScript = (script.length() > maxScriptSize) ? script.substring(0, maxScriptSize)
: script;
@@ -485,6 +487,10 @@ public abstract class ScriptState {
public void visit(LOJoin op) {
if (op.getJoinType() == JOINTYPE.HASH) {
feature.set(PIG_FEATURE.HASH_JOIN.ordinal());
+ } else if (op.getJoinType() == JOINTYPE.BLOOM) {
+ feature.set(PIG_FEATURE.HASH_JOIN.ordinal());
+ feature.set(PIG_FEATURE.BUILD_BLOOM.ordinal());
+ feature.set(PIG_FEATURE.FILTER_BLOOM.ordinal());
} else if (op.getJoinType() == JOINTYPE.MERGE) {
feature.set(PIG_FEATURE.MERGE_JOIN.ordinal());
} else if (op.getJoinType() == JOINTYPE.MERGESPARSE) {
@@ -506,6 +512,7 @@ public abstract class ScriptState {
feature.set(PIG_FEATURE.RANK.ordinal());
}
+ @Override
public void visit(LOSort op) {
feature.set(PIG_FEATURE.ORDER_BY.ordinal());
}
Modified: pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java?rev=1780431&r1=1780430&r2=1780431&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java (original)
+++ pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java Thu Jan 26 17:40:35 2017
@@ -275,6 +275,12 @@ public class TezScriptState extends Scri
if (tezOp.isRegularJoin()) {
feature.set(PIG_FEATURE.HASH_JOIN.ordinal());
}
+ if (tezOp.isBuildBloom()) {
+ feature.set(PIG_FEATURE.BUILD_BLOOM.ordinal());
+ }
+ if (tezOp.isFilterBloom()) {
+ feature.set(PIG_FEATURE.FILTER_BLOOM.ordinal());
+ }
if (tezOp.isUnion()) {
feature.set(PIG_FEATURE.UNION.ordinal());
}
Modified: pig/trunk/test/e2e/pig/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/build.xml?rev=1780431&r1=1780430&r2=1780431&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/build.xml (original)
+++ pig/trunk/test/e2e/pig/build.xml Thu Jan 26 17:40:35 2017
@@ -137,6 +137,7 @@
<path path="${test.location}/tests/multiquery.conf"/>
<path path="${test.location}/tests/negative.conf"/>
<path path="${test.location}/tests/nightly.conf"/>
+ <path path="${test.location}/tests/join.conf"/>
<path path="${test.location}/tests/streaming.conf"/>
<path path="${test.location}/tests/streaming_local.conf"/>
<path path="${test.location}/tests/turing_jython.conf"/>
Added: pig/trunk/test/e2e/pig/tests/join.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/join.conf?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/tests/join.conf (added)
+++ pig/trunk/test/e2e/pig/tests/join.conf Thu Jan 26 17:40:35 2017
@@ -0,0 +1,310 @@
+#!/usr/bin/env perl
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+###############################################################################
+
+$cfg = {
+ 'driver' => 'Pig',
+
+ 'groups' => [
+ {
+ 'name' => 'BloomJoin_Map',
+ 'execonly' => 'tez',
+ 'tests' => [
+ {
+ # Tuple join key
+ 'num' => 1,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions);
+--c = filter a by age < 20;
+--d = filter b by age < 20;
+e = join a by (name, age), b by (name, age) using 'bloom';
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions);
+--c = filter a by age < 20;
+--d = filter b by age < 20;
+e = join a by (name, age), b by (name, age);
+store e into ':OUTPATH:';\,
+ },
+ {
+ # bytearray join key
+ 'num' => 2,
+ 'pig' => q\
+SET mapreduce.input.fileinputformat.split.maxsize '50000';
+SET pig.splitCombination false;
+a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions);
+c = filter a by age < 20;
+d = filter b by age < 20;
+e = join c by name, d by name using 'bloom';
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions);
+c = filter a by age < 20;
+d = filter b by age < 20;
+e = join c by name, d by name;
+store e into ':OUTPATH:';\,
+ },
+ {
+ # Left outer join and chararray join key
+ 'num' => 3,
+ 'pig' => q\
+SET mapreduce.input.fileinputformat.split.maxsize '50000';
+SET pig.splitCombination false;
+a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, registration, contributions);
+c = join a by name left, b by name using 'bloom';
+d = foreach c generate a::name, a::age, gpa, registration, contributions;
+store d into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, registration, contributions);
+c = join a by name left, b by name;
+d = foreach c generate a::name, a::age, gpa, registration, contributions;
+store d into ':OUTPATH:';\,
+ },
+ {
+ # Right outer join
+ 'num' => 4,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, registration, contributions);
+c = join a by (name,age) right, b by (name,age) using 'bloom';
+store c into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, registration, contributions);
+c = join a by (name,age) right, b by (name,age);
+store c into ':OUTPATH:';\,
+ },
+ {
+ # Left input from a union
+ 'num' => 5,
+ 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa);
+c = union a, b;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+d = filter d by age > 60;
+e = join c by name, d by name using 'bloom' PARALLEL 3;
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa);
+c = union a, b;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+d = filter d by age > 60;
+e = join c by name, d by name;
+store e into ':OUTPATH:';\,
+ },
+ {
+ # Right input from a union and integer join key
+ 'num' => 6,
+ 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa);
+c = union a, b;
+c = filter c by age > 75;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+e = join d by age, c by age using 'bloom' PARALLEL 3;
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa);
+c = union a, b;
+c = filter c by age > 75;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+e = join d by age, c by age;
+store e into ':OUTPATH:';\,
+ },
+ {
+ # Left input from a split
+ 'num' => 7,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions);
+b = filter b by age > 75;
+c = filter a by age > 50;
+d = join a by age, b by age using 'bloom';
+store c into ':OUTPATH:.1';
+store d into ':OUTPATH:.2';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions);
+b = filter b by age > 75;
+c = filter a by age > 50;
+d = join a by age, b by age;
+store c into ':OUTPATH:.1';
+store d into ':OUTPATH:.2';\,
+ },
+ {
+ # Right input from a split
+ 'num' => 8,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions);
+c = filter a by age > 75;
+d = filter a by name == 'nick miller';
+e = join b by age, c by age using 'bloom';
+store d into ':OUTPATH:.1';
+store e into ':OUTPATH:.2';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions);
+c = filter a by age > 75;
+d = filter a by name == 'nick miller';
+e = join b by age, c by age;
+store d into ':OUTPATH:.1';
+store e into ':OUTPATH:.2';\,
+ },
+ ] # end of tests
+ },
+ {
+ 'name' => 'BloomJoin_Reduce',
+ 'execonly' => 'tez',
+ 'java_params' => ['-Dpig.bloomjoin.strategy=reduce'],
+ 'tests' => [
+ {
+ # Tuple join key
+ 'num' => 1,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions);
+--c = filter a by age < 20;
+--d = filter b by age < 20;
+e = join a by (name, age), b by (name, age) using 'bloom';
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions);
+--c = filter a by age < 20;
+--d = filter b by age < 20;
+e = join a by (name, age), b by (name, age);
+store e into ':OUTPATH:';\,
+ },
+ {
+ # bytearray join key
+ 'num' => 2,
+ 'pig' => q\
+SET mapreduce.input.fileinputformat.split.maxsize '50000';
+SET pig.splitCombination false;
+a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions);
+c = filter a by age < 20;
+d = filter b by age < 20;
+e = join c by name, d by name using 'bloom';
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions);
+c = filter a by age < 20;
+d = filter b by age < 20;
+e = join c by name, d by name;
+store e into ':OUTPATH:';\,
+ },
+ {
+ # Left outer join and chararray join key
+ 'num' => 3,
+ 'pig' => q\
+SET mapreduce.input.fileinputformat.split.maxsize '50000';
+SET pig.splitCombination false;
+a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, registration, contributions);
+c = join a by name left, b by name using 'bloom';
+d = foreach c generate a::name, a::age, gpa, registration, contributions;
+store d into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, registration, contributions);
+c = join a by name left, b by name;
+d = foreach c generate a::name, a::age, gpa, registration, contributions;
+store d into ':OUTPATH:';\,
+ },
+ {
+ # Right outer join
+ 'num' => 4,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, registration, contributions);
+c = join a by (name,age) right, b by (name,age) using 'bloom';
+store c into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, registration, contributions);
+c = join a by (name,age) right, b by (name,age);
+store c into ':OUTPATH:';\,
+ },
+ {
+ # Left input from a union
+ 'num' => 5,
+ 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa);
+c = union a, b;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+d = filter d by age > 60;
+e = join c by name, d by name using 'bloom' PARALLEL 3;
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa);
+c = union a, b;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+d = filter d by age > 60;
+e = join c by name, d by name;
+store e into ':OUTPATH:';\,
+ },
+ {
+ # Right input from a union and integer join key
+ 'num' => 6,
+ 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa);
+c = union a, b;
+c = filter c by age > 75;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+e = join d by age, c by age using 'bloom' PARALLEL 3;
+store e into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa);
+c = union a, b;
+c = filter c by age > 75;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
+e = join d by age, c by age;
+store e into ':OUTPATH:';\,
+ },
+ {
+ # Left input from a split
+ 'num' => 7,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions);
+b = filter b by age > 75;
+c = filter a by age > 50;
+d = join a by age, b by age using 'bloom';
+store c into ':OUTPATH:.1';
+store d into ':OUTPATH:.2';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions);
+b = filter b by age > 75;
+c = filter a by age > 50;
+d = join a by age, b by age;
+store c into ':OUTPATH:.1';
+store d into ':OUTPATH:.2';\,
+ },
+ {
+ # Right input from a split
+ 'num' => 8,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions);
+c = filter a by age > 75;
+d = filter a by name == 'nick miller';
+e = join b by age, c by age using 'bloom';
+store d into ':OUTPATH:.1';
+store e into ':OUTPATH:.2';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions);
+c = filter a by age > 75;
+d = filter a by name == 'nick miller';
+e = join b by age, c by age;
+store d into ':OUTPATH:.1';
+store e into ':OUTPATH:.2';\,
+ },
+ ] # end of tests
+ }
+ ] # end of groups
+};
\ No newline at end of file
Modified: pig/trunk/test/e2e/pig/tests/multiquery.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/multiquery.conf?rev=1780431&r1=1780430&r2=1780431&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/multiquery.conf (original)
+++ pig/trunk/test/e2e/pig/tests/multiquery.conf Thu Jan 26 17:40:35 2017
@@ -906,7 +906,38 @@ m = UNION e, i, j, n;
n = JOIN a BY name, m BY name;
store n into ':OUTPATH:';\,
- }
+ },
+ {
+ # Self join bloom left outer
+ 'num' => 12,
+ 'execonly' => 'tez',
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = filter a by gpa >= 3.9;
+c = filter a by gpa > 3;
+d = join b by name left outer, c by name using 'bloom';
+store d into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = filter a by gpa >= 3.9;
+c = filter a by gpa > 3;
+d = join b by name left outer, c by name;
+store d into ':OUTPATH:';\,
+ },
+ {
+ # Self join bloom left outer with strategy as reduce
+ 'num' => 13,
+ 'execonly' => 'tez',
+ 'java_params' => ['-Dpig.bloomjoin.strategy=reduce'],
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = filter a by gpa >= 3.9;
+c = filter a by gpa > 3;
+d = join b by name left outer, c by name using 'bloom';
+store d into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = filter a by gpa >= 3.9;
+c = filter a by gpa > 3;
+d = join b by name left outer, c by name;
+store d into ':OUTPATH:';\,
+ },
] # end of tests
},
Modified: pig/trunk/test/e2e/pig/tests/orc.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/orc.conf?rev=1780431&r1=1780430&r2=1780431&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/orc.conf (original)
+++ pig/trunk/test/e2e/pig/tests/orc.conf Thu Jan 26 17:40:35 2017
@@ -1,3 +1,21 @@
+#!/usr/bin/env perl
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+###############################################################################
$cfg = {
'driver' => 'Pig',
'nummachines' => 5,
Modified: pig/trunk/test/org/apache/pig/test/TestEmptyInputDir.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEmptyInputDir.java?rev=1780431&r1=1780430&r2=1780431&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestEmptyInputDir.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestEmptyInputDir.java Thu Jan 26 17:40:35 2017
@@ -246,6 +246,66 @@ public class TestEmptyInputDir {
}
}
+ @Test
+ public void testBloomJoin() throws Exception {
+ PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE));
+ w.println("A = load '" + INPUT_FILE + "' as (x:int);");
+ w.println("B = load '" + EMPTY_DIR + "' as (x:int);");
+ w.println("C = join B by $0, A by $0 using 'bloom';");
+ w.println("D = join A by $0, B by $0 using 'bloom';");
+ w.println("store C into '" + OUTPUT_FILE + "';");
+ w.println("store D into 'output1';");
+ w.close();
+
+ try {
+ String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, };
+ PigStats stats = PigRunner.run(args, null);
+
+ assertTrue(stats.isSuccessful());
+ assertEquals(0, stats.getNumberRecords(OUTPUT_FILE));
+ assertEquals(0, stats.getNumberRecords("output1"));
+ assertEmptyOutputFile();
+ } finally {
+ new File(PIG_FILE).delete();
+ Util.deleteFile(cluster, OUTPUT_FILE);
+ Util.deleteFile(cluster, "output1");
+ }
+ }
+
+ @Test
+ public void testBloomJoinOuter() throws Exception {
+ PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE));
+ w.println("A = load '" + INPUT_FILE + "' as (x:int);");
+ w.println("B = load '" + EMPTY_DIR + "' as (x:int);");
+ w.println("C = join B by $0 left outer, A by $0 using 'bloom';");
+ w.println("D = join A by $0 left outer, B by $0 using 'bloom';");
+ w.println("E = join B by $0 right outer, A by $0 using 'bloom';");
+ w.println("F = join A by $0 right outer, B by $0 using 'bloom';");
+ w.println("store C into '" + OUTPUT_FILE + "';");
+ w.println("store D into 'output1';");
+ w.println("store E into 'output2';");
+ w.println("store F into 'output3';");
+ w.close();
+
+ try {
+ String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, };
+ PigStats stats = PigRunner.run(args, null);
+
+ assertTrue(stats.isSuccessful());
+ assertEquals(0, stats.getNumberRecords(OUTPUT_FILE));
+ assertEquals(2, stats.getNumberRecords("output1"));
+ assertEquals(2, stats.getNumberRecords("output2"));
+ assertEquals(0, stats.getNumberRecords("output3"));
+ assertEmptyOutputFile();
+ } finally {
+ new File(PIG_FILE).delete();
+ Util.deleteFile(cluster, OUTPUT_FILE);
+ Util.deleteFile(cluster, "output1");
+ Util.deleteFile(cluster, "output2");
+ Util.deleteFile(cluster, "output3");
+ }
+ }
+
private void assertEmptyOutputFile() throws IllegalArgumentException, IOException {
FileSystem fs = cluster.getFileSystem();
FileStatus status = fs.getFileStatus(new Path(OUTPUT_FILE));
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,91 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-48 -> Tez vertex scope-49,Tez vertex scope-50,
+Tez vertex scope-50 -> Tez vertex scope-46,Tez vertex scope-47,
+Tez vertex scope-46 -> Tez vertex scope-49,
+Tez vertex scope-47 -> Tez vertex scope-49,
+Tez vertex scope-49
+
+Tez vertex scope-48
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{bytearray}(false) - scope-30 -> [ scope-49, scope-50]
+| |
+| Project[bytearray][0] - scope-31
+|
+|---c: New For Each(false,false)[bag] - scope-20
+ | |
+ | Project[bytearray][0] - scope-15
+ | |
+ | Cast[int] - scope-18
+ | |
+ | |---Project[bytearray][1] - scope-17
+ |
+ |---c: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-14
+Tez vertex scope-50
+# Combine plan on edge <scope-48>
+Local Rearrange[tuple]{int}(false) - scope-55 -> scope-50
+| |
+| Project[int][0] - scope-54
+|
+|---Package(BloomPackager)[tuple]{int} - scope-53
+# Plan on vertex
+POValueOutputTez - scope-52 -> [scope-46, scope-47]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-51
+Tez vertex scope-46
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-26 <- scope-50 -> scope-49
+| |
+| Project[bytearray][0] - scope-27
+|
+|---b: New For Each(false,false)[bag] - scope-6
+ | |
+ | Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-4
+ | |
+ | |---Project[bytearray][1] - scope-3
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-47
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-28 <- scope-50 -> scope-49
+| |
+| Project[bytearray][0] - scope-29
+|
+|---a: New For Each(false,false)[bag] - scope-13
+ | |
+ | Project[bytearray][0] - scope-8
+ | |
+ | Cast[int] - scope-11
+ | |
+ | |---Project[bytearray][1] - scope-10
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-7
+Tez vertex scope-49
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-45
+|
+|---e: New For Each(false,false,false,false)[bag] - scope-44
+ | |
+ | Project[bytearray][2] - scope-36
+ | |
+ | Project[int][3] - scope-38
+ | |
+ | Project[int][1] - scope-40
+ | |
+ | Project[int][5] - scope-42
+ |
+ |---d: New For Each(true,true,true)[tuple] - scope-35
+ | |
+ | Project[bag][1] - scope-32
+ | |
+ | Project[bag][2] - scope-33
+ | |
+ | Project[bag][3] - scope-34
+ |
+ |---d: Package(Packager)[tuple]{bytearray} - scope-25
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,91 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-48 -> Tez vertex scope-49,Tez vertex scope-50,
+Tez vertex scope-50 -> Tez vertex scope-46,Tez vertex scope-47,
+Tez vertex scope-46 -> Tez vertex scope-49,
+Tez vertex scope-47 -> Tez vertex scope-49,
+Tez vertex scope-49
+
+Tez vertex scope-48
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{bytearray}(false) - scope-30 -> [ scope-49, scope-50]
+| |
+| Project[bytearray][0] - scope-31
+|
+|---c: New For Each(false,false)[bag] - scope-20
+ | |
+ | Project[bytearray][0] - scope-15
+ | |
+ | Cast[int] - scope-18
+ | |
+ | |---Project[bytearray][1] - scope-17
+ |
+ |---c: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-14
+Tez vertex scope-50
+# Combine plan on edge <scope-48>
+Local Rearrange[tuple]{int}(false) - scope-55 -> scope-50
+| |
+| Project[int][0] - scope-54
+|
+|---Package(BloomPackager)[tuple]{int} - scope-53
+# Plan on vertex
+POValueOutputTez - scope-52 -> [scope-46, scope-47]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-51
+Tez vertex scope-46
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-26 <- scope-50 -> scope-49
+| |
+| Project[bytearray][0] - scope-27
+|
+|---b: New For Each(false,false)[bag] - scope-6
+ | |
+ | Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-4
+ | |
+ | |---Project[bytearray][1] - scope-3
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-47
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-28 <- scope-50 -> scope-49
+| |
+| Project[bytearray][0] - scope-29
+|
+|---a: New For Each(false,false)[bag] - scope-13
+ | |
+ | Project[bytearray][0] - scope-8
+ | |
+ | Cast[int] - scope-11
+ | |
+ | |---Project[bytearray][1] - scope-10
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-7
+Tez vertex scope-49
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-45
+|
+|---e: New For Each(false,false,false,false)[bag] - scope-44
+ | |
+ | Project[bytearray][2] - scope-36
+ | |
+ | Project[int][3] - scope-38
+ | |
+ | Project[int][1] - scope-40
+ | |
+ | Project[int][5] - scope-42
+ |
+ |---d: New For Each(true,true,true)[tuple] - scope-35
+ | |
+ | Project[bag][1] - scope-32
+ | |
+ | Project[bag][2] - scope-33
+ | |
+ | Project[bag][3] - scope-34
+ |
+ |---d: Package(Packager)[tuple]{bytearray} - scope-25
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,83 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-39 -> Tez vertex scope-41,Tez vertex scope-42,
+Tez vertex scope-42 -> Tez vertex scope-40,
+Tez vertex scope-40 -> Tez vertex scope-41,
+Tez vertex scope-41
+
+Tez vertex scope-39
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{chararray}(false) - scope-20 -> [ scope-41, scope-42]
+| |
+| Project[chararray][0] - scope-21
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[chararray] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-42
+# Combine plan on edge <scope-39>
+Local Rearrange[tuple]{int}(false) - scope-47 -> scope-42
+| |
+| Project[int][0] - scope-46
+|
+|---Package(BloomPackager)[tuple]{int} - scope-45
+# Plan on vertex
+POValueOutputTez - scope-44 -> [scope-40]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-43
+Tez vertex scope-40
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{chararray}(false) - scope-22 <- scope-42 -> scope-41
+| |
+| Project[chararray][0] - scope-23
+|
+|---b: New For Each(false,false)[bag] - scope-15
+ | |
+ | Cast[chararray] - scope-10
+ | |
+ | |---Project[bytearray][0] - scope-9
+ | |
+ | Cast[int] - scope-13
+ | |
+ | |---Project[bytearray][1] - scope-12
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8
+Tez vertex scope-41
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-38
+|
+|---e: New For Each(false,false,false)[bag] - scope-37
+ | |
+ | Project[chararray][0] - scope-31
+ | |
+ | Project[int][1] - scope-33
+ | |
+ | Project[int][3] - scope-35
+ |
+ |---d: New For Each(true,true)[tuple] - scope-30
+ | |
+ | Project[bag][1] - scope-24
+ | |
+ | POBinCond[bag] - scope-29
+ | |
+ | |---Project[bag][2] - scope-25
+ | |
+ | |---POUserFunc(org.apache.pig.builtin.IsEmpty)[boolean] - scope-27
+ | | |
+ | | |---Project[bag][2] - scope-26
+ | |
+ | |---Constant({(,)}) - scope-28
+ |
+ |---d: Package(Packager)[tuple]{chararray} - scope-19
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,83 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-39 -> Tez vertex scope-41,Tez vertex scope-42,
+Tez vertex scope-42 -> Tez vertex scope-40,
+Tez vertex scope-40 -> Tez vertex scope-41,
+Tez vertex scope-41
+
+Tez vertex scope-39
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{chararray}(false) - scope-20 -> [ scope-41, scope-42]
+| |
+| Project[chararray][0] - scope-21
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[chararray] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-42
+# Combine plan on edge <scope-39>
+Local Rearrange[tuple]{int}(false) - scope-47 -> scope-42
+| |
+| Project[int][0] - scope-46
+|
+|---Package(BloomPackager)[tuple]{int} - scope-45
+# Plan on vertex
+POValueOutputTez - scope-44 -> [scope-40]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-43
+Tez vertex scope-40
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{chararray}(false) - scope-22 <- scope-42 -> scope-41
+| |
+| Project[chararray][0] - scope-23
+|
+|---b: New For Each(false,false)[bag] - scope-15
+ | |
+ | Cast[chararray] - scope-10
+ | |
+ | |---Project[bytearray][0] - scope-9
+ | |
+ | Cast[int] - scope-13
+ | |
+ | |---Project[bytearray][1] - scope-12
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8
+Tez vertex scope-41
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-38
+|
+|---e: New For Each(false,false,false)[bag] - scope-37
+ | |
+ | Project[chararray][0] - scope-31
+ | |
+ | Project[int][1] - scope-33
+ | |
+ | Project[int][3] - scope-35
+ |
+ |---d: New For Each(true,true)[tuple] - scope-30
+ | |
+ | Project[bag][1] - scope-24
+ | |
+ | POBinCond[bag] - scope-29
+ | |
+ | |---Project[bag][2] - scope-25
+ | |
+ | |---POUserFunc(org.apache.pig.builtin.IsEmpty)[boolean] - scope-27
+ | | |
+ | | |---Project[bag][2] - scope-26
+ | |
+ | |---Constant({(,)}) - scope-28
+ |
+ |---d: Package(Packager)[tuple]{chararray} - scope-19
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,105 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-45 -> Tez vertex group scope-58,Tez vertex group scope-59,
+Tez vertex scope-46 -> Tez vertex group scope-58,Tez vertex group scope-59,
+Tez vertex group scope-59 -> Tez vertex scope-52,
+Tez vertex scope-52 -> Tez vertex scope-44,
+Tez vertex scope-44 -> Tez vertex scope-51,
+Tez vertex group scope-58 -> Tez vertex scope-51,
+Tez vertex scope-51
+
+Tez vertex scope-45
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{int}(false) - scope-60 -> [ scope-51, scope-52]
+| |
+| Project[int][0] - scope-61
+|
+|---b: New For Each(false,false)[bag] - scope-15
+ | |
+ | Cast[int] - scope-10
+ | |
+ | |---Project[bytearray][0] - scope-9
+ | |
+ | Cast[int] - scope-13
+ | |
+ | |---Project[bytearray][1] - scope-12
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8
+Tez vertex scope-46
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{int}(false) - scope-62 -> [ scope-51, scope-52]
+| |
+| Project[int][0] - scope-63
+|
+|---c: New For Each(false,false)[bag] - scope-23
+ | |
+ | Cast[int] - scope-18
+ | |
+ | |---Project[bytearray][0] - scope-17
+ | |
+ | Cast[int] - scope-21
+ | |
+ | |---Project[bytearray][1] - scope-20
+ |
+ |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-16
+Tez vertex group scope-59 <- [scope-45, scope-46] -> scope-52
+# No plan on vertex group
+Tez vertex scope-52
+# Combine plan on edge <scope-45>
+Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52
+| |
+| Project[int][0] - scope-56
+|
+|---Package(BloomPackager)[tuple]{int} - scope-55
+# Combine plan on edge <scope-46>
+Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52
+| |
+| Project[int][0] - scope-56
+|
+|---Package(BloomPackager)[tuple]{int} - scope-55
+# Plan on vertex
+POValueOutputTez - scope-54 -> [scope-44]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-53
+Tez vertex scope-44
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51
+| |
+| Project[int][0] - scope-30
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex group scope-58 <- [scope-45, scope-46] -> scope-51
+# No plan on vertex group
+Tez vertex scope-51
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43
+|
+|---e: New For Each(false,false,false)[bag] - scope-42
+ | |
+ | Project[int][0] - scope-36
+ | |
+ | Project[int][1] - scope-38
+ | |
+ | Project[int][3] - scope-40
+ |
+ |---d: New For Each(true,true)[tuple] - scope-35
+ | |
+ | Project[bag][1] - scope-33
+ | |
+ | Project[bag][2] - scope-34
+ |
+ |---d: Package(Packager)[tuple]{int} - scope-28
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,105 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-45 -> Tez vertex group scope-58,Tez vertex group scope-59,
+Tez vertex scope-46 -> Tez vertex group scope-58,Tez vertex group scope-59,
+Tez vertex group scope-59 -> Tez vertex scope-52,
+Tez vertex scope-52 -> Tez vertex scope-44,
+Tez vertex scope-44 -> Tez vertex scope-51,
+Tez vertex group scope-58 -> Tez vertex scope-51,
+Tez vertex scope-51
+
+Tez vertex scope-45
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{int}(false) - scope-60 -> [ scope-51, scope-52]
+| |
+| Project[int][0] - scope-61
+|
+|---b: New For Each(false,false)[bag] - scope-15
+ | |
+ | Cast[int] - scope-10
+ | |
+ | |---Project[bytearray][0] - scope-9
+ | |
+ | Cast[int] - scope-13
+ | |
+ | |---Project[bytearray][1] - scope-12
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8
+Tez vertex scope-46
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{int}(false) - scope-62 -> [ scope-51, scope-52]
+| |
+| Project[int][0] - scope-63
+|
+|---c: New For Each(false,false)[bag] - scope-23
+ | |
+ | Cast[int] - scope-18
+ | |
+ | |---Project[bytearray][0] - scope-17
+ | |
+ | Cast[int] - scope-21
+ | |
+ | |---Project[bytearray][1] - scope-20
+ |
+ |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-16
+Tez vertex group scope-59 <- [scope-45, scope-46] -> scope-52
+# No plan on vertex group
+Tez vertex scope-52
+# Combine plan on edge <scope-45>
+Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52
+| |
+| Project[int][0] - scope-56
+|
+|---Package(BloomPackager)[tuple]{int} - scope-55
+# Combine plan on edge <scope-46>
+Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52
+| |
+| Project[int][0] - scope-56
+|
+|---Package(BloomPackager)[tuple]{int} - scope-55
+# Plan on vertex
+POValueOutputTez - scope-54 -> [scope-44]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-53
+Tez vertex scope-44
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51
+| |
+| Project[int][0] - scope-30
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex group scope-58 <- [scope-45, scope-46] -> scope-51
+# No plan on vertex group
+Tez vertex scope-51
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43
+|
+|---e: New For Each(false,false,false)[bag] - scope-42
+ | |
+ | Project[int][0] - scope-36
+ | |
+ | Project[int][1] - scope-38
+ | |
+ | Project[int][3] - scope-40
+ |
+ |---d: New For Each(true,true)[tuple] - scope-35
+ | |
+ | Project[bag][1] - scope-33
+ | |
+ | Project[bag][2] - scope-34
+ |
+ |---d: Package(Packager)[tuple]{int} - scope-28
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,97 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-44 -> Tez vertex scope-46,
+Tez vertex scope-45 -> Tez vertex scope-46,
+Tez vertex scope-50 -> Tez vertex scope-51,Tez vertex scope-52,
+Tez vertex scope-52 -> Tez vertex scope-46,
+Tez vertex scope-46 -> Tez vertex scope-51,
+Tez vertex scope-51
+
+Tez vertex scope-44
+# Plan on vertex
+POValueOutputTez - scope-48 -> [scope-46]
+|
+|---b: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-45
+# Plan on vertex
+POValueOutputTez - scope-49 -> [scope-46]
+|
+|---c: New For Each(false,false)[bag] - scope-15
+ | |
+ | Cast[int] - scope-10
+ | |
+ | |---Project[bytearray][0] - scope-9
+ | |
+ | Cast[int] - scope-13
+ | |
+ | |---Project[bytearray][1] - scope-12
+ |
+ |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-8
+Tez vertex scope-50
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{int}(false) - scope-31 -> [ scope-51, scope-52]
+| |
+| Project[int][0] - scope-32
+|
+|---a: New For Each(false,false)[bag] - scope-24
+ | |
+ | Cast[int] - scope-19
+ | |
+ | |---Project[bytearray][0] - scope-18
+ | |
+ | Cast[int] - scope-22
+ | |
+ | |---Project[bytearray][1] - scope-21
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-17
+Tez vertex scope-52
+# Combine plan on edge <scope-50>
+Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52
+| |
+| Project[int][0] - scope-56
+|
+|---Package(BloomPackager)[tuple]{int} - scope-55
+# Plan on vertex
+POValueOutputTez - scope-54 -> [scope-46]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-53
+Tez vertex scope-46
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51
+| |
+| Project[int][0] - scope-30
+|
+|---POShuffledValueInputTez - scope-47 <- [scope-44, scope-45]
+Tez vertex scope-51
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43
+|
+|---e: New For Each(false,false,false)[bag] - scope-42
+ | |
+ | Project[int][2] - scope-36
+ | |
+ | Project[int][3] - scope-38
+ | |
+ | Project[int][1] - scope-40
+ |
+ |---d: New For Each(true,true)[tuple] - scope-35
+ | |
+ | Project[bag][1] - scope-33
+ | |
+ | Project[bag][2] - scope-34
+ |
+ |---d: Package(Packager)[tuple]{int} - scope-28
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,97 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-44 -> Tez vertex scope-46,
+Tez vertex scope-45 -> Tez vertex scope-46,
+Tez vertex scope-50 -> Tez vertex scope-51,Tez vertex scope-52,
+Tez vertex scope-52 -> Tez vertex scope-46,
+Tez vertex scope-46 -> Tez vertex scope-51,
+Tez vertex scope-51
+
+Tez vertex scope-44
+# Plan on vertex
+POValueOutputTez - scope-48 -> [scope-46]
+|
+|---b: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-45
+# Plan on vertex
+POValueOutputTez - scope-49 -> [scope-46]
+|
+|---c: New For Each(false,false)[bag] - scope-15
+ | |
+ | Cast[int] - scope-10
+ | |
+ | |---Project[bytearray][0] - scope-9
+ | |
+ | Cast[int] - scope-13
+ | |
+ | |---Project[bytearray][1] - scope-12
+ |
+ |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-8
+Tez vertex scope-50
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{int}(false) - scope-31 -> [ scope-51, scope-52]
+| |
+| Project[int][0] - scope-32
+|
+|---a: New For Each(false,false)[bag] - scope-24
+ | |
+ | Cast[int] - scope-19
+ | |
+ | |---Project[bytearray][0] - scope-18
+ | |
+ | Cast[int] - scope-22
+ | |
+ | |---Project[bytearray][1] - scope-21
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-17
+Tez vertex scope-52
+# Combine plan on edge <scope-50>
+Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52
+| |
+| Project[int][0] - scope-56
+|
+|---Package(BloomPackager)[tuple]{int} - scope-55
+# Plan on vertex
+POValueOutputTez - scope-54 -> [scope-46]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-53
+Tez vertex scope-46
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51
+| |
+| Project[int][0] - scope-30
+|
+|---POShuffledValueInputTez - scope-47 <- [scope-44, scope-45]
+Tez vertex scope-51
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43
+|
+|---e: New For Each(false,false,false)[bag] - scope-42
+ | |
+ | Project[int][2] - scope-36
+ | |
+ | Project[int][3] - scope-38
+ | |
+ | Project[int][1] - scope-40
+ |
+ |---d: New For Each(true,true)[tuple] - scope-35
+ | |
+ | Project[bag][1] - scope-33
+ | |
+ | Project[bag][2] - scope-34
+ |
+ |---d: Package(Packager)[tuple]{int} - scope-28
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,107 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-60 -> Tez vertex scope-61,Tez vertex scope-62,
+Tez vertex scope-62 -> Tez vertex scope-54,Tez vertex scope-58,
+Tez vertex scope-54 -> Tez vertex scope-58,Tez vertex scope-61,
+Tez vertex scope-58 -> Tez vertex scope-61,
+Tez vertex scope-61
+
+Tez vertex scope-60
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{int}(false) - scope-38 -> [ scope-61, scope-62]
+| |
+| Project[int][0] - scope-39
+|
+|---b: New For Each(false,false)[bag] - scope-28
+ | |
+ | Cast[int] - scope-23
+ | |
+ | |---Project[bytearray][0] - scope-22
+ | |
+ | Cast[int] - scope-26
+ | |
+ | |---Project[bytearray][1] - scope-25
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-21
+Tez vertex scope-62
+# Combine plan on edge <scope-60>
+Local Rearrange[tuple]{int}(false) - scope-67 -> scope-62
+| |
+| Project[int][0] - scope-66
+|
+|---Package(BloomPackager)[tuple]{int} - scope-65
+# Plan on vertex
+POValueOutputTez - scope-64 -> [scope-54, scope-58]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-63
+Tez vertex scope-54
+# Plan on vertex
+a: Split - scope-68
+| |
+| d: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-62 -> scope-61
+| | |
+| | Project[int][0] - scope-35
+| |
+| |---a1: Filter[bag] - scope-11
+| | |
+| | Equal To[boolean] - scope-14
+| | |
+| | |---Project[int][0] - scope-12
+| | |
+| | |---Constant(3) - scope-13
+| |
+| POValueOutputTez - scope-55 -> [scope-58]
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-58
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{int}(false) - scope-36 <- scope-62 -> scope-61
+| |
+| Project[int][0] - scope-37
+|
+|---a2: Filter[bag] - scope-17
+ | |
+ | Equal To[boolean] - scope-20
+ | |
+ | |---Project[int][0] - scope-18
+ | |
+ | |---Constant(4) - scope-19
+ |
+ |---POValueInputTez - scope-59 <- scope-54
+Tez vertex scope-61
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-53
+|
+|---e: New For Each(false,false,false,false)[bag] - scope-52
+ | |
+ | Project[int][0] - scope-44
+ | |
+ | Project[int][1] - scope-46
+ | |
+ | Project[int][3] - scope-48
+ | |
+ | Project[int][5] - scope-50
+ |
+ |---d: New For Each(true,true,true)[tuple] - scope-43
+ | |
+ | Project[bag][1] - scope-40
+ | |
+ | Project[bag][2] - scope-41
+ | |
+ | Project[bag][3] - scope-42
+ |
+ |---d: Package(Packager)[tuple]{int} - scope-33
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,107 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-60 -> Tez vertex scope-61,Tez vertex scope-62,
+Tez vertex scope-62 -> Tez vertex scope-54,Tez vertex scope-58,
+Tez vertex scope-54 -> Tez vertex scope-58,Tez vertex scope-61,
+Tez vertex scope-58 -> Tez vertex scope-61,
+Tez vertex scope-61
+
+Tez vertex scope-60
+# Plan on vertex
+d: BuildBloom Rearrange[tuple]{int}(false) - scope-38 -> [ scope-61, scope-62]
+| |
+| Project[int][0] - scope-39
+|
+|---b: New For Each(false,false)[bag] - scope-28
+ | |
+ | Cast[int] - scope-23
+ | |
+ | |---Project[bytearray][0] - scope-22
+ | |
+ | Cast[int] - scope-26
+ | |
+ | |---Project[bytearray][1] - scope-25
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-21
+Tez vertex scope-62
+# Combine plan on edge <scope-60>
+Local Rearrange[tuple]{int}(false) - scope-67 -> scope-62
+| |
+| Project[int][0] - scope-66
+|
+|---Package(BloomPackager)[tuple]{int} - scope-65
+# Plan on vertex
+POValueOutputTez - scope-64 -> [scope-54, scope-58]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-63
+Tez vertex scope-54
+# Plan on vertex
+a: Split - scope-68
+| |
+| d: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-62 -> scope-61
+| | |
+| | Project[int][0] - scope-35
+| |
+| |---a1: Filter[bag] - scope-11
+| | |
+| | Equal To[boolean] - scope-14
+| | |
+| | |---Project[int][0] - scope-12
+| | |
+| | |---Constant(3) - scope-13
+| |
+| POValueOutputTez - scope-55 -> [scope-58]
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-58
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{int}(false) - scope-36 <- scope-62 -> scope-61
+| |
+| Project[int][0] - scope-37
+|
+|---a2: Filter[bag] - scope-17
+ | |
+ | Equal To[boolean] - scope-20
+ | |
+ | |---Project[int][0] - scope-18
+ | |
+ | |---Constant(4) - scope-19
+ |
+ |---POValueInputTez - scope-59 <- scope-54
+Tez vertex scope-61
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-53
+|
+|---e: New For Each(false,false,false,false)[bag] - scope-52
+ | |
+ | Project[int][0] - scope-44
+ | |
+ | Project[int][1] - scope-46
+ | |
+ | Project[int][3] - scope-48
+ | |
+ | Project[int][5] - scope-50
+ |
+ |---d: New For Each(true,true,true)[tuple] - scope-43
+ | |
+ | Project[bag][1] - scope-40
+ | |
+ | Project[bag][2] - scope-41
+ | |
+ | Project[bag][3] - scope-42
+ |
+ |---d: Package(Packager)[tuple]{int} - scope-33
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6-KeyToReducer.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6-KeyToReducer.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6-KeyToReducer.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6-KeyToReducer.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,95 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-49 -> Tez vertex scope-56,Tez vertex scope-57,
+Tez vertex scope-57 -> Tez vertex scope-53,
+Tez vertex scope-53 -> Tez vertex scope-56,
+Tez vertex scope-56
+
+Tez vertex scope-49
+# Plan on vertex
+a: Split - scope-63
+| |
+| a2: Store(file:///tmp/pigoutput/a2:org.apache.pig.builtin.PigStorage) - scope-15
+| |
+| |---a2: Filter[bag] - scope-11
+| | |
+| | Equal To[boolean] - scope-14
+| | |
+| | |---Project[int][0] - scope-12
+| | |
+| | |---Constant(4) - scope-13
+| |
+| d: BuildBloom Rearrange[tuple]{int}(false) - scope-36 -> [ scope-56, scope-57]
+| | |
+| | Project[int][0] - scope-37
+| |
+| |---a1: Filter[bag] - scope-26
+| | |
+| | Equal To[boolean] - scope-29
+| | |
+| | |---Project[int][0] - scope-27
+| | |
+| | |---Constant(3) - scope-28
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-57
+# Combine plan on edge <scope-49>
+Local Rearrange[tuple]{int}(false) - scope-62 -> scope-57
+| |
+| Project[int][0] - scope-61
+|
+|---Package(BloomPackager)[tuple]{int} - scope-60
+# Plan on vertex
+POValueOutputTez - scope-59 -> [scope-53]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-58
+Tez vertex scope-53
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-57 -> scope-56
+| |
+| Project[int][0] - scope-35
+|
+|---b: New For Each(false,false)[bag] - scope-23
+ | |
+ | Cast[int] - scope-18
+ | |
+ | |---Project[bytearray][0] - scope-17
+ | |
+ | Cast[int] - scope-21
+ | |
+ | |---Project[bytearray][1] - scope-20
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-16
+Tez vertex scope-56
+# Plan on vertex
+e: Store(file:///tmp/pigoutput/e:org.apache.pig.builtin.PigStorage) - scope-48
+|
+|---e: New For Each(false,false,false)[bag] - scope-47
+ | |
+ | Project[int][2] - scope-41
+ | |
+ | Project[int][3] - scope-43
+ | |
+ | Project[int][1] - scope-45
+ |
+ |---d: New For Each(true,true)[tuple] - scope-40
+ | |
+ | Project[bag][1] - scope-38
+ | |
+ | Project[bag][2] - scope-39
+ |
+ |---d: Package(Packager)[tuple]{int} - scope-33
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,95 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-49 -> Tez vertex scope-56,Tez vertex scope-57,
+Tez vertex scope-57 -> Tez vertex scope-53,
+Tez vertex scope-53 -> Tez vertex scope-56,
+Tez vertex scope-56
+
+Tez vertex scope-49
+# Plan on vertex
+a: Split - scope-63
+| |
+| a2: Store(file:///tmp/pigoutput/a2:org.apache.pig.builtin.PigStorage) - scope-15
+| |
+| |---a2: Filter[bag] - scope-11
+| | |
+| | Equal To[boolean] - scope-14
+| | |
+| | |---Project[int][0] - scope-12
+| | |
+| | |---Constant(4) - scope-13
+| |
+| d: BuildBloom Rearrange[tuple]{int}(false) - scope-36 -> [ scope-56, scope-57]
+| | |
+| | Project[int][0] - scope-37
+| |
+| |---a1: Filter[bag] - scope-26
+| | |
+| | Equal To[boolean] - scope-29
+| | |
+| | |---Project[int][0] - scope-27
+| | |
+| | |---Constant(3) - scope-28
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-57
+# Combine plan on edge <scope-49>
+Local Rearrange[tuple]{int}(false) - scope-62 -> scope-57
+| |
+| Project[int][0] - scope-61
+|
+|---Package(BloomPackager)[tuple]{int} - scope-60
+# Plan on vertex
+POValueOutputTez - scope-59 -> [scope-53]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-58
+Tez vertex scope-53
+# Plan on vertex
+d: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-57 -> scope-56
+| |
+| Project[int][0] - scope-35
+|
+|---b: New For Each(false,false)[bag] - scope-23
+ | |
+ | Cast[int] - scope-18
+ | |
+ | |---Project[bytearray][0] - scope-17
+ | |
+ | Cast[int] - scope-21
+ | |
+ | |---Project[bytearray][1] - scope-20
+ |
+ |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-16
+Tez vertex scope-56
+# Plan on vertex
+e: Store(file:///tmp/pigoutput/e:org.apache.pig.builtin.PigStorage) - scope-48
+|
+|---e: New For Each(false,false,false)[bag] - scope-47
+ | |
+ | Project[int][2] - scope-41
+ | |
+ | Project[int][3] - scope-43
+ | |
+ | Project[int][1] - scope-45
+ |
+ |---d: New For Each(true,true)[tuple] - scope-40
+ | |
+ | Project[bag][1] - scope-38
+ | |
+ | Project[bag][2] - scope-39
+ |
+ |---d: Package(Packager)[tuple]{int} - scope-33
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7-KeyToReducer.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7-KeyToReducer.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7-KeyToReducer.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7-KeyToReducer.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,95 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-43 -> Tez vertex scope-45,Tez vertex scope-47,Tez vertex scope-51,Tez vertex scope-52,
+Tez vertex scope-52 -> Tez vertex scope-45,Tez vertex scope-47,
+Tez vertex scope-45 -> Tez vertex scope-51,
+Tez vertex scope-47 -> Tez vertex scope-51,
+Tez vertex scope-51
+
+Tez vertex scope-43
+# Plan on vertex
+a: Split - scope-58
+| |
+| e: BuildBloom Rearrange[tuple]{int}(false) - scope-36 -> [ scope-51, scope-52]
+| | |
+| | Project[int][0] - scope-37
+| |
+| |---d: Filter[bag] - scope-23
+| | |
+| | Greater Than[boolean] - scope-26
+| | |
+| | |---Project[int][0] - scope-24
+| | |
+| | |---Constant(10) - scope-25
+| |
+| POValueOutputTez - scope-44 -> [scope-45, scope-47]
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-52
+# Combine plan on edge <scope-43>
+Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52
+| |
+| Project[int][0] - scope-56
+|
+|---Package(BloomPackager)[tuple]{int} - scope-55
+# Plan on vertex
+POValueOutputTez - scope-54 -> [scope-45, scope-47]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-53
+Tez vertex scope-45
+# Plan on vertex
+e: BloomFilter Rearrange[tuple]{int}(false) - scope-32 <- scope-52 -> scope-51
+| |
+| Project[int][0] - scope-33
+|
+|---b: Filter[bag] - scope-11
+ | |
+ | Less Than[boolean] - scope-14
+ | |
+ | |---Project[int][0] - scope-12
+ | |
+ | |---Constant(5) - scope-13
+ |
+ |---POValueInputTez - scope-46 <- scope-43
+Tez vertex scope-47
+# Plan on vertex
+e: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-52 -> scope-51
+| |
+| Project[int][0] - scope-35
+|
+|---c: Filter[bag] - scope-17
+ | |
+ | Equal To[boolean] - scope-20
+ | |
+ | |---Project[int][0] - scope-18
+ | |
+ | |---Constant(10) - scope-19
+ |
+ |---POValueInputTez - scope-48 <- scope-43
+Tez vertex scope-51
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-42
+|
+|---e: New For Each(true,true,true)[tuple] - scope-41
+ | |
+ | Project[bag][1] - scope-38
+ | |
+ | Project[bag][2] - scope-39
+ | |
+ | Project[bag][3] - scope-40
+ |
+ |---e: Package(Packager)[tuple]{int} - scope-31
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7.gld?rev=1780431&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7.gld Thu Jan 26 17:40:35 2017
@@ -0,0 +1,95 @@
+#--------------------------------------------------
+# There are 1 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-43 -> Tez vertex scope-45,Tez vertex scope-47,Tez vertex scope-51,Tez vertex scope-52,
+Tez vertex scope-52 -> Tez vertex scope-45,Tez vertex scope-47,
+Tez vertex scope-45 -> Tez vertex scope-51,
+Tez vertex scope-47 -> Tez vertex scope-51,
+Tez vertex scope-51
+
+Tez vertex scope-43
+# Plan on vertex
+a: Split - scope-58
+| |
+| e: BuildBloom Rearrange[tuple]{int}(false) - scope-36 -> [ scope-51, scope-52]
+| | |
+| | Project[int][0] - scope-37
+| |
+| |---d: Filter[bag] - scope-23
+| | |
+| | Greater Than[boolean] - scope-26
+| | |
+| | |---Project[int][0] - scope-24
+| | |
+| | |---Constant(10) - scope-25
+| |
+| POValueOutputTez - scope-44 -> [scope-45, scope-47]
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0
+Tez vertex scope-52
+# Combine plan on edge <scope-43>
+Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52
+| |
+| Project[int][0] - scope-56
+|
+|---Package(BloomPackager)[tuple]{int} - scope-55
+# Plan on vertex
+POValueOutputTez - scope-54 -> [scope-45, scope-47]
+|
+|---Package(BloomPackager)[tuple]{int} - scope-53
+Tez vertex scope-45
+# Plan on vertex
+e: BloomFilter Rearrange[tuple]{int}(false) - scope-32 <- scope-52 -> scope-51
+| |
+| Project[int][0] - scope-33
+|
+|---b: Filter[bag] - scope-11
+ | |
+ | Less Than[boolean] - scope-14
+ | |
+ | |---Project[int][0] - scope-12
+ | |
+ | |---Constant(5) - scope-13
+ |
+ |---POValueInputTez - scope-46 <- scope-43
+Tez vertex scope-47
+# Plan on vertex
+e: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-52 -> scope-51
+| |
+| Project[int][0] - scope-35
+|
+|---c: Filter[bag] - scope-17
+ | |
+ | Equal To[boolean] - scope-20
+ | |
+ | |---Project[int][0] - scope-18
+ | |
+ | |---Constant(10) - scope-19
+ |
+ |---POValueInputTez - scope-48 <- scope-43
+Tez vertex scope-51
+# Plan on vertex
+e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-42
+|
+|---e: New For Each(true,true,true)[tuple] - scope-41
+ | |
+ | Project[bag][1] - scope-38
+ | |
+ | Project[bag][2] - scope-39
+ | |
+ | Project[bag][3] - scope-40
+ |
+ |---e: Package(Packager)[tuple]{int} - scope-31
Modified: pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java?rev=1780431&r1=1780430&r2=1780431&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java (original)
+++ pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java Thu Jan 26 17:40:35 2017
@@ -88,6 +88,7 @@ public class TestTezCompiler {
pc.getProperties().remove(PigConfiguration.PIG_OPT_MULTIQUERY);
pc.getProperties().remove(PigConfiguration.PIG_TEZ_OPT_UNION);
pc.getProperties().remove(PigConfiguration.PIG_EXEC_NO_SECONDARY_KEY);
+ pc.getProperties().remove(PigConfiguration.PIG_BLOOMJOIN_STRATEGY);
pigServer = new PigServer(pc);
}
@@ -178,6 +179,125 @@ public class TestTezCompiler {
}
@Test
+ public void testBloomJoin() throws Exception {
+ String query =
+ "a = load 'file:///tmp/input1' as (x, y:int);" +
+ "b = load 'file:///tmp/input2' as (x, z:int);" +
+ "c = load 'file:///tmp/input2' as (x, w:int);" +
+ "d = join b by x, a by x, c by x using 'bloom';" +
+ "e = foreach d generate a::x as x, y, z, w;" +
+ "store e into 'file:///tmp/pigoutput';";
+
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld");
+ resetScope();
+ setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce");
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld");
+ }
+
+ @Test
+ public void testBloomJoinLeftOuter() throws Exception {
+ String query =
+ "a = load 'file:///tmp/input1' as (x:chararray, y:int);" +
+ "b = load 'file:///tmp/input2' as (x:chararray, z:int);" +
+ "d = join a by x left, b by x using 'bloom';" +
+ "e = foreach d generate a::x as x, y, z;" +
+ "store e into 'file:///tmp/pigoutput';";
+
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld");
+ resetScope();
+ setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce");
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld");
+ }
+
+ @Test
+ public void testBloomJoinUnion() throws Exception {
+ // Union (b) is the right input of the join
+ String query =
+ "a = load 'file:///tmp/input1' as (x:int, y:int);" +
+ "b = load 'file:///tmp/input2' as (x:int, z:int);" +
+ "c = load 'file:///tmp/input3' as (x:int, z:int);" +
+ "b = union b, c;" +
+ "d = join a by x, b by x using 'bloom';" +
+ "e = foreach d generate a::x as x, y, z;" +
+ "store e into 'file:///tmp/pigoutput';";
+
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld");
+ resetScope();
+ setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce");
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld");
+ setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, null);
+
+ resetScope();
+ // Union (b) is the left input of the join
+ query =
+ "a = load 'file:///tmp/input1' as (x:int, y:int);" +
+ "b = load 'file:///tmp/input2' as (x:int, z:int);" +
+ "c = load 'file:///tmp/input3' as (x:int, z:int);" +
+ "b = union b, c;" +
+ "d = join b by x, a by x using 'bloom';" +
+ "e = foreach d generate a::x as x, y, z;" +
+ "store e into 'file:///tmp/pigoutput';";
+
+ // Needs shared edges and PIG-3856 to produce a more optimal plan
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld");
+ resetScope();
+ setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce");
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld");
+ }
+
+ @Test
+ public void testBloomJoinSplit() throws Exception {
+ // Left input from a split
+ String query =
+ "a = load 'file:///tmp/input1' as (x:int, y:int);" +
+ "b = load 'file:///tmp/input2' as (x:int, z:int);" +
+ "a1 = filter a by x == 3;" +
+ "a2 = filter a by x == 4;" +
+ "d = join a1 by x, a2 by x, b by x using 'bloom';" +
+ "e = foreach d generate a1::x as x, a1::y as y1, a2::y as y2, z;" +
+ "store e into 'file:///tmp/pigoutput';";
+
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5.gld");
+ resetScope();
+ setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce");
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld");
+ setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, null);
+
+ resetScope();
+ // Right input from a split
+ query =
+ "a = load 'file:///tmp/input1' as (x:int, y:int);" +
+ "b = load 'file:///tmp/input2' as (x:int, z:int);" +
+ "a1 = filter a by x == 3;" +
+ "a2 = filter a by x == 4;" +
+ "d = join b by x, a1 by x using 'bloom';" +
+ "e = foreach d generate a1::x as x, y, z;" +
+ "store a2 into 'file:///tmp/pigoutput/a2';" +
+ "store e into 'file:///tmp/pigoutput/e';";
+
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6.gld");
+ resetScope();
+ setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce");
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6-KeyToReducer.gld");
+ }
+
+ @Test
+ public void testBloomSelfJoin() throws Exception {
+ String query =
+ "a = load 'file:///tmp/input1' as (x:int, y:int);" +
+ "b = filter a by x < 5;" +
+ "c = filter a by x == 10;" +
+ "d = filter a by x > 10;" +
+ "e = join b by x, c by x, d by x using 'bloom';" +
+ "store e into 'file:///tmp/pigoutput';";
+
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7.gld");
+ resetScope();
+ setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce");
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7-KeyToReducer.gld");
+ }
+
+ @Test
public void testSelfJoin() throws Exception {
String query =
"a = load 'file:///tmp/input1' as (x:int, y:int);" +