You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ha...@apache.org on 2010/02/07 19:45:36 UTC
svn commit: r907463 - in /hadoop/pig/trunk: CHANGES.txt
src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
test/org/apache/pig/test/TestCollectedGroup.java
test/org/apache/pig/test/TestJoin.java
Author: hashutosh
Date: Sun Feb 7 18:45:36 2010
New Revision: 907463
URL: http://svn.apache.org/viewvc?rev=907463&view=rev
Log:
PIG-1046: join algorithm specification is within double quotes
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java
hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Sun Feb 7 18:45:36 2010
@@ -24,6 +24,8 @@
IMPROVEMENTS
+PIG-1046: join algorithm specification is within double quotes (ashutoshc)
+
PIG-1209: Port POJoinPackage to proactively spill (ashutoshc)
PIG-1190: Handling of quoted strings in pig-latin/grunt commands (ashutoshc)
Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Sun Feb 7 18:45:36 2010
@@ -320,6 +320,31 @@
return cogroup;
}
+ private LogicalOperator parseUsingForGroupBy(String modifier, ArrayList<CogroupInput> gis, LogicalPlan lp) throws ParseException, PlanException{
+ // Maps the GROUP ... USING modifier (compared case-insensitively) to a group type, builds the cogroup via parseCogroup and pins OPTION_GROUPTYPE on it.
+ if(modifier.equalsIgnoreCase("collected")){
+ if (gis.size() != 1) {
+ throw new ParseException("Collected group is only supported for single input");
+ }
+ if (!isColumnProjectionsOrStar(gis.get(0))) {
+ throw new ParseException("Collected group is only supported for columns or star projection");
+ }
+ LogicalOperator cogroup = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.COLLECTED);
+ cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE);
+ return cogroup;
+ }
+ // "regular" carries no extra preconditions on the inputs.
+ else if (modifier.equalsIgnoreCase("regular")){
+ LogicalOperator cogroup = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.REGULAR);
+ cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE);
+ return cogroup;
+ }
+ // Any other modifier is rejected with a ParseException.
+ else{
+ throw new ParseException("Only COLLECTED or REGULAR are valid GROUP modifiers.");
+ }
+ }
+
/**
* Join parser.
*/
@@ -435,7 +460,42 @@
return foreach;
}
- void assertAtomic(LogicalOperator spec, boolean desiredAtomic) throws ParseException{
+ private LogicalOperator parseUsingForJoin(String modifier, ArrayList<CogroupInput> gis,
+ LogicalPlan lp, boolean isFullOuter, boolean isRightOuter, boolean isOuter) throws
+ ParseException, PlanException{
+ // Maps the JOIN ... USING modifier (compared case-insensitively) to a JOINTYPE, builds the join via parseJoin and pins OPTION_JOIN; throws ParseException for an unknown modifier or an outer-join kind the chosen algorithm rejects.
+ if (modifier.equalsIgnoreCase("repl") || modifier.equalsIgnoreCase("replicated")) {
+ if(isFullOuter || isRightOuter) {
+ throw new ParseException("Replicated join does not support (right|full) outer joins");
+ }
+ LogicalOperator joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.REPLICATED);
+ joinOp.pinOption(LOJoin.OPTION_JOIN);
+ return joinOp;
+ }
+ else if (modifier.equalsIgnoreCase("hash") || modifier.equalsIgnoreCase("default")) { // "default" is an alias for the hash join
+ LogicalOperator joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.HASH);
+ joinOp.pinOption(LOJoin.OPTION_JOIN);
+ return joinOp;
+ }
+ else if (modifier.equalsIgnoreCase("skewed")) {
+ LogicalOperator joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.SKEWED);
+ joinOp.pinOption(LOJoin.OPTION_JOIN);
+ return joinOp;
+ }
+ else if (modifier.equalsIgnoreCase("merge")) {
+ if(isOuter) {
+ throw new ParseException("Merge join does not support (left|right|full) outer joins");
+ }
+ LogicalOperator joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.MERGE);
+ joinOp.pinOption(LOJoin.OPTION_JOIN);
+ return joinOp;
+ }
+ else{
+ throw new ParseException("Only REPL, REPLICATED, HASH, DEFAULT, SKEWED and MERGE are valid JOIN modifiers."); // list mirrors every modifier accepted above
+ }
+ }
+
+ void assertAtomic(LogicalOperator spec, boolean desiredAtomic) throws ParseException{
Boolean isAtomic = null;
if ( spec instanceof LOConst ||
(spec instanceof LOUserFunc &&
@@ -1713,29 +1773,22 @@
ArrayList<CogroupInput> gis = new ArrayList<CogroupInput>();
LogicalOperator cogroup = null;
log.trace("Entering CoGroupClause");
+ Token t;
}
{
-
(gi = GroupItem(lp) { gis.add(gi); }
("," gi = GroupItem(lp) { gis.add(gi); })*
- (
- [<USING> ("\"collected\"" {
- if (gis.size() != 1) {
- throw new ParseException("Collected group is only supported for single input");
- }
- if (!isColumnProjectionsOrStar(gis.get(0))) {
- throw new ParseException("Collected group is only supported for columns or star projection");
- }
- cogroup = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.COLLECTED);
- cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE);
- }
- |"\"regular\"" {
- cogroup = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.REGULAR);
- cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE);
- }
- )
- ]
- )
+ ([ <USING> (
+ (t = < QUOTEDSTRING> { cogroup = parseUsingForGroupBy(unquote (t.image), gis, lp); })
+ |("\"collected\"") {
+ log.info("[WARN] Use of double-quoted string to specify hint is deprecated. Please specify hint in single quotes.");
+ cogroup = parseUsingForGroupBy("collected", gis, lp);
+ }
+ |("\"regular\"") {
+ log.info("[WARN] Use of double-quoted string to specify hint is deprecated. Please specify hint in single quotes.");
+ cogroup = parseUsingForGroupBy("regular", gis, lp);
+ }
+ )])
)
{
@@ -2033,6 +2086,7 @@
boolean isRightOuter = false;
boolean isFullOuter = false;
boolean isOuter = false;
+ Token t;
}
{
(gi = JoinItem(lp) { gis.add(gi); }
@@ -2081,43 +2135,25 @@
}
// For all types of join we create LOJoin and mark what type of join it is.
- (
- [<USING> ("\"replicated\"" {
- if(isFullOuter || isRightOuter) {
- throw new ParseException("Replicated join does not support (right|full) outer joins");
- }
- joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.REPLICATED);
- joinOp.pinOption(LOJoin.OPTION_JOIN);
- }
- | "\"repl\"" {
- if(isFullOuter || isRightOuter) {
- throw new ParseException("Replicated join does not support (right|full) outer joins");
- }
- joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.REPLICATED);
- joinOp.pinOption(LOJoin.OPTION_JOIN);
- }
- |"\"skewed\"" {
- joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.SKEWED);
- joinOp.pinOption(LOJoin.OPTION_JOIN);
+ ([<USING> (
+ (t = <QUOTEDSTRING> { joinOp = parseUsingForJoin(unquote(t.image), gis, lp, isFullOuter, isRightOuter, isOuter);})
+ | ("\"repl\"" | "\"replicated\"") {
+ log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. Please use single quotes.");
+ joinOp = parseUsingForJoin("replicated", gis, lp, isFullOuter, isRightOuter, isOuter);
+ }
+ | ("\"skewed\"") {
+ log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. Please use single quotes.");
+ joinOp = parseUsingForJoin("skewed", gis, lp, isFullOuter, isRightOuter, isOuter);
}
- |"\"merge\"" {
- if(isOuter) {
- throw new ParseException("Merge join does not support (left|right|full) outer joins");
- }
- joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.MERGE);
- joinOp.pinOption(LOJoin.OPTION_JOIN);
- }
- |"\"hash\"" {
- joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.HASH);
- joinOp.pinOption(LOJoin.OPTION_JOIN);
+ | ("\"merge\"") {
+ log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. Please use single quotes.");
+ joinOp = parseUsingForJoin("merge", gis, lp, isFullOuter, isRightOuter, isOuter);
+ }
+ | ("\"hash\"" | "\"default\"") {
+ log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. Please use single quotes.");
+ joinOp = parseUsingForJoin("hash", gis, lp, isFullOuter, isRightOuter, isOuter);
}
- |"\"default\"" {
- joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.HASH);
- joinOp.pinOption(LOJoin.OPTION_JOIN);
- })
- ]
- )
- )
+ )]))
{log.trace("Exiting JoinClause");
if (joinOp!=null) {
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java Sun Feb 7 18:45:36 2010
@@ -32,9 +32,12 @@
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
+import org.apache.pig.test.utils.LogicalPlanTester;
import org.apache.pig.test.utils.TestHelper;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCollectedGroup;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
+import org.apache.pig.impl.logicalLayer.LOCogroup;
+import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.plan.OperatorKey;
import org.junit.After;
import org.junit.Before;
@@ -76,6 +79,22 @@
Util.deleteFile(cluster, INPUT_FILE);
}
+ public void testCollectedGrpSpecifiedInSingleQuotes1(){
+ // Plan-level check: a single-quoted 'collected' hint on GROUP should yield a leaf LOCogroup whose group type is COLLECTED.
+ LogicalPlanTester lpt = new LogicalPlanTester();
+ lpt.buildPlan("A = LOAD '" + INPUT_FILE + "' as (id, name, grade);");
+ LogicalPlan lp = lpt.buildPlan("B = group A by id using 'collected';");
+ assertEquals(LOCogroup.GROUPTYPE.COLLECTED, ((LOCogroup)lp.getLeaves().get(0)).getGroupType());
+ }
+
+ public void testCollectedGrpSpecifiedInSingleQuotes2(){
+ // Plan-level check: a single-quoted 'regular' hint on GROUP ALL should yield a leaf LOCogroup whose group type is REGULAR.
+ LogicalPlanTester lpt = new LogicalPlanTester();
+ lpt.buildPlan("A = LOAD '" + INPUT_FILE + "' as (id, name, grade);");
+ LogicalPlan lp = lpt.buildPlan("B = group A all using 'regular';");
+ assertEquals(LOCogroup.GROUPTYPE.REGULAR, ((LOCogroup)lp.getLeaves().get(0)).getGroupType());
+ }
+
public void testPOMapsideGroupNoNullPlans() throws IOException {
POCollectedGroup pmg = new POCollectedGroup(new OperatorKey());
List<PhysicalPlan> plans = pmg.getPlans();
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Sun Feb 7 18:45:36 2010
@@ -32,10 +32,12 @@
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.io.FileLocalizer;
+import org.apache.pig.impl.logicalLayer.LOJoin;
+import org.apache.pig.impl.logicalLayer.LogicalPlan;
+import org.apache.pig.impl.logicalLayer.LOJoin.JOINTYPE;
import org.apache.pig.impl.logicalLayer.parser.ParseException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.LogUtils;
-import org.apache.pig.test.utils.Identity;
import org.apache.pig.test.utils.LogicalPlanTester;
import org.junit.Before;
import org.junit.Test;
@@ -95,7 +97,6 @@
}
}
-
@Test
public void testJoinUnkownSchema() throws Exception {
// If any of the input schema is unknown, the resulting schema should be unknown as well
@@ -109,7 +110,7 @@
assertTrue(schema == null);
}
}
-
+
@Test
public void testDefaultJoin() throws IOException, ParseException {
for (ExecType execType : execTypes) {
@@ -553,5 +554,54 @@
deleteInputFile(execType, secondInput);
}
}
-
+
+ @Test
+ public void testLiteralsForJoinAlgoSpecification1() {
+ // Single-quoted 'merge' after USING should produce a leaf LOJoin of type MERGE.
+ LogicalPlanTester lpt = new LogicalPlanTester();
+ lpt.buildPlan("a = load 'A'; ");
+ lpt.buildPlan("b = load 'B'; ");
+ LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 'merge'; ");
+ assertEquals(JOINTYPE.MERGE, ((LOJoin)lp.getLeaves().get(0)).getJoinType());
+ }
+
+ @Test
+ public void testLiteralsForJoinAlgoSpecification2() {
+ // Single-quoted 'hash' after USING should produce a leaf LOJoin of type HASH.
+ LogicalPlanTester lpt = new LogicalPlanTester();
+ lpt.buildPlan("a = load 'A'; ");
+ lpt.buildPlan("b = load 'B'; ");
+ LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 'hash'; ");
+ assertEquals(JOINTYPE.HASH, ((LOJoin)lp.getLeaves().get(0)).getJoinType());
+ }
+
+ @Test
+ public void testLiteralsForJoinAlgoSpecification5() {
+ // Single-quoted 'default' after USING is expected to resolve to the same HASH join type as 'hash'.
+ LogicalPlanTester lpt = new LogicalPlanTester();
+ lpt.buildPlan("a = load 'A'; ");
+ lpt.buildPlan("b = load 'B'; ");
+ LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 'default'; ");
+ assertEquals(JOINTYPE.HASH, ((LOJoin)lp.getLeaves().get(0)).getJoinType());
+ }
+
+ @Test
+ public void testLiteralsForJoinAlgoSpecification3() {
+ // Single-quoted 'repl' (short form) after USING should produce a leaf LOJoin of type REPLICATED.
+ LogicalPlanTester lpt = new LogicalPlanTester();
+ lpt.buildPlan("a = load 'A'; ");
+ lpt.buildPlan("b = load 'B'; ");
+ LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 'repl'; ");
+ assertEquals(JOINTYPE.REPLICATED, ((LOJoin)lp.getLeaves().get(0)).getJoinType());
+ }
+
+ @Test
+ public void testLiteralsForJoinAlgoSpecification4() {
+ // Single-quoted 'replicated' (long form) after USING should produce a leaf LOJoin of type REPLICATED.
+ LogicalPlanTester lpt = new LogicalPlanTester();
+ lpt.buildPlan("a = load 'A'; ");
+ lpt.buildPlan("b = load 'B'; ");
+ LogicalPlan lp = lpt.buildPlan("c = Join a by $0, b by $0 using 'replicated'; ");
+ assertEquals(JOINTYPE.REPLICATED, ((LOJoin)lp.getLeaves().get(0)).getJoinType());
+ }
}