You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ha...@apache.org on 2010/02/07 19:45:36 UTC

svn commit: r907463 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt test/org/apache/pig/test/TestCollectedGroup.java test/org/apache/pig/test/TestJoin.java

Author: hashutosh
Date: Sun Feb  7 18:45:36 2010
New Revision: 907463

URL: http://svn.apache.org/viewvc?rev=907463&view=rev
Log:
PIG-1046: join algorithm specification is within double quotes

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
    hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Sun Feb  7 18:45:36 2010
@@ -24,6 +24,8 @@
 
 IMPROVEMENTS
 
+PIG-1046: join algorithm specification is within double quotes (ashutoshc)
+
 PIG-1209: Port POJoinPackage to proactively spill (ashutoshc)
 
 PIG-1190: Handling of quoted strings in pig-latin/grunt commands (ashutoshc)

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Sun Feb  7 18:45:36 2010
@@ -320,6 +320,31 @@
 		return cogroup;
 	}
 	
+    /**
+     * Resolves the hint given after USING in a GROUP statement (compared
+     * case-insensitively) and returns the cogroup with its group type pinned.
+     */
+    private LogicalOperator parseUsingForGroupBy(String modifier, ArrayList<CogroupInput> gis, LogicalPlan lp) throws ParseException, PlanException{
+        final boolean wantsCollected = modifier.equalsIgnoreCase("collected");
+        if (!wantsCollected && !modifier.equalsIgnoreCase("regular")) {
+            throw new ParseException("Only COLLECTED or REGULAR are valid GROUP modifiers.");
+        }
+        final LogicalOperator grouped;
+        if (wantsCollected) {
+            if (gis.size() != 1) {
+                throw new ParseException("Collected group is only supported for single input");
+            }
+            if (!isColumnProjectionsOrStar(gis.get(0))) {
+                throw new ParseException("Collected group is only supported for columns or star projection");
+            }
+            grouped = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.COLLECTED);
+        } else {
+            grouped = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.REGULAR);
+        }
+        grouped.pinOption(LOCogroup.OPTION_GROUPTYPE);
+        return grouped;
+    }
+    
 	/**
 	 * Join parser. 
 	 */
@@ -435,7 +460,42 @@
 		return foreach;
 	}
 
-	void assertAtomic(LogicalOperator spec, boolean desiredAtomic) throws ParseException{
+    /**
+     * Builds the join operator for the algorithm named after USING in a JOIN.
+     * The name is compared case-insensitively; "repl"/"replicated" refuse right
+     * and full outer joins, "merge" refuses the join whenever isOuter is set.
+     *
+     * @return the operator from parseJoin, with LOJoin.OPTION_JOIN pinned
+     * @throws ParseException on an unknown name or an unsupported outer join
+     */
+    private LogicalOperator parseUsingForJoin(String modifier, ArrayList<CogroupInput> gis,
+                LogicalPlan lp, boolean isFullOuter, boolean isRightOuter, boolean isOuter) throws
+                ParseException, PlanException{
+
+        LogicalOperator joinOp;
+        if (modifier.equalsIgnoreCase("repl") || modifier.equalsIgnoreCase("replicated")) {
+            if (isFullOuter || isRightOuter) {
+                throw new ParseException("Replicated join does not support (right|full) outer joins");
+            }
+            joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.REPLICATED);
+        } else if (modifier.equalsIgnoreCase("hash") || modifier.equalsIgnoreCase("default")) {
+            joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.HASH);
+        } else if (modifier.equalsIgnoreCase("skewed")) {
+            joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.SKEWED);
+        } else if (modifier.equalsIgnoreCase("merge")) {
+            if (isOuter) {
+                throw new ParseException("Merge join does not support (left|right|full) outer joins");
+            }
+            joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.MERGE);
+        } else {
+            // "default" is accepted above, so the message lists it as well.
+            throw new ParseException("Only REPL, REPLICATED, HASH, DEFAULT, SKEWED and MERGE are valid JOIN modifiers.");
+        }
+        joinOp.pinOption(LOJoin.OPTION_JOIN);
+        return joinOp;
+    }
+
+    void assertAtomic(LogicalOperator spec, boolean desiredAtomic) throws ParseException{
 		Boolean isAtomic = null;
 		if ( spec instanceof LOConst || 
 			(spec instanceof LOUserFunc &&
@@ -1713,29 +1773,22 @@
     ArrayList<CogroupInput> gis = new ArrayList<CogroupInput>(); 
     LogicalOperator cogroup = null; 
     log.trace("Entering CoGroupClause");
+    Token t;
 }
 {
-
     (gi = GroupItem(lp) { gis.add(gi); }
         ("," gi = GroupItem(lp) { gis.add(gi); })*
-        (
-            [<USING> ("\"collected\"" { 
-                if (gis.size() != 1) {
-                    throw new ParseException("Collected group is only supported for single input");  
-                }
-                if (!isColumnProjectionsOrStar(gis.get(0))) {
-                    throw new ParseException("Collected group is only supported for columns or star projection");
-                }
-                cogroup = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.COLLECTED);
-                cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE);
-                }
-                |"\"regular\"" {
-                    cogroup = parseCogroup(gis, lp, LOCogroup.GROUPTYPE.REGULAR);
-                    cogroup.pinOption(LOCogroup.OPTION_GROUPTYPE);
-                }
-                )
-            ]                                                                        
-        )
+        ([ <USING> (
+          (t = < QUOTEDSTRING> { cogroup = parseUsingForGroupBy(unquote (t.image), gis, lp); })
+         |("\"collected\"") {
+            log.info("[WARN] Use of double-quoted string to specify hint is deprecated. Please specify hint in single quotes."); 
+            cogroup = parseUsingForGroupBy("collected", gis, lp);
+            }
+         |("\"regular\"") {
+            log.info("[WARN] Use of double-quoted string to specify hint is deprecated. Please specify hint in single quotes."); 
+            cogroup = parseUsingForGroupBy("regular", gis, lp);
+            }
+        )])
     )
 
     {
@@ -2033,6 +2086,7 @@
 	boolean isRightOuter = false;
 	boolean isFullOuter = false;
 	boolean isOuter = false;
+	Token t;
 }
 {
 	(gi = JoinItem(lp) { gis.add(gi); }
@@ -2081,43 +2135,25 @@
 		
 	}
 	// For all types of join we create LOJoin and mark what type of join it is.
-	(
-		[<USING> ("\"replicated\"" { 
-	          if(isFullOuter || isRightOuter) {
-	              throw new ParseException("Replicated join does not support (right|full) outer joins");
-	          }
-				    joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.REPLICATED); 
-				    joinOp.pinOption(LOJoin.OPTION_JOIN); 
-			    }
-			| "\"repl\"" {
-                  if(isFullOuter || isRightOuter) {
-	                    throw new ParseException("Replicated join does not support (right|full) outer joins");
-	          }
-				    joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.REPLICATED);
-				    joinOp.pinOption(LOJoin.OPTION_JOIN);
-                  }
-		    |"\"skewed\"" {
-		    	    joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.SKEWED);
-		    	    joinOp.pinOption(LOJoin.OPTION_JOIN);
+	([<USING> (
+		  (t = <QUOTEDSTRING> { joinOp = parseUsingForJoin(unquote(t.image), gis, lp, isFullOuter, isRightOuter, isOuter);})
+        | ("\"repl\"" | "\"replicated\"")  {
+		      log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. Please use single quotes."); 
+              joinOp = parseUsingForJoin("replicated", gis, lp, isFullOuter, isRightOuter, isOuter);
+		  }
+	    | ("\"skewed\"") {
+              log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. Please use single quotes."); 
+              joinOp = parseUsingForJoin("skewed", gis, lp, isFullOuter, isRightOuter, isOuter);
 		    	}
-		    |"\"merge\"" { 
-		    	    if(isOuter) {
-                        throw new ParseException("Merge join does not support (left|right|full) outer joins");
-                    }
-		    	    joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.MERGE);
-		    	    joinOp.pinOption(LOJoin.OPTION_JOIN); 
-		    	}
-		    |"\"hash\"" {
-		    		joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.HASH);
-		    		joinOp.pinOption(LOJoin.OPTION_JOIN);
+		| ("\"merge\"") { 
+            log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. Please use single quotes."); 
+            joinOp = parseUsingForJoin("merge", gis, lp, isFullOuter, isRightOuter, isOuter);
+        	}
+	    | ("\"hash\"" | "\"default\"") {
+		    log.info("[WARN] Use of double-quotes for specifying join algorithm is deprecated. Please use single quotes."); 
+            joinOp = parseUsingForJoin("hash", gis, lp, isFullOuter, isRightOuter, isOuter);
 		    	}
-		    |"\"default\"" {
-		    		joinOp = parseJoin(gis, lp, LOJoin.JOINTYPE.HASH);
-		    		joinOp.pinOption(LOJoin.OPTION_JOIN);
-		    	})
-	    ] 
-    )
-    )
+     )]))
 
 	{log.trace("Exiting JoinClause");
 	if (joinOp!=null) {

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestCollectedGroup.java Sun Feb  7 18:45:36 2010
@@ -32,9 +32,12 @@
 import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.Tuple;
+import org.apache.pig.test.utils.LogicalPlanTester;
 import org.apache.pig.test.utils.TestHelper;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCollectedGroup;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
+import org.apache.pig.impl.logicalLayer.LOCogroup;
+import org.apache.pig.impl.logicalLayer.LogicalPlan;
 import org.apache.pig.impl.plan.OperatorKey;
 import org.junit.After;
 import org.junit.Before;
@@ -76,6 +79,22 @@
         Util.deleteFile(cluster, INPUT_FILE);
     }
     
+    /** Checks that a single-quoted 'collected' hint yields a COLLECTED cogroup. */
+    public void testCollectedGrpSpecifiedInSingleQuotes1(){
+        LogicalPlanTester planTester = new LogicalPlanTester();
+        planTester.buildPlan("A = LOAD '" + INPUT_FILE + "' as (id, name, grade);");
+        LogicalPlan groupPlan = planTester.buildPlan("B = group A by id using 'collected';");
+        assertEquals(LOCogroup.GROUPTYPE.COLLECTED, ((LOCogroup) groupPlan.getLeaves().get(0)).getGroupType());
+    }
+    
+    /** Checks that a single-quoted 'regular' hint yields a REGULAR cogroup. */
+    public void testCollectedGrpSpecifiedInSingleQuotes2(){
+        LogicalPlanTester planTester = new LogicalPlanTester();
+        planTester.buildPlan("A = LOAD '" + INPUT_FILE + "' as (id, name, grade);");
+        LogicalPlan groupPlan = planTester.buildPlan("B = group A all using 'regular';");
+        assertEquals(LOCogroup.GROUPTYPE.REGULAR, ((LOCogroup) groupPlan.getLeaves().get(0)).getGroupType());
+    }
+    
     public void testPOMapsideGroupNoNullPlans() throws IOException {
         POCollectedGroup pmg = new POCollectedGroup(new OperatorKey());
         List<PhysicalPlan> plans = pmg.getPlans();

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=907463&r1=907462&r2=907463&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Sun Feb  7 18:45:36 2010
@@ -32,10 +32,12 @@
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.io.FileLocalizer;
+import org.apache.pig.impl.logicalLayer.LOJoin;
+import org.apache.pig.impl.logicalLayer.LogicalPlan;
+import org.apache.pig.impl.logicalLayer.LOJoin.JOINTYPE;
 import org.apache.pig.impl.logicalLayer.parser.ParseException;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
 import org.apache.pig.impl.util.LogUtils;
-import org.apache.pig.test.utils.Identity;
 import org.apache.pig.test.utils.LogicalPlanTester;
 import org.junit.Before;
 import org.junit.Test;
@@ -95,7 +97,6 @@
         }
     }
 
-    
     @Test
     public void testJoinUnkownSchema() throws Exception {
         // If any of the input schema is unknown, the resulting schema should be unknown as well
@@ -109,7 +110,7 @@
             assertTrue(schema == null);
         }
     }
-    
+
     @Test
     public void testDefaultJoin() throws IOException, ParseException {
         for (ExecType execType : execTypes) {
@@ -553,5 +554,54 @@
             deleteInputFile(execType, secondInput);
         }
     }
-
+    
+    /** Checks that USING 'merge' in single quotes yields a MERGE join. */
+    @Test
+    public void testLiteralsForJoinAlgoSpecification1() {
+        LogicalPlanTester planTester = new LogicalPlanTester();
+        planTester.buildPlan("a = load 'A'; ");
+        planTester.buildPlan("b = load 'B'; ");
+        LogicalPlan joinPlan = planTester.buildPlan("c = Join a by $0, b by $0 using 'merge'; ");
+        assertEquals(JOINTYPE.MERGE, ((LOJoin) joinPlan.getLeaves().get(0)).getJoinType());
+    }
+    
+    /** Checks that USING 'hash' in single quotes yields a HASH join. */
+    @Test
+    public void testLiteralsForJoinAlgoSpecification2() {
+        LogicalPlanTester planTester = new LogicalPlanTester();
+        planTester.buildPlan("a = load 'A'; ");
+        planTester.buildPlan("b = load 'B'; ");
+        LogicalPlan joinPlan = planTester.buildPlan("c = Join a by $0, b by $0 using 'hash'; ");
+        assertEquals(JOINTYPE.HASH, ((LOJoin) joinPlan.getLeaves().get(0)).getJoinType());
+    }
+    
+    /** Checks that USING 'default' in single quotes yields a HASH join. */
+    @Test
+    public void testLiteralsForJoinAlgoSpecification5() {
+        LogicalPlanTester planTester = new LogicalPlanTester();
+        planTester.buildPlan("a = load 'A'; ");
+        planTester.buildPlan("b = load 'B'; ");
+        LogicalPlan joinPlan = planTester.buildPlan("c = Join a by $0, b by $0 using 'default'; ");
+        assertEquals(JOINTYPE.HASH, ((LOJoin) joinPlan.getLeaves().get(0)).getJoinType());
+    }
+    
+    /** Checks that USING 'repl' in single quotes yields a REPLICATED join. */
+    @Test
+    public void testLiteralsForJoinAlgoSpecification3() {
+        LogicalPlanTester planTester = new LogicalPlanTester();
+        planTester.buildPlan("a = load 'A'; ");
+        planTester.buildPlan("b = load 'B'; ");
+        LogicalPlan joinPlan = planTester.buildPlan("c = Join a by $0, b by $0 using 'repl'; ");
+        assertEquals(JOINTYPE.REPLICATED, ((LOJoin) joinPlan.getLeaves().get(0)).getJoinType());
+    }
+    
+    /** Checks that USING 'replicated' in single quotes yields a REPLICATED join. */
+    @Test
+    public void testLiteralsForJoinAlgoSpecification4() {
+        LogicalPlanTester planTester = new LogicalPlanTester();
+        planTester.buildPlan("a = load 'A'; ");
+        planTester.buildPlan("b = load 'B'; ");
+        LogicalPlan joinPlan = planTester.buildPlan("c = Join a by $0, b by $0 using 'replicated'; ");
+        assertEquals(JOINTYPE.REPLICATED, ((LOJoin) joinPlan.getLeaves().get(0)).getJoinType());
+    }
 }