You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by th...@apache.org on 2011/10/12 01:23:21 UTC

svn commit: r1182143 - in /pig/trunk: CHANGES.txt src/org/apache/pig/Main.java src/org/apache/pig/newplan/logical/rules/LogicalExpressionSimplifier.java test/org/apache/pig/test/TestFilterSimplification.java

Author: thejas
Date: Tue Oct 11 23:23:20 2011
New Revision: 1182143

URL: http://svn.apache.org/viewvc?rev=1182143&view=rev
Log:
PIG-2316: Incorrect results for FILTER *** BY ( *** OR ***) with
 FilterLogicExpressionSimplifier optimizer turned on (knoguchi via thejas)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/Main.java
    pig/trunk/src/org/apache/pig/newplan/logical/rules/LogicalExpressionSimplifier.java
    pig/trunk/test/org/apache/pig/test/TestFilterSimplification.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1182143&r1=1182142&r2=1182143&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Oct 11 23:23:20 2011
@@ -139,6 +139,9 @@ PIG-2228: support partial aggregation in
 
 BUG FIXES
 
+PIG-2316: Incorrect results for FILTER *** BY ( *** OR ***) with
+ FilterLogicExpressionSimplifier optimizer turned on (knoguchi via thejas)
+
 PIG-2271: PIG regression in BinStorage/PigStorage in 0.9.1 (thejas)
 
 PIG-2309: Keyword 'NOT' is wrongly treated as a UDF in split statement (vivekp via thejas)

Modified: pig/trunk/src/org/apache/pig/Main.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/Main.java?rev=1182143&r1=1182142&r2=1182143&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/Main.java (original)
+++ pig/trunk/src/org/apache/pig/Main.java Tue Oct 11 23:23:20 2011
@@ -91,9 +91,12 @@ public class Main {
     private static final String BRIEF = "brief";
     private static final String DEBUG = "debug";
     private static final String VERBOSE = "verbose";
-    
+
     private enum ExecMode {STRING, FILE, SHELL, UNKNOWN}
-                
+
+    private static final String PROP_FILT_SIMPL_OPT 
+        = "pig.exec.filterLogicExpressionSimplifier";
+    
 /**
  * The Main-Class for the Pig Jar that will provide a shell and setup a classpath appropriate
  * for executing Jar files.  Warning, this method calls System.exit().
@@ -349,6 +352,11 @@ static int run(String args[], PigProgres
             log.info("Logging error messages to: " + logFileName);
         }
         
+        if( ! Boolean.valueOf(properties.getProperty(PROP_FILT_SIMPL_OPT, "false"))){
+            //turn off if the user has not explicitly turned on this optimization 
+            optimizerRules.add("FilterLogicExpressionSimplifier");
+        }
+        
         if(optimizerRules.size() > 0) {
             pigContext.getProperties().setProperty("pig.optimizer.rules", ObjectSerializer.serialize(optimizerRules));
         }
@@ -757,7 +765,6 @@ public static void usage()
         System.out.println("    -p, -param - Key value pair of the form param=val");
         System.out.println("    -r, -dryrun - Produces script with substituted parameters. Script is not executed.");
         System.out.println("    -t, -optimizer_off - Turn optimizations off. The following values are supported:");
-        System.out.println("            FilterLogicExpressionSimplifier - Simplify filter expressions");
         System.out.println("            SplitFilter - Split filter conditions");
         System.out.println("            PushUpFilter - Filter as early as possible");
         System.out.println("            MergeFilter - Merge filter conditions");
@@ -768,7 +775,7 @@ public static void usage()
         System.out.println("            MergeForEach - Merge adjacent ForEach");
         System.out.println("            GroupByConstParallelSetter - Force parallel 1 for \"group all\" statement");
         System.out.println("            All - Disable all optimizations");
-        System.out.println("        All optimizations are enabled by default. Optimization values are case insensitive.");
+        System.out.println("        All optimizations listed here are enabled by default. Optimization values are case insensitive.");
         System.out.println("    -v, -verbose - Print all error messages to screen");
         System.out.println("    -w, -warning - Turn warning logging on; also turns warning aggregation off");
         System.out.println("    -x, -exectype - Set execution mode: local|mapreduce, default is mapreduce.");
@@ -806,6 +813,8 @@ public static void printProperties(){
         System.out.println("        pig.exec.mapPartAgg.minReduction=<min aggregation factor>. Default is 10.");
         System.out.println("            If the in-map partial aggregation does not reduce the output num records");
         System.out.println("            by this factor, it gets disabled.");        
+        System.out.println("        " + PROP_FILT_SIMPL_OPT + "=true|false; Default is false.");
+        System.out.println("            Enable optimizer rules to simplify filter expressions.");
         System.out.println("    Miscellaneous:");
         System.out.println("        exectype=mapreduce|local; default is mapreduce. This property is the same as -x switch");
         System.out.println("        pig.additional.jars=<comma seperated list of jars>. Used in place of register command.");

Modified: pig/trunk/src/org/apache/pig/newplan/logical/rules/LogicalExpressionSimplifier.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/rules/LogicalExpressionSimplifier.java?rev=1182143&r1=1182142&r2=1182143&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/rules/LogicalExpressionSimplifier.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/rules/LogicalExpressionSimplifier.java Tue Oct 11 23:23:20 2011
@@ -626,19 +626,19 @@ public class LogicalExpressionSimplifier
             boolean comparable1 = v1 instanceof Comparable, comparable2 = v2 instanceof Comparable;
             boolean isEqual1 = e1 instanceof EqualExpression, isEqual2 = e2 instanceof EqualExpression, isNotEqual1 = e1 instanceof NotEqualExpression, isNotEqual2 = e2 instanceof NotEqualExpression, isGT1 = e1 instanceof GreaterThanExpression, isGT2 = e2 instanceof GreaterThanExpression, isGE1 = e1 instanceof GreaterThanEqualExpression, isGE2 = e2 instanceof GreaterThanEqualExpression, isLT1 = e1 instanceof LessThanExpression, isLT2 = e2 instanceof LessThanExpression, isLE1 = e1 instanceof LessThanEqualExpression, isLE2 = e2 instanceof LessThanEqualExpression;
             if (isEqual1 && isEqual2) {
-                if (val1.equals(val2)) return Equal | ImplyLeft | ImplyRight;
+                if (v1.equals(v2)) return Equal | ImplyLeft | ImplyRight;
                 else return Exclusive;
             }
             else if (isEqual1 && isNotEqual2) {
-                if (val1.equals(val2)) return Exclusive;
+                if (v1.equals(v2)) return Exclusive;
                 else return ImplyRight;
             }
             else if (isNotEqual1 && isEqual2) {
-                if (val1.equals(v2)) return Exclusive;
+                if (v1.equals(v2)) return Exclusive;
                 else return ImplyLeft;
             }
             else if (isNotEqual1 && isNotEqual2) {
-                if (val1.equals(v2)) return Equal | ImplyLeft | ImplyRight;
+                if (v1.equals(v2)) return Equal | ImplyLeft | ImplyRight;
                 else return Unknown;
             }
             else if (isEqual1 && isGT2) {

Modified: pig/trunk/test/org/apache/pig/test/TestFilterSimplification.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestFilterSimplification.java?rev=1182143&r1=1182142&r2=1182143&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestFilterSimplification.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestFilterSimplification.java Tue Oct 11 23:23:20 2011
@@ -795,7 +795,46 @@ public class TestFilterSimplification ex
 
         assertTrue(expected.isEqual(newLogicalPlan));
     }
-    
+
+    // PIG-2316
+    @Test
+    public void testEqualNotEqualWithSameValue() throws Exception {
+        String query = "b = filter (load 'd.txt' as (a0:int, a1:int)) "
+                       + "by ((a0 == 1) or (a0 != 1));"
+                       + "store b into 'empty';";
+        LogicalPlan newLogicalPlan = Util.buildLp(pigServer, query);;
+        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 10);
+        optimizer.optimize();
+
+        //expected plan is same as original plan
+        LogicalPlan expected = Util.buildLp(pigServer, query);;
+        assertTrue(expected.isEqual(newLogicalPlan));
+
+        //swapping == and !=
+        query = "b = filter (load 'd.txt' as (a0:int, a1:int)) "
+                       + "by ((a0 != 1) or (a0 == 1));"
+                       + "store b into 'empty';";
+        newLogicalPlan = Util.buildLp(pigServer, query);;
+        optimizer = new MyPlanOptimizer(newLogicalPlan, 10);
+        optimizer.optimize();
+
+        //expected plan is same as original plan
+        expected = Util.buildLp(pigServer, query);;
+        assertTrue(expected.isEqual(newLogicalPlan));
+
+        //more realistic test case which created incorrect output
+        query = "b = filter (load 'd.txt' as (a0:int, a1:int)) "
+                       + "by ((a0 == 1 and a1 == 3) or (a0 != 1));"
+                       + "store b into 'empty';";
+        newLogicalPlan = Util.buildLp(pigServer, query);;
+        optimizer = new MyPlanOptimizer(newLogicalPlan, 10);
+        optimizer.optimize();
+
+        //expected plan is same as original plan
+        expected = Util.buildLp(pigServer, query);;
+        assertTrue(expected.isEqual(newLogicalPlan));
+    }
+
     public class MyPlanOptimizer extends LogicalPlanOptimizer {
 
         protected MyPlanOptimizer(OperatorPlan p, int iterations) {