You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by bi...@apache.org on 2013/05/17 07:54:12 UTC

svn commit: r1483659 - in /pig/trunk: ./ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/ src/org/apache/pig/newplan/logical/optimizer/ test/org/apache/pig/test/

Author: billgraham
Date: Fri May 17 05:54:11 2013
New Revision: 1483659

URL: http://svn.apache.org/r1483659
Log:
PIG-3317: disable optimizations via pig properties (traviscrawford via billgraham)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
    pig/trunk/src/org/apache/pig/Main.java
    pig/trunk/src/org/apache/pig/PigConstants.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
    pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java
    pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri May 17 05:54:11 2013
@@ -28,6 +28,8 @@ PIG-3174:  Remove rpm and deb artifacts 
 
 IMPROVEMENTS
 
+PIG-3317: disable optimizations via pig properties (traviscrawford via billgraham)
+
 PIG-3321: AVRO: Support user specified schema on load (harveyc via rohini)
 
 PIG-2959: Add a pig.cmd for Pig to run under Windows (daijy)

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml Fri May 17 05:54:11 2013
@@ -477,14 +477,24 @@ STORE Gtab INTO '/user/vxj/finalresult2'
 <section id="optimization-rules">
 <title>Optimization Rules</title>
 
-<p>Pig supports various optimization rules. By default optimization, and all optimization rules, are turned on. 
-To turn off optimiztion, use:</p>
+<p>Pig supports various optimization rules, all of which are enabled by default.
+To disable all or specific optimizations, use one or more of the following methods.
+Note that some optimization rules are mandatory and cannot be disabled.</p>
+
+    <ul>
+        <li>The <code>pig.optimizer.rules.disabled</code>
+            <a href="start.html#properties">pig property</a>, which accepts a
+            comma-separated list of optimization rules to disable; the <code>all</code>
+            keyword disables all non-mandatory optimizations.
+            (e.g.: <code>set pig.optimizer.rules.disabled 'ColumnMapKeyPrune';</code>)</li>
+        <li>The <code>-t, -optimizer_off</code> command-line options.
+            (e.g.: <code>pig -optimizer_off [opt_rule | all]</code>)</li>
+    </ul>
+
+<p><code>FilterLogicExpressionSimplifier</code> is an exception to the above.
+The rule is disabled by default, and enabled by setting the
+<code>pig.exec.filterLogicExpressionSimplifier</code> pig property to true.</p>
 
-<source>
-pig -optimizer_off [opt_rule | all ]
-</source>
-
-<p>Note that some rules are mandatory and cannot be turned off.</p>
 
 <!-- +++++++++++++++++++++++++++++++ -->
 <section id="FilterLogicExpressionSimplifier">

Modified: pig/trunk/src/org/apache/pig/Main.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/Main.java?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/Main.java (original)
+++ pig/trunk/src/org/apache/pig/Main.java Fri May 17 05:54:11 2013
@@ -61,6 +61,7 @@ import org.apache.pig.backend.hadoop.dat
 import org.apache.pig.classification.InterfaceAudience;
 import org.apache.pig.classification.InterfaceStability;
 import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.PigImplConstants;
 import org.apache.pig.impl.io.FileLocalizer;
 import org.apache.pig.impl.util.JarManager;
 import org.apache.pig.impl.util.LogUtils;
@@ -186,7 +187,7 @@ static int run(String args[], PigProgres
         boolean embedded = false;
         List<String> params = new ArrayList<String>();
         List<String> paramFiles = new ArrayList<String>();
-        HashSet<String> optimizerRules = new HashSet<String>();
+        HashSet<String> disabledOptimizerRules = new HashSet<String>();
 
         CmdLineParser opts = new CmdLineParser(pigArgs);
         opts.registerOpt('4', "log4jconf", CmdLineParser.ValueExpected.REQUIRED);
@@ -316,7 +317,7 @@ static int run(String args[], PigProgres
                 break;
 
             case 't':
-            	optimizerRules.add(opts.getValStr());
+                disabledOptimizerRules.add(opts.getValStr());
                 break;
 
             case 'v':
@@ -391,12 +392,11 @@ static int run(String args[], PigProgres
 
         if( ! Boolean.valueOf(properties.getProperty(PROP_FILT_SIMPL_OPT, "false"))){
             //turn off if the user has not explicitly turned on this optimization
-            optimizerRules.add("FilterLogicExpressionSimplifier");
+            disabledOptimizerRules.add("FilterLogicExpressionSimplifier");
         }
 
-        if(optimizerRules.size() > 0) {
-            pigContext.getProperties().setProperty("pig.optimizer.rules", ObjectSerializer.serialize(optimizerRules));
-        }
+        pigContext.getProperties().setProperty(PigImplConstants.PIG_OPTIMIZER_RULES_KEY,
+                ObjectSerializer.serialize(disabledOptimizerRules));
 
         PigContext.setClassLoader(pigContext.createCl(null));
 

Modified: pig/trunk/src/org/apache/pig/PigConstants.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/PigConstants.java?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/PigConstants.java (original)
+++ pig/trunk/src/org/apache/pig/PigConstants.java Fri May 17 05:54:11 2013
@@ -18,8 +18,13 @@
 
 package org.apache.pig;
 
+import org.apache.pig.classification.InterfaceAudience;
+
+@InterfaceAudience.Public
 public class PigConstants {
-    private PigConstants() {}
+    private PigConstants() {
+        throw new IllegalStateException();
+    }
 
     /**
      * This key is used in the job conf to let the various jobs know what code was
@@ -35,4 +40,10 @@ public class PigConstants {
 
     // This makes it easy to turn SchemaTuple on globally.
     public static final boolean SCHEMA_TUPLE_ON_BY_DEFAULT = false;
+
+    /**
+     * {@value} is a comma-separated list of optimizer rules to disable;
+     * by default, all rules are enabled.
+     */
+    public static final String PIG_OPTIMIZER_RULES_DISABLED_KEY = "pig.optimizer.rules.disabled";
 }
\ No newline at end of file

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java Fri May 17 05:54:11 2013
@@ -30,12 +30,15 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Properties;
 
+import com.google.common.base.Splitter;
+import com.google.common.collect.Lists;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.pig.ExecType;
+import org.apache.pig.PigConstants;
 import org.apache.pig.PigException;
 import org.apache.pig.backend.datastorage.DataStorage;
 import org.apache.pig.backend.executionengine.ExecException;
@@ -44,6 +47,7 @@ import org.apache.pig.backend.hadoop.dat
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
 import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.PigImplConstants;
 import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.plan.OperatorKey;
 import org.apache.pig.impl.util.ObjectSerializer;
@@ -246,38 +250,45 @@ public class HExecutionEngine {
         SchemaResetter schemaResetter = new SchemaResetter( plan, true /*skip duplicate uid check*/ );
         schemaResetter.visit();
         
-        HashSet<String> optimizerRules = null;
+        HashSet<String> disabledOptimizerRules;
         try {
-            optimizerRules = (HashSet<String>) ObjectSerializer
+            disabledOptimizerRules = (HashSet<String>) ObjectSerializer
                     .deserialize(pigContext.getProperties().getProperty(
-                            "pig.optimizer.rules"));
+                            PigImplConstants.PIG_OPTIMIZER_RULES_KEY));
         } catch (IOException ioe) {
             int errCode = 2110;
             String msg = "Unable to deserialize optimizer rules.";
             throw new FrontendException(msg, errCode, PigException.BUG, ioe);
         }
-        
+        if (disabledOptimizerRules == null) {
+            disabledOptimizerRules = new HashSet<String>();
+        }
+
+        String pigOptimizerRulesDisabled = this.pigContext.getProperties().getProperty(
+                PigConstants.PIG_OPTIMIZER_RULES_DISABLED_KEY);
+        if (pigOptimizerRulesDisabled != null) {
+            disabledOptimizerRules.addAll(Lists.newArrayList((Splitter.on(",").split(
+                    pigOptimizerRulesDisabled))));
+        }
+
         if (pigContext.inIllustrator) {
-            // disable MergeForEach in illustrator
-            if (optimizerRules == null)
-                optimizerRules = new HashSet<String>();
-            optimizerRules.add("MergeForEach");
-            optimizerRules.add("PartitionFilterOptimizer");
-            optimizerRules.add("LimitOptimizer");
-            optimizerRules.add("SplitFilter");
-            optimizerRules.add("PushUpFilter");
-            optimizerRules.add("MergeFilter");
-            optimizerRules.add("PushDownForEachFlatten");
-            optimizerRules.add("ColumnMapKeyPrune");
-            optimizerRules.add("AddForEach");
-            optimizerRules.add("GroupByConstParallelSetter");
+            disabledOptimizerRules.add("MergeForEach");
+            disabledOptimizerRules.add("PartitionFilterOptimizer");
+            disabledOptimizerRules.add("LimitOptimizer");
+            disabledOptimizerRules.add("SplitFilter");
+            disabledOptimizerRules.add("PushUpFilter");
+            disabledOptimizerRules.add("MergeFilter");
+            disabledOptimizerRules.add("PushDownForEachFlatten");
+            disabledOptimizerRules.add("ColumnMapKeyPrune");
+            disabledOptimizerRules.add("AddForEach");
+            disabledOptimizerRules.add("GroupByConstParallelSetter");
         }
 
         StoreAliasSetter storeAliasSetter = new StoreAliasSetter( plan );
         storeAliasSetter.visit();
         
         // run optimizer
-        LogicalPlanOptimizer optimizer = new LogicalPlanOptimizer( plan, 100, optimizerRules );
+        LogicalPlanOptimizer optimizer = new LogicalPlanOptimizer(plan, 100, disabledOptimizerRules);
         optimizer.optimize();
         
         // compute whether output data is sorted or not

Modified: pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java Fri May 17 05:54:11 2013
@@ -22,6 +22,12 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
+import com.google.common.base.Preconditions;
+import com.google.common.collect.SetMultimap;
+import com.google.common.collect.TreeMultimap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.pig.newplan.OperatorPlan;
 import org.apache.pig.newplan.logical.rules.AddForEach;
 import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune;
@@ -29,7 +35,6 @@ import org.apache.pig.newplan.logical.ru
 import org.apache.pig.newplan.logical.rules.FilterAboveForeach;
 import org.apache.pig.newplan.logical.rules.GroupByConstParallelSetter;
 import org.apache.pig.newplan.logical.rules.ImplicitSplitInserter;
-import org.apache.pig.newplan.logical.rules.InputOutputFileValidator;
 import org.apache.pig.newplan.logical.rules.LimitOptimizer;
 import org.apache.pig.newplan.logical.rules.LoadTypeCastInserter;
 import org.apache.pig.newplan.logical.rules.LogicalExpressionSimplifier;
@@ -44,12 +49,28 @@ import org.apache.pig.newplan.optimizer.
 import org.apache.pig.newplan.optimizer.Rule;
 
 public class LogicalPlanOptimizer extends PlanOptimizer {
+    private static final Log LOG = LogFactory.getLog(LogicalPlanOptimizer.class);
+    private static enum RulesReportKey { RULES_ENABLED, RULES_DISABLED }
     private Set<String> mRulesOff = null;
-    
-    public LogicalPlanOptimizer(OperatorPlan p, int iterations, Set<String> turnOffRules) {    	
+    private boolean allRulesDisabled = false;
+    private SetMultimap<RulesReportKey, String> rulesReport = TreeMultimap.create();
+
+    /**
+     * Create a new LogicalPlanOptimizer.
+     * @param p               Plan to optimize.
+     * @param iterations      Maximum number of optimizer iterations.
+     * @param turnOffRules    Optimization rules to disable. "all" disables all non-mandatory
+     *                        rules. null enables all rules.
+     */
+    public LogicalPlanOptimizer(OperatorPlan p, int iterations, Set<String> turnOffRules) {
         super(p, null, iterations);
-        this.mRulesOff = turnOffRules;
+        mRulesOff = turnOffRules == null ? new HashSet<String>() : turnOffRules;
+        if (mRulesOff.contains("all")) {
+            allRulesDisabled = true;
+        }
+
         ruleSets = buildRuleSets();
+        LOG.info(rulesReport);
         addListeners();
     }
 
@@ -183,34 +204,25 @@ public class LogicalPlanOptimizer extend
         
         return ls;
     }
-        
+
+    /**
+     * Add rule to ruleSet if it is mandatory, or has not been disabled.
+     * @param ruleSet    Set rule will be added to if not disabled.
+     * @param rule       Rule to potentially add.
+     */
     private void checkAndAddRule(Set<Rule> ruleSet, Rule rule) {
+        Preconditions.checkArgument(ruleSet != null);
+        Preconditions.checkArgument(rule != null && rule.getName() != null);
+
         if (rule.isMandatory()) {
             ruleSet.add(rule);
-            return;
-        }
-        
-        boolean turnAllRulesOff = false;
-        if (mRulesOff != null) {
-            for (String ruleName : mRulesOff) {
-                if ("all".equalsIgnoreCase(ruleName)) {
-                    turnAllRulesOff = true;
-                    break;
-                }
-            }
-        }
-        
-        if (turnAllRulesOff) return;
-        
-        if(mRulesOff != null) {
-            for(String ruleOff: mRulesOff) {
-                String ruleName = rule.getName();
-                if(ruleName == null) continue;
-                if(ruleName.equalsIgnoreCase(ruleOff)) return;
-            }
+            rulesReport.put(RulesReportKey.RULES_ENABLED, rule.getName());
+        } else if (!allRulesDisabled && !mRulesOff.contains(rule.getName())) {
+            ruleSet.add(rule);
+            rulesReport.put(RulesReportKey.RULES_ENABLED, rule.getName());
+        } else {
+            rulesReport.put(RulesReportKey.RULES_DISABLED, rule.getName());
         }
-        
-        ruleSet.add(rule);
     }
 
     private void addListeners() {

Modified: pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java Fri May 17 05:54:11 2013
@@ -45,6 +45,7 @@ import org.apache.pig.data.DataType;
 import org.apache.pig.data.DefaultBagFactory;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.PigImplConstants;
 import org.apache.pig.impl.io.FileLocalizer;
 import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
@@ -1524,7 +1525,9 @@ public class TestEvalPipeline2 {
         
         HashSet<String> optimizerRules = new HashSet<String>();
         optimizerRules.add("MergeForEach");
-        pigServer.getPigContext().getProperties().setProperty("pig.optimizer.rules", ObjectSerializer.serialize(optimizerRules));
+        pigServer.getPigContext().getProperties().setProperty(
+                PigImplConstants.PIG_OPTIMIZER_RULES_KEY,
+                ObjectSerializer.serialize(optimizerRules));
         
         Util.createInputFile(cluster, "table_testProjectNullBag", input1);
         pigServer.registerQuery("a = load 'table_testProjectNullBag' as (a0:bag{}, a1:int);");
@@ -1540,7 +1543,7 @@ public class TestEvalPipeline2 {
         
         Assert.assertFalse(iter.hasNext());
         
-        pigServer.getPigContext().getProperties().remove("pig.optimizer.rules");
+        pigServer.getPigContext().getProperties().remove(PigImplConstants.PIG_OPTIMIZER_RULES_KEY);
     }
     
     // See PIG-2159