You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by bi...@apache.org on 2013/05/17 07:54:12 UTC
svn commit: r1483659 - in /pig/trunk: ./
src/docs/src/documentation/content/xdocs/ src/org/apache/pig/
src/org/apache/pig/backend/hadoop/executionengine/
src/org/apache/pig/newplan/logical/optimizer/ test/org/apache/pig/test/
Author: billgraham
Date: Fri May 17 05:54:11 2013
New Revision: 1483659
URL: http://svn.apache.org/r1483659
Log:
PIG-3317: disable optimizations via pig properties (traviscrawford via billgraham)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
pig/trunk/src/org/apache/pig/Main.java
pig/trunk/src/org/apache/pig/PigConstants.java
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java
pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri May 17 05:54:11 2013
@@ -28,6 +28,8 @@ PIG-3174: Remove rpm and deb artifacts
IMPROVEMENTS
+PIG-3317: disable optimizations via pig properties (traviscrawford via billgraham)
+
PIG-3321: AVRO: Support user specified schema on load (harveyc via rohini)
PIG-2959: Add a pig.cmd for Pig to run under Windows (daijy)
Modified: pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml Fri May 17 05:54:11 2013
@@ -477,14 +477,24 @@ STORE Gtab INTO '/user/vxj/finalresult2'
<section id="optimization-rules">
<title>Optimization Rules</title>
-<p>Pig supports various optimization rules. By default optimization, and all optimization rules, are turned on.
-To turn off optimiztion, use:</p>
+<p>Pig supports various optimization rules, all of which are enabled by default.
+To disable all or specific optimizations, use one or more of the following methods.
+Note some optimization rules are mandatory and cannot be disabled.</p>
+
+ <ul>
+ <li>The <code>pig.optimizer.rules.disabled</code>
+ <a href="start.html#properties">pig property</a>, which accepts a
+ comma-separated list of optimization rules to disable; the <code>all</code>
+ keyword disables all non-mandatory optimizations.
+ (e.g.: <code>set pig.optimizer.rules.disabled 'ColumnMapKeyPrune';</code>)</li>
+ <li>The <code>-t, -optimizer_off</code> command-line options.
+ (e.g.: <code>pig -optimizer_off [opt_rule | all]</code>)</li>
+ </ul>
+
+<p><code>FilterLogicExpressionSimplifier</code> is an exception to the above.
+The rule is disabled by default, and enabled by setting the
+<code>pig.exec.filterLogicExpressionSimplifier</code> pig property to true.</p>
-<source>
-pig -optimizer_off [opt_rule | all ]
-</source>
-
-<p>Note that some rules are mandatory and cannot be turned off.</p>
<!-- +++++++++++++++++++++++++++++++ -->
<section id="FilterLogicExpressionSimplifier">
Modified: pig/trunk/src/org/apache/pig/Main.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/Main.java?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/Main.java (original)
+++ pig/trunk/src/org/apache/pig/Main.java Fri May 17 05:54:11 2013
@@ -61,6 +61,7 @@ import org.apache.pig.backend.hadoop.dat
import org.apache.pig.classification.InterfaceAudience;
import org.apache.pig.classification.InterfaceStability;
import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.PigImplConstants;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.util.JarManager;
import org.apache.pig.impl.util.LogUtils;
@@ -186,7 +187,7 @@ static int run(String args[], PigProgres
boolean embedded = false;
List<String> params = new ArrayList<String>();
List<String> paramFiles = new ArrayList<String>();
- HashSet<String> optimizerRules = new HashSet<String>();
+ HashSet<String> disabledOptimizerRules = new HashSet<String>();
CmdLineParser opts = new CmdLineParser(pigArgs);
opts.registerOpt('4', "log4jconf", CmdLineParser.ValueExpected.REQUIRED);
@@ -316,7 +317,7 @@ static int run(String args[], PigProgres
break;
case 't':
- optimizerRules.add(opts.getValStr());
+ disabledOptimizerRules.add(opts.getValStr());
break;
case 'v':
@@ -391,12 +392,11 @@ static int run(String args[], PigProgres
if( ! Boolean.valueOf(properties.getProperty(PROP_FILT_SIMPL_OPT, "false"))){
//turn off if the user has not explicitly turned on this optimization
- optimizerRules.add("FilterLogicExpressionSimplifier");
+ disabledOptimizerRules.add("FilterLogicExpressionSimplifier");
}
- if(optimizerRules.size() > 0) {
- pigContext.getProperties().setProperty("pig.optimizer.rules", ObjectSerializer.serialize(optimizerRules));
- }
+ pigContext.getProperties().setProperty(PigImplConstants.PIG_OPTIMIZER_RULES_KEY,
+ ObjectSerializer.serialize(disabledOptimizerRules));
PigContext.setClassLoader(pigContext.createCl(null));
Modified: pig/trunk/src/org/apache/pig/PigConstants.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/PigConstants.java?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/PigConstants.java (original)
+++ pig/trunk/src/org/apache/pig/PigConstants.java Fri May 17 05:54:11 2013
@@ -18,8 +18,13 @@
package org.apache.pig;
+import org.apache.pig.classification.InterfaceAudience;
+
+@InterfaceAudience.Public
public class PigConstants {
- private PigConstants() {}
+ private PigConstants() {
+ throw new IllegalStateException();
+ }
/**
* This key is used in the job conf to let the various jobs know what code was
@@ -35,4 +40,10 @@ public class PigConstants {
// This makes it easy to turn SchemaTuple on globally.
public static final boolean SCHEMA_TUPLE_ON_BY_DEFAULT = false;
+
+ /**
+ * {@value} is a comma-separated list of optimizer rules to disable;
+ * by default, all rules are enabled.
+ */
+ public static final String PIG_OPTIMIZER_RULES_DISABLED_KEY = "pig.optimizer.rules.disabled";
}
\ No newline at end of file
Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java Fri May 17 05:54:11 2013
@@ -30,12 +30,15 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
+import com.google.common.base.Splitter;
+import com.google.common.collect.Lists;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.mapred.JobConf;
import org.apache.pig.ExecType;
+import org.apache.pig.PigConstants;
import org.apache.pig.PigException;
import org.apache.pig.backend.datastorage.DataStorage;
import org.apache.pig.backend.executionengine.ExecException;
@@ -44,6 +47,7 @@ import org.apache.pig.backend.hadoop.dat
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.PigImplConstants;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.util.ObjectSerializer;
@@ -246,38 +250,45 @@ public class HExecutionEngine {
SchemaResetter schemaResetter = new SchemaResetter( plan, true /*skip duplicate uid check*/ );
schemaResetter.visit();
- HashSet<String> optimizerRules = null;
+ HashSet<String> disabledOptimizerRules;
try {
- optimizerRules = (HashSet<String>) ObjectSerializer
+ disabledOptimizerRules = (HashSet<String>) ObjectSerializer
.deserialize(pigContext.getProperties().getProperty(
- "pig.optimizer.rules"));
+ PigImplConstants.PIG_OPTIMIZER_RULES_KEY));
} catch (IOException ioe) {
int errCode = 2110;
String msg = "Unable to deserialize optimizer rules.";
throw new FrontendException(msg, errCode, PigException.BUG, ioe);
}
-
+ if (disabledOptimizerRules == null) {
+ disabledOptimizerRules = new HashSet<String>();
+ }
+
+ String pigOptimizerRulesDisabled = this.pigContext.getProperties().getProperty(
+ PigConstants.PIG_OPTIMIZER_RULES_DISABLED_KEY);
+ if (pigOptimizerRulesDisabled != null) {
+ disabledOptimizerRules.addAll(Lists.newArrayList((Splitter.on(",").split(
+ pigOptimizerRulesDisabled))));
+ }
+
if (pigContext.inIllustrator) {
- // disable MergeForEach in illustrator
- if (optimizerRules == null)
- optimizerRules = new HashSet<String>();
- optimizerRules.add("MergeForEach");
- optimizerRules.add("PartitionFilterOptimizer");
- optimizerRules.add("LimitOptimizer");
- optimizerRules.add("SplitFilter");
- optimizerRules.add("PushUpFilter");
- optimizerRules.add("MergeFilter");
- optimizerRules.add("PushDownForEachFlatten");
- optimizerRules.add("ColumnMapKeyPrune");
- optimizerRules.add("AddForEach");
- optimizerRules.add("GroupByConstParallelSetter");
+ disabledOptimizerRules.add("MergeForEach");
+ disabledOptimizerRules.add("PartitionFilterOptimizer");
+ disabledOptimizerRules.add("LimitOptimizer");
+ disabledOptimizerRules.add("SplitFilter");
+ disabledOptimizerRules.add("PushUpFilter");
+ disabledOptimizerRules.add("MergeFilter");
+ disabledOptimizerRules.add("PushDownForEachFlatten");
+ disabledOptimizerRules.add("ColumnMapKeyPrune");
+ disabledOptimizerRules.add("AddForEach");
+ disabledOptimizerRules.add("GroupByConstParallelSetter");
}
StoreAliasSetter storeAliasSetter = new StoreAliasSetter( plan );
storeAliasSetter.visit();
// run optimizer
- LogicalPlanOptimizer optimizer = new LogicalPlanOptimizer( plan, 100, optimizerRules );
+ LogicalPlanOptimizer optimizer = new LogicalPlanOptimizer(plan, 100, disabledOptimizerRules);
optimizer.optimize();
// compute whether output data is sorted or not
Modified: pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java Fri May 17 05:54:11 2013
@@ -22,6 +22,12 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.SetMultimap;
+import com.google.common.collect.TreeMultimap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.logical.rules.AddForEach;
import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune;
@@ -29,7 +35,6 @@ import org.apache.pig.newplan.logical.ru
import org.apache.pig.newplan.logical.rules.FilterAboveForeach;
import org.apache.pig.newplan.logical.rules.GroupByConstParallelSetter;
import org.apache.pig.newplan.logical.rules.ImplicitSplitInserter;
-import org.apache.pig.newplan.logical.rules.InputOutputFileValidator;
import org.apache.pig.newplan.logical.rules.LimitOptimizer;
import org.apache.pig.newplan.logical.rules.LoadTypeCastInserter;
import org.apache.pig.newplan.logical.rules.LogicalExpressionSimplifier;
@@ -44,12 +49,28 @@ import org.apache.pig.newplan.optimizer.
import org.apache.pig.newplan.optimizer.Rule;
public class LogicalPlanOptimizer extends PlanOptimizer {
+ private static final Log LOG = LogFactory.getLog(LogicalPlanOptimizer.class);
+ private static enum RulesReportKey { RULES_ENABLED, RULES_DISABLED }
private Set<String> mRulesOff = null;
-
- public LogicalPlanOptimizer(OperatorPlan p, int iterations, Set<String> turnOffRules) {
+ private boolean allRulesDisabled = false;
+ private SetMultimap<RulesReportKey, String> rulesReport = TreeMultimap.create();
+
+ /**
+ * Create a new LogicalPlanOptimizer.
+ * @param p Plan to optimize.
+ * @param iterations Maximum number of optimizer iterations.
+ * @param turnOffRules Optimization rules to disable. "all" disables all non-mandatory
+ * rules. null enables all rules.
+ */
+ public LogicalPlanOptimizer(OperatorPlan p, int iterations, Set<String> turnOffRules) {
super(p, null, iterations);
- this.mRulesOff = turnOffRules;
+ mRulesOff = turnOffRules == null ? new HashSet<String>() : turnOffRules;
+ if (mRulesOff.contains("all")) {
+ allRulesDisabled = true;
+ }
+
ruleSets = buildRuleSets();
+ LOG.info(rulesReport);
addListeners();
}
@@ -183,34 +204,25 @@ public class LogicalPlanOptimizer extend
return ls;
}
-
+
+ /**
+ * Add rule to ruleSet if it's mandatory, or has not been disabled.
+ * @param ruleSet Set rule will be added to if not disabled.
+ * @param rule Rule to potentially add.
+ */
private void checkAndAddRule(Set<Rule> ruleSet, Rule rule) {
+ Preconditions.checkArgument(ruleSet != null);
+ Preconditions.checkArgument(rule != null && rule.getName() != null);
+
if (rule.isMandatory()) {
ruleSet.add(rule);
- return;
- }
-
- boolean turnAllRulesOff = false;
- if (mRulesOff != null) {
- for (String ruleName : mRulesOff) {
- if ("all".equalsIgnoreCase(ruleName)) {
- turnAllRulesOff = true;
- break;
- }
- }
- }
-
- if (turnAllRulesOff) return;
-
- if(mRulesOff != null) {
- for(String ruleOff: mRulesOff) {
- String ruleName = rule.getName();
- if(ruleName == null) continue;
- if(ruleName.equalsIgnoreCase(ruleOff)) return;
- }
+ rulesReport.put(RulesReportKey.RULES_ENABLED, rule.getName());
+ } else if (!allRulesDisabled && !mRulesOff.contains(rule.getName())) {
+ ruleSet.add(rule);
+ rulesReport.put(RulesReportKey.RULES_ENABLED, rule.getName());
+ } else {
+ rulesReport.put(RulesReportKey.RULES_DISABLED, rule.getName());
}
-
- ruleSet.add(rule);
}
private void addListeners() {
Modified: pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java?rev=1483659&r1=1483658&r2=1483659&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestEvalPipeline2.java Fri May 17 05:54:11 2013
@@ -45,6 +45,7 @@ import org.apache.pig.data.DataType;
import org.apache.pig.data.DefaultBagFactory;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.PigImplConstants;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
@@ -1524,7 +1525,9 @@ public class TestEvalPipeline2 {
HashSet<String> optimizerRules = new HashSet<String>();
optimizerRules.add("MergeForEach");
- pigServer.getPigContext().getProperties().setProperty("pig.optimizer.rules", ObjectSerializer.serialize(optimizerRules));
+ pigServer.getPigContext().getProperties().setProperty(
+ PigImplConstants.PIG_OPTIMIZER_RULES_KEY,
+ ObjectSerializer.serialize(optimizerRules));
Util.createInputFile(cluster, "table_testProjectNullBag", input1);
pigServer.registerQuery("a = load 'table_testProjectNullBag' as (a0:bag{}, a1:int);");
@@ -1540,7 +1543,7 @@ public class TestEvalPipeline2 {
Assert.assertFalse(iter.hasNext());
- pigServer.getPigContext().getProperties().remove("pig.optimizer.rules");
+ pigServer.getPigContext().getProperties().remove(PigImplConstants.PIG_OPTIMIZER_RULES_KEY);
}
// See PIG-2159