You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2014/08/21 23:24:17 UTC
svn commit: r1619596 - in /pig/trunk: CHANGES.txt src/docs/src/documentation/content/xdocs/perf.xml src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java
Author: cheolsoo
Date: Thu Aug 21 21:24:17 2014
New Revision: 1619596
URL: http://svn.apache.org/r1619596
Log:
PIG-4135: Fetch optimization should be disabled if plan contains no limit (cheolsoo)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1619596&r1=1619595&r2=1619596&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Aug 21 21:24:17 2014
@@ -64,6 +64,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-4135: Fetch optimization should be disabled if plan contains no limit (cheolsoo)
+
PIG-4061: Make Streaming UDF work in Tez (hotfix PIG-4061-3.patch)
PIG-4134: TEZ-1449 broke the build (knoguchi)
Modified: pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml?rev=1619596&r1=1619595&r2=1619596&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml Thu Aug 21 21:24:17 2014
@@ -1065,6 +1065,7 @@ Fetching will be disabled in case of:
</p>
<ul>
<li>the presence of other operators, <a href="http://pig.apache.org/docs/r0.13.0/api/org/apache/pig/impl/builtin/SampleLoader.html">sample loaders</a> and scalar expressions</li>
+ <li>no <a href="basic.html#limit">LIMIT</a> operator</li>
<li>implicit splits</li>
</ul>
Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java?rev=1619596&r1=1619595&r2=1619596&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java Thu Aug 21 21:24:17 2014
@@ -38,6 +38,7 @@ import org.apache.pig.backend.hadoop.exe
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PODistinct;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFRJoin;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POGlobalRearrange;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLimit;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POMergeCogroup;
@@ -95,8 +96,12 @@ public class FetchOptimizer {
if (isEligible(pc, pp)) {
FetchablePlanVisitor fpv = new FetchablePlanVisitor(pc, pp);
fpv.visit();
- boolean isFetchable = fpv.isPlanFetchable();
- //initialization
+ // Plan is fetchable only if FetchablePlanVisitor returns true AND
+ // limit is present in the plan. Limit is a safeguard. If the input
+ // is large, and there is no limit, fetch optimizer will fetch the
+ // entire input to the client. That can be dangerous.
+ boolean isFetchable = fpv.isPlanFetchable() &&
+ PlanHelper.containsPhysicalOperator(pp, POLimit.class);
if (isFetchable)
init(pp);
return isFetchable;
@@ -307,7 +312,7 @@ public class FetchOptimizer {
private boolean isPlanFetchable() {
return planFetchable;
}
-
+
private boolean isTempPath(String basePathName) throws DataStorageException {
String tdir = pc.getProperties().getProperty("pig.temp.dir", "/tmp");
String tempStore = pc.getDfs().asContainer(tdir + "/temp").toString();