You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2014/08/21 23:24:17 UTC

svn commit: r1619596 - in /pig/trunk: CHANGES.txt src/docs/src/documentation/content/xdocs/perf.xml src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java

Author: cheolsoo
Date: Thu Aug 21 21:24:17 2014
New Revision: 1619596

URL: http://svn.apache.org/r1619596
Log:
PIG-4135: Fetch optimization should be disabled if plan contains no limit (cheolsoo)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1619596&r1=1619595&r2=1619596&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Aug 21 21:24:17 2014
@@ -64,6 +64,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-4135: Fetch optimization should be disabled if plan contains no limit (cheolsoo)
+
 PIG-4061: Make Streaming UDF work in Tez (hotfix PIG-4061-3.patch)
 
 PIG-4134: TEZ-1449 broke the build (knoguchi)

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml?rev=1619596&r1=1619595&r2=1619596&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml Thu Aug 21 21:24:17 2014
@@ -1065,6 +1065,7 @@ Fetching will be disabled in case of:
 </p>
 <ul>
   <li>the presence of other operators, <a href="http://pig.apache.org/docs/r0.13.0/api/org/apache/pig/impl/builtin/SampleLoader.html">sample loaders</a> and scalar expressions</li>
+  <li>the absence of a <a href="basic.html#limit">LIMIT</a> operator</li>
   <li>implicit splits</li>
 </ul>
 

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java?rev=1619596&r1=1619595&r2=1619596&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java Thu Aug 21 21:24:17 2014
@@ -38,6 +38,7 @@ import org.apache.pig.backend.hadoop.exe
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PODistinct;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFRJoin;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POGlobalRearrange;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLimit;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POMergeCogroup;
@@ -95,8 +96,12 @@ public class FetchOptimizer {
         if (isEligible(pc, pp)) {
             FetchablePlanVisitor fpv = new FetchablePlanVisitor(pc, pp);
             fpv.visit();
-            boolean isFetchable = fpv.isPlanFetchable();
-            //initialization
+            // Plan is fetchable only if FetchablePlanVisitor deems it fetchable AND
+            // the plan contains a limit. The limit is a safeguard: without it, the
+            // fetch optimizer would pull the entire input to the client, which is
+            // dangerous when the input is large.
+            boolean isFetchable = fpv.isPlanFetchable() && 
+                    PlanHelper.containsPhysicalOperator(pp, POLimit.class);
             if (isFetchable)
                 init(pp);
             return isFetchable;
@@ -307,7 +312,7 @@ public class FetchOptimizer {
         private boolean isPlanFetchable() {
             return planFetchable;
         }
-        
+
         private boolean isTempPath(String basePathName) throws DataStorageException {
             String tdir = pc.getProperties().getProperty("pig.temp.dir", "/tmp");
             String tempStore = pc.getDfs().asContainer(tdir + "/temp").toString();