You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2015/09/17 04:38:59 UTC

svn commit: r1703481 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java

Author: daijy
Date: Thu Sep 17 02:38:59 2015
New Revision: 1703481

URL: http://svn.apache.org/r1703481
Log:
PIG-4679: Performance degradation due to InputSizeReducerEstimator since PIG-3754

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
    pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1703481&r1=1703480&r2=1703481&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Sep 17 02:38:59 2015
@@ -40,6 +40,8 @@ PIG-4639: Add better parser for Apache H
 
 BUG FIXES
 
+PIG-4679: Performance degradation due to InputSizeReducerEstimator since PIG-3754 (daijy)
+
 PIG-4315: MergeJoin or Split followed by order by gives NPE in Tez (rohini)
 
 PIG-4654: Reduce tez memory.reserve-fraction and clear spillables for better memory utilization (rohini)

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java?rev=1703481&r1=1703480&r2=1703481&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java Thu Sep 17 02:38:59 2015
@@ -137,11 +137,11 @@ public class InputSizeReducerEstimator i
                             }
                         } else {
                             // If file is not found, we should report -1
-                            return -1;
+                            continue;
                         }
                     } else {
                         // If we cannot estimate size of a location, we should report -1
-                        return -1;
+                        continue;
                     }
                 }
             }

Modified: pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java?rev=1703481&r1=1703480&r2=1703481&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java (original)
+++ pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java Thu Sep 17 02:38:59 2015
@@ -38,30 +38,26 @@ public class TestInputSizeReducerEstimat
     @Test
     public void testGetInputSizeFromFs() throws Exception {
         long size = 2L * 1024 * 1024 * 1024;
+        POLoad load1 = createPOLoadWithSize(size, new PigStorage());
+        POLoad load2 = createPOLoadWithSize(size, new PigStorageWithStatistics());
         Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF, Lists.newArrayList(createPOLoadWithSize(size, new PigStorage())),
-                new org.apache.hadoop.mapreduce.Job(CONF)));
+                CONF, Lists.newArrayList(load1), new org.apache.hadoop.mapreduce.Job(CONF)));
 
         Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF,
-                Lists.newArrayList(createPOLoadWithSize(size, new PigStorageWithStatistics())),
-                new org.apache.hadoop.mapreduce.Job(CONF)));
+                CONF, Lists.newArrayList(load2), new org.apache.hadoop.mapreduce.Job(CONF)));
 
         Assert.assertEquals(size * 2, InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF,
-                Lists.newArrayList(
-                        createPOLoadWithSize(size, new PigStorage()),
-                        createPOLoadWithSize(size, new PigStorageWithStatistics())),
-                        new org.apache.hadoop.mapreduce.Job(CONF)));
+                CONF, Lists.newArrayList(load1, load2), new org.apache.hadoop.mapreduce.Job(CONF)));
 
         // Negative test - PIG-3754
-        POLoad poLoad = createPOLoadWithSize(size, new PigStorage());
-        poLoad.setLFile(new FileSpec("hbase://users", null));
+        load1.setLFile(new FileSpec("hbase://users", null));
 
-        Assert.assertEquals(-1, InputSizeReducerEstimator.getTotalInputFileSize(
-                CONF,
-                Collections.singletonList(poLoad),
-                new org.apache.hadoop.mapreduce.Job(CONF)));
+        Assert.assertEquals(0, InputSizeReducerEstimator.getTotalInputFileSize(
+                CONF, Collections.singletonList(load1), new org.apache.hadoop.mapreduce.Job(CONF)));
+
+        // Skip non-hdfs input - PIG-4679
+        Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
+                CONF, Lists.newArrayList(load1, load2), new org.apache.hadoop.mapreduce.Job(CONF)));
     }
 
     @Test