You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2015/09/17 04:38:59 UTC
svn commit: r1703481 - in /pig/trunk: CHANGES.txt
src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
Author: daijy
Date: Thu Sep 17 02:38:59 2015
New Revision: 1703481
URL: http://svn.apache.org/r1703481
Log:
PIG-4679: Performance degradation due to InputSizeReducerEstimator since PIG-3754
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1703481&r1=1703480&r2=1703481&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Sep 17 02:38:59 2015
@@ -40,6 +40,8 @@ PIG-4639: Add better parser for Apache H
BUG FIXES
+PIG-4679: Performance degradation due to InputSizeReducerEstimator since PIG-3754 (daijy)
+
PIG-4315: MergeJoin or Split followed by order by gives NPE in Tez (rohini)
PIG-4654: Reduce tez memory.reserve-fraction and clear spillables for better memory utilization (rohini)
Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java?rev=1703481&r1=1703480&r2=1703481&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/InputSizeReducerEstimator.java Thu Sep 17 02:38:59 2015
@@ -137,11 +137,11 @@ public class InputSizeReducerEstimator i
}
} else {
// If file is not found, we should report -1
- return -1;
+ continue;
}
} else {
// If we cannot estimate size of a location, we should report -1
- return -1;
+ continue;
}
}
}
Modified: pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java?rev=1703481&r1=1703480&r2=1703481&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java (original)
+++ pig/trunk/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java Thu Sep 17 02:38:59 2015
@@ -38,30 +38,26 @@ public class TestInputSizeReducerEstimat
@Test
public void testGetInputSizeFromFs() throws Exception {
long size = 2L * 1024 * 1024 * 1024;
+ POLoad load1 = createPOLoadWithSize(size, new PigStorage());
+ POLoad load2 = createPOLoadWithSize(size, new PigStorageWithStatistics());
Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
- CONF, Lists.newArrayList(createPOLoadWithSize(size, new PigStorage())),
- new org.apache.hadoop.mapreduce.Job(CONF)));
+ CONF, Lists.newArrayList(load1), new org.apache.hadoop.mapreduce.Job(CONF)));
Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
- CONF,
- Lists.newArrayList(createPOLoadWithSize(size, new PigStorageWithStatistics())),
- new org.apache.hadoop.mapreduce.Job(CONF)));
+ CONF, Lists.newArrayList(load2), new org.apache.hadoop.mapreduce.Job(CONF)));
Assert.assertEquals(size * 2, InputSizeReducerEstimator.getTotalInputFileSize(
- CONF,
- Lists.newArrayList(
- createPOLoadWithSize(size, new PigStorage()),
- createPOLoadWithSize(size, new PigStorageWithStatistics())),
- new org.apache.hadoop.mapreduce.Job(CONF)));
+ CONF, Lists.newArrayList(load1, load2), new org.apache.hadoop.mapreduce.Job(CONF)));
// Negative test - PIG-3754
- POLoad poLoad = createPOLoadWithSize(size, new PigStorage());
- poLoad.setLFile(new FileSpec("hbase://users", null));
+ load1.setLFile(new FileSpec("hbase://users", null));
- Assert.assertEquals(-1, InputSizeReducerEstimator.getTotalInputFileSize(
- CONF,
- Collections.singletonList(poLoad),
- new org.apache.hadoop.mapreduce.Job(CONF)));
+ Assert.assertEquals(0, InputSizeReducerEstimator.getTotalInputFileSize(
+ CONF, Collections.singletonList(load1), new org.apache.hadoop.mapreduce.Job(CONF)));
+
+ // Skip non-hdfs input - PIG-4679
+ Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
+ CONF, Lists.newArrayList(load1, load2), new org.apache.hadoop.mapreduce.Job(CONF)));
}
@Test