You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by vv...@apache.org on 2016/02/23 10:17:44 UTC
[1/5] hadoop git commit: MAPREDUCE-6635. Unsafe long to int
conversion in UncompressedSplitLineReader and IndexOutOfBoundsException.
Contributed by Junping Du.
Repository: hadoop
Updated Branches:
refs/heads/branch-2 2c218ca8a -> f1999fe27
refs/heads/branch-2.6 3fea7f0a3 -> d32100d7f
refs/heads/branch-2.7 33b961ee8 -> 0edc76418
refs/heads/branch-2.8 5f68f640a -> cb4f5ebd3
refs/heads/trunk 140cb5d74 -> c6f2d761d
MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader and IndexOutOfBoundsException. Contributed by Junping Du.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c6f2d761
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c6f2d761
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c6f2d761
Branch: refs/heads/trunk
Commit: c6f2d761d5430eac6b9f07f137a7028de4e0660c
Parents: 140cb5d
Author: Varun Vasudev <vv...@apache.org>
Authored: Tue Feb 23 13:05:18 2016 +0530
Committer: Varun Vasudev <vv...@apache.org>
Committed: Tue Feb 23 13:05:18 2016 +0530
----------------------------------------------------------------------
hadoop-mapreduce-project/CHANGES.txt | 9 ++++
.../lib/input/UncompressedSplitLineReader.java | 7 ++-
.../hadoop/mapred/TestLineRecordReader.java | 53 ++++++++++++++++++++
3 files changed, 67 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c6f2d761/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index f0ad171..9628d49 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -724,6 +724,9 @@ Release 2.8.0 - UNRELEASED
MAPREDUCE-6616. Fail to create jobhistory file if there are some multibyte
characters in the job name. (Kousuke Saruta via aajisaka)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES
@@ -763,6 +766,9 @@ Release 2.7.3 - UNRELEASED
MAPREDUCE-6191. Improve clearing stale state of Java serialization
testcase. (Sam Liu via Eric Yang)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.7.2 - 2016-01-25
INCOMPATIBLE CHANGES
@@ -1056,6 +1062,9 @@ Release 2.6.5 - UNRELEASED
MAPREDUCE-6191. Improve clearing stale state of Java serialization
testcase. (Sam Liu via Eric Yang)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.6.4 - 2016-02-11
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c6f2d761/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
index 6d495ef..bda0218 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
@@ -53,8 +53,11 @@ public class UncompressedSplitLineReader extends SplitLineReader {
throws IOException {
int maxBytesToRead = buffer.length;
if (totalBytesRead < splitLength) {
- maxBytesToRead = Math.min(maxBytesToRead,
- (int)(splitLength - totalBytesRead));
+ long leftBytesForSplit = splitLength - totalBytesRead;
+ // check if leftBytesForSplit exceed Integer.MAX_VALUE
+ if (leftBytesForSplit <= Integer.MAX_VALUE) {
+ maxBytesToRead = Math.min(maxBytesToRead, (int)leftBytesForSplit);
+ }
}
int bytesRead = in.read(buffer, 0, maxBytesToRead);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c6f2d761/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
index f0cf9f5..f50e1ef 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
@@ -110,6 +110,43 @@ public class TestLineRecordReader {
numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits);
}
+ private void testLargeSplitRecordForFile(Configuration conf,
+ long firstSplitLength, long testFileSize, Path testFilePath)
+ throws IOException {
+ conf.setInt(org.apache.hadoop.mapreduce.lib.input.
+ LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
+ assertTrue("unexpected firstSplitLength:" + firstSplitLength,
+ testFileSize < firstSplitLength);
+ String delimiter = conf.get("textinputformat.record.delimiter");
+ byte[] recordDelimiterBytes = null;
+ if (null != delimiter) {
+ recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
+ }
+ // read the data without splitting to count the records
+ FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
+ (String[])null);
+ LineRecordReader reader = new LineRecordReader(conf, split,
+ recordDelimiterBytes);
+ LongWritable key = new LongWritable();
+ Text value = new Text();
+ int numRecordsNoSplits = 0;
+ while (reader.next(key, value)) {
+ ++numRecordsNoSplits;
+ }
+ reader.close();
+
+ // count the records in the first split
+ split = new FileSplit(testFilePath, 0, firstSplitLength, (String[])null);
+ reader = new LineRecordReader(conf, split, recordDelimiterBytes);
+ int numRecordsFirstSplit = 0;
+ while (reader.next(key, value)) {
+ ++numRecordsFirstSplit;
+ }
+ reader.close();
+ assertEquals("Unexpected number of records in split",
+ numRecordsNoSplits, numRecordsFirstSplit);
+ }
+
@Test
public void testBzip2SplitEndsAtCR() throws IOException {
// the test data contains a carriage-return at the end of the first
@@ -332,6 +369,22 @@ public class TestLineRecordReader {
}
@Test
+ public void testUncompressedInputWithLargeSplitSize() throws Exception {
+ Configuration conf = new Configuration();
+ // single char delimiter
+ String inputData = "abcde +fghij+ klmno+pqrst+uvwxyz";
+ Path inputFile = createInputFile(conf, inputData);
+ conf.set("textinputformat.record.delimiter", "+");
+ // split size over max value of integer
+ long longSplitSize = (long)Integer.MAX_VALUE + 1;
+ for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
+ conf.setInt("io.file.buffer.size", bufferSize);
+ testLargeSplitRecordForFile(conf, longSplitSize, inputData.length(),
+ inputFile);
+ }
+ }
+
+ @Test
public void testUncompressedInput() throws Exception {
Configuration conf = new Configuration();
// single char delimiter, best case
[4/5] hadoop git commit: MAPREDUCE-6635. Unsafe long to int
conversion in UncompressedSplitLineReader and IndexOutOfBoundsException.
Contributed by Junping Du.
Posted by vv...@apache.org.
MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader and IndexOutOfBoundsException. Contributed by Junping Du.
(cherry picked from commit c6f2d761d5430eac6b9f07f137a7028de4e0660c)
(cherry picked from commit f1999fe2754cbf11b138fb048c7486cab9b02c97)
Conflicts:
hadoop-mapreduce-project/CHANGES.txt
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d32100d7
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d32100d7
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d32100d7
Branch: refs/heads/branch-2.6
Commit: d32100d7fcdbdf2a0584a647bf4d7249909cf402
Parents: 3fea7f0
Author: Varun Vasudev <vv...@apache.org>
Authored: Tue Feb 23 13:05:18 2016 +0530
Committer: Varun Vasudev <vv...@apache.org>
Committed: Tue Feb 23 14:08:14 2016 +0530
----------------------------------------------------------------------
hadoop-mapreduce-project/CHANGES.txt | 3 ++
.../lib/input/UncompressedSplitLineReader.java | 7 ++-
.../hadoop/mapred/TestLineRecordReader.java | 53 ++++++++++++++++++++
3 files changed, 61 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d32100d7/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 97a9f7b..6ab854c 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -18,6 +18,9 @@ Release 2.6.5 - UNRELEASED
MAPREDUCE-6191. Improve clearing stale state of Java serialization
testcase. (Sam Liu via Eric Yang)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.6.4 - 2016-02-11
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d32100d7/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
index 6d495ef..bda0218 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
@@ -53,8 +53,11 @@ public class UncompressedSplitLineReader extends SplitLineReader {
throws IOException {
int maxBytesToRead = buffer.length;
if (totalBytesRead < splitLength) {
- maxBytesToRead = Math.min(maxBytesToRead,
- (int)(splitLength - totalBytesRead));
+ long leftBytesForSplit = splitLength - totalBytesRead;
+ // check if leftBytesForSplit exceed Integer.MAX_VALUE
+ if (leftBytesForSplit <= Integer.MAX_VALUE) {
+ maxBytesToRead = Math.min(maxBytesToRead, (int)leftBytesForSplit);
+ }
}
int bytesRead = in.read(buffer, 0, maxBytesToRead);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/d32100d7/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
index 3e6002b..cb733bd 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
@@ -105,6 +105,43 @@ public class TestLineRecordReader {
numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits);
}
+ private void testLargeSplitRecordForFile(Configuration conf,
+ long firstSplitLength, long testFileSize, Path testFilePath)
+ throws IOException {
+ conf.setInt(org.apache.hadoop.mapreduce.lib.input.
+ LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
+ assertTrue("unexpected firstSplitLength:" + firstSplitLength,
+ testFileSize < firstSplitLength);
+ String delimiter = conf.get("textinputformat.record.delimiter");
+ byte[] recordDelimiterBytes = null;
+ if (null != delimiter) {
+ recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
+ }
+ // read the data without splitting to count the records
+ FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
+ (String[])null);
+ LineRecordReader reader = new LineRecordReader(conf, split,
+ recordDelimiterBytes);
+ LongWritable key = new LongWritable();
+ Text value = new Text();
+ int numRecordsNoSplits = 0;
+ while (reader.next(key, value)) {
+ ++numRecordsNoSplits;
+ }
+ reader.close();
+
+ // count the records in the first split
+ split = new FileSplit(testFilePath, 0, firstSplitLength, (String[])null);
+ reader = new LineRecordReader(conf, split, recordDelimiterBytes);
+ int numRecordsFirstSplit = 0;
+ while (reader.next(key, value)) {
+ ++numRecordsFirstSplit;
+ }
+ reader.close();
+ assertEquals("Unexpected number of records in split",
+ numRecordsNoSplits, numRecordsFirstSplit);
+ }
+
@Test
public void testBzip2SplitEndsAtCR() throws IOException {
// the test data contains a carriage-return at the end of the first
@@ -267,6 +304,22 @@ public class TestLineRecordReader {
}
@Test
+ public void testUncompressedInputWithLargeSplitSize() throws Exception {
+ Configuration conf = new Configuration();
+ // single char delimiter
+ String inputData = "abcde +fghij+ klmno+pqrst+uvwxyz";
+ Path inputFile = createInputFile(conf, inputData);
+ conf.set("textinputformat.record.delimiter", "+");
+ // split size over max value of integer
+ long longSplitSize = (long)Integer.MAX_VALUE + 1;
+ for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
+ conf.setInt("io.file.buffer.size", bufferSize);
+ testLargeSplitRecordForFile(conf, longSplitSize, inputData.length(),
+ inputFile);
+ }
+ }
+
+ @Test
public void testUncompressedInput() throws Exception {
Configuration conf = new Configuration();
// single char delimiter, best case
[3/5] hadoop git commit: MAPREDUCE-6635. Unsafe long to int
conversion in UncompressedSplitLineReader and IndexOutOfBoundsException.
Contributed by Junping Du.
Posted by vv...@apache.org.
MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader and IndexOutOfBoundsException. Contributed by Junping Du.
(cherry picked from commit c6f2d761d5430eac6b9f07f137a7028de4e0660c)
(cherry picked from commit f1999fe2754cbf11b138fb048c7486cab9b02c97)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/cb4f5ebd
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/cb4f5ebd
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/cb4f5ebd
Branch: refs/heads/branch-2.8
Commit: cb4f5ebd3e39e414e1700eab2b916039ce925990
Parents: 5f68f64
Author: Varun Vasudev <vv...@apache.org>
Authored: Tue Feb 23 13:05:18 2016 +0530
Committer: Varun Vasudev <vv...@apache.org>
Committed: Tue Feb 23 13:07:40 2016 +0530
----------------------------------------------------------------------
hadoop-mapreduce-project/CHANGES.txt | 9 ++++
.../lib/input/UncompressedSplitLineReader.java | 7 ++-
.../hadoop/mapred/TestLineRecordReader.java | 53 ++++++++++++++++++++
3 files changed, 67 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb4f5ebd/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index d7a0dd8..918d542 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -412,6 +412,9 @@ Release 2.8.0 - UNRELEASED
MAPREDUCE-6616. Fail to create jobhistory file if there are some multibyte
characters in the job name. (Kousuke Saruta via aajisaka)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES
@@ -451,6 +454,9 @@ Release 2.7.3 - UNRELEASED
MAPREDUCE-6191. Improve clearing stale state of Java serialization
testcase. (Sam Liu via Eric Yang)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.7.2 - 2016-01-25
INCOMPATIBLE CHANGES
@@ -741,6 +747,9 @@ Release 2.6.5 - UNRELEASED
MAPREDUCE-6191. Improve clearing stale state of Java serialization
testcase. (Sam Liu via Eric Yang)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.6.4 - 2016-02-11
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb4f5ebd/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
index 6d495ef..bda0218 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
@@ -53,8 +53,11 @@ public class UncompressedSplitLineReader extends SplitLineReader {
throws IOException {
int maxBytesToRead = buffer.length;
if (totalBytesRead < splitLength) {
- maxBytesToRead = Math.min(maxBytesToRead,
- (int)(splitLength - totalBytesRead));
+ long leftBytesForSplit = splitLength - totalBytesRead;
+ // check if leftBytesForSplit exceed Integer.MAX_VALUE
+ if (leftBytesForSplit <= Integer.MAX_VALUE) {
+ maxBytesToRead = Math.min(maxBytesToRead, (int)leftBytesForSplit);
+ }
}
int bytesRead = in.read(buffer, 0, maxBytesToRead);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb4f5ebd/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
index f0cf9f5..f50e1ef 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
@@ -110,6 +110,43 @@ public class TestLineRecordReader {
numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits);
}
+ private void testLargeSplitRecordForFile(Configuration conf,
+ long firstSplitLength, long testFileSize, Path testFilePath)
+ throws IOException {
+ conf.setInt(org.apache.hadoop.mapreduce.lib.input.
+ LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
+ assertTrue("unexpected firstSplitLength:" + firstSplitLength,
+ testFileSize < firstSplitLength);
+ String delimiter = conf.get("textinputformat.record.delimiter");
+ byte[] recordDelimiterBytes = null;
+ if (null != delimiter) {
+ recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
+ }
+ // read the data without splitting to count the records
+ FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
+ (String[])null);
+ LineRecordReader reader = new LineRecordReader(conf, split,
+ recordDelimiterBytes);
+ LongWritable key = new LongWritable();
+ Text value = new Text();
+ int numRecordsNoSplits = 0;
+ while (reader.next(key, value)) {
+ ++numRecordsNoSplits;
+ }
+ reader.close();
+
+ // count the records in the first split
+ split = new FileSplit(testFilePath, 0, firstSplitLength, (String[])null);
+ reader = new LineRecordReader(conf, split, recordDelimiterBytes);
+ int numRecordsFirstSplit = 0;
+ while (reader.next(key, value)) {
+ ++numRecordsFirstSplit;
+ }
+ reader.close();
+ assertEquals("Unexpected number of records in split",
+ numRecordsNoSplits, numRecordsFirstSplit);
+ }
+
@Test
public void testBzip2SplitEndsAtCR() throws IOException {
// the test data contains a carriage-return at the end of the first
@@ -332,6 +369,22 @@ public class TestLineRecordReader {
}
@Test
+ public void testUncompressedInputWithLargeSplitSize() throws Exception {
+ Configuration conf = new Configuration();
+ // single char delimiter
+ String inputData = "abcde +fghij+ klmno+pqrst+uvwxyz";
+ Path inputFile = createInputFile(conf, inputData);
+ conf.set("textinputformat.record.delimiter", "+");
+ // split size over max value of integer
+ long longSplitSize = (long)Integer.MAX_VALUE + 1;
+ for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
+ conf.setInt("io.file.buffer.size", bufferSize);
+ testLargeSplitRecordForFile(conf, longSplitSize, inputData.length(),
+ inputFile);
+ }
+ }
+
+ @Test
public void testUncompressedInput() throws Exception {
Configuration conf = new Configuration();
// single char delimiter, best case
[2/5] hadoop git commit: MAPREDUCE-6635. Unsafe long to int
conversion in UncompressedSplitLineReader and IndexOutOfBoundsException.
Contributed by Junping Du.
Posted by vv...@apache.org.
MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader and IndexOutOfBoundsException. Contributed by Junping Du.
(cherry picked from commit c6f2d761d5430eac6b9f07f137a7028de4e0660c)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f1999fe2
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f1999fe2
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f1999fe2
Branch: refs/heads/branch-2
Commit: f1999fe2754cbf11b138fb048c7486cab9b02c97
Parents: 2c218ca
Author: Varun Vasudev <vv...@apache.org>
Authored: Tue Feb 23 13:05:18 2016 +0530
Committer: Varun Vasudev <vv...@apache.org>
Committed: Tue Feb 23 13:05:52 2016 +0530
----------------------------------------------------------------------
hadoop-mapreduce-project/CHANGES.txt | 9 ++++
.../lib/input/UncompressedSplitLineReader.java | 7 ++-
.../hadoop/mapred/TestLineRecordReader.java | 53 ++++++++++++++++++++
3 files changed, 67 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1999fe2/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index b9c1ea6..8d56f56 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -435,6 +435,9 @@ Release 2.8.0 - UNRELEASED
MAPREDUCE-6616. Fail to create jobhistory file if there are some multibyte
characters in the job name. (Kousuke Saruta via aajisaka)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES
@@ -474,6 +477,9 @@ Release 2.7.3 - UNRELEASED
MAPREDUCE-6191. Improve clearing stale state of Java serialization
testcase. (Sam Liu via Eric Yang)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.7.2 - 2016-01-25
INCOMPATIBLE CHANGES
@@ -764,6 +770,9 @@ Release 2.6.5 - UNRELEASED
MAPREDUCE-6191. Improve clearing stale state of Java serialization
testcase. (Sam Liu via Eric Yang)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.6.4 - 2016-02-11
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1999fe2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
index 6d495ef..bda0218 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
@@ -53,8 +53,11 @@ public class UncompressedSplitLineReader extends SplitLineReader {
throws IOException {
int maxBytesToRead = buffer.length;
if (totalBytesRead < splitLength) {
- maxBytesToRead = Math.min(maxBytesToRead,
- (int)(splitLength - totalBytesRead));
+ long leftBytesForSplit = splitLength - totalBytesRead;
+ // check if leftBytesForSplit exceed Integer.MAX_VALUE
+ if (leftBytesForSplit <= Integer.MAX_VALUE) {
+ maxBytesToRead = Math.min(maxBytesToRead, (int)leftBytesForSplit);
+ }
}
int bytesRead = in.read(buffer, 0, maxBytesToRead);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1999fe2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
index f0cf9f5..f50e1ef 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
@@ -110,6 +110,43 @@ public class TestLineRecordReader {
numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits);
}
+ private void testLargeSplitRecordForFile(Configuration conf,
+ long firstSplitLength, long testFileSize, Path testFilePath)
+ throws IOException {
+ conf.setInt(org.apache.hadoop.mapreduce.lib.input.
+ LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
+ assertTrue("unexpected firstSplitLength:" + firstSplitLength,
+ testFileSize < firstSplitLength);
+ String delimiter = conf.get("textinputformat.record.delimiter");
+ byte[] recordDelimiterBytes = null;
+ if (null != delimiter) {
+ recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
+ }
+ // read the data without splitting to count the records
+ FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
+ (String[])null);
+ LineRecordReader reader = new LineRecordReader(conf, split,
+ recordDelimiterBytes);
+ LongWritable key = new LongWritable();
+ Text value = new Text();
+ int numRecordsNoSplits = 0;
+ while (reader.next(key, value)) {
+ ++numRecordsNoSplits;
+ }
+ reader.close();
+
+ // count the records in the first split
+ split = new FileSplit(testFilePath, 0, firstSplitLength, (String[])null);
+ reader = new LineRecordReader(conf, split, recordDelimiterBytes);
+ int numRecordsFirstSplit = 0;
+ while (reader.next(key, value)) {
+ ++numRecordsFirstSplit;
+ }
+ reader.close();
+ assertEquals("Unexpected number of records in split",
+ numRecordsNoSplits, numRecordsFirstSplit);
+ }
+
@Test
public void testBzip2SplitEndsAtCR() throws IOException {
// the test data contains a carriage-return at the end of the first
@@ -332,6 +369,22 @@ public class TestLineRecordReader {
}
@Test
+ public void testUncompressedInputWithLargeSplitSize() throws Exception {
+ Configuration conf = new Configuration();
+ // single char delimiter
+ String inputData = "abcde +fghij+ klmno+pqrst+uvwxyz";
+ Path inputFile = createInputFile(conf, inputData);
+ conf.set("textinputformat.record.delimiter", "+");
+ // split size over max value of integer
+ long longSplitSize = (long)Integer.MAX_VALUE + 1;
+ for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
+ conf.setInt("io.file.buffer.size", bufferSize);
+ testLargeSplitRecordForFile(conf, longSplitSize, inputData.length(),
+ inputFile);
+ }
+ }
+
+ @Test
public void testUncompressedInput() throws Exception {
Configuration conf = new Configuration();
// single char delimiter, best case
[5/5] hadoop git commit: MAPREDUCE-6635. Unsafe long to int
conversion in UncompressedSplitLineReader and IndexOutOfBoundsException.
Contributed by Junping Du.
Posted by vv...@apache.org.
MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader and IndexOutOfBoundsException. Contributed by Junping Du.
(cherry picked from commit c6f2d761d5430eac6b9f07f137a7028de4e0660c)
(cherry picked from commit f1999fe2754cbf11b138fb048c7486cab9b02c97)
Conflicts:
hadoop-mapreduce-project/CHANGES.txt
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0edc7641
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0edc7641
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0edc7641
Branch: refs/heads/branch-2.7
Commit: 0edc7641842e535976b7fab40abf8d3d293cabe9
Parents: 33b961e
Author: Varun Vasudev <vv...@apache.org>
Authored: Tue Feb 23 13:05:18 2016 +0530
Committer: Varun Vasudev <vv...@apache.org>
Committed: Tue Feb 23 14:45:07 2016 +0530
----------------------------------------------------------------------
hadoop-mapreduce-project/CHANGES.txt | 6 +++
.../lib/input/UncompressedSplitLineReader.java | 7 ++-
.../hadoop/mapred/TestLineRecordReader.java | 53 ++++++++++++++++++++
3 files changed, 64 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0edc7641/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 6f90ef5..ae855ca 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -52,6 +52,9 @@ Release 2.7.3 - UNRELEASED
MAPREDUCE-6460. TestRMContainerAllocator.
testAttemptNotFoundCausesRMCommunicatorException fails. (Zhihai Xu)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.7.2 - 2016-01-25
INCOMPATIBLE CHANGES
@@ -342,6 +345,9 @@ Release 2.6.5 - UNRELEASED
MAPREDUCE-6191. Improve clearing stale state of Java serialization
testcase. (Sam Liu via Eric Yang)
+ MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
+ and IndexOutOfBoundsException. (Junping Du via vvasudev)
+
Release 2.6.4 - 2016-02-11
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0edc7641/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
index 6d495ef..bda0218 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/UncompressedSplitLineReader.java
@@ -53,8 +53,11 @@ public class UncompressedSplitLineReader extends SplitLineReader {
throws IOException {
int maxBytesToRead = buffer.length;
if (totalBytesRead < splitLength) {
- maxBytesToRead = Math.min(maxBytesToRead,
- (int)(splitLength - totalBytesRead));
+ long leftBytesForSplit = splitLength - totalBytesRead;
+ // check if leftBytesForSplit exceeds Integer.MAX_VALUE
+ if (leftBytesForSplit <= Integer.MAX_VALUE) {
+ maxBytesToRead = Math.min(maxBytesToRead, (int)leftBytesForSplit);
+ }
}
int bytesRead = in.read(buffer, 0, maxBytesToRead);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/0edc7641/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
index f9d0335..986a2b2 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
@@ -110,6 +110,43 @@ public class TestLineRecordReader {
numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits);
}
+ private void testLargeSplitRecordForFile(Configuration conf,
+ long firstSplitLength, long testFileSize, Path testFilePath)
+ throws IOException {
+ conf.setInt(org.apache.hadoop.mapreduce.lib.input.
+ LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
+ assertTrue("unexpected firstSplitLength:" + firstSplitLength,
+ testFileSize < firstSplitLength);
+ String delimiter = conf.get("textinputformat.record.delimiter");
+ byte[] recordDelimiterBytes = null;
+ if (null != delimiter) {
+ recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
+ }
+ // read the data without splitting to count the records
+ FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
+ (String[])null);
+ LineRecordReader reader = new LineRecordReader(conf, split,
+ recordDelimiterBytes);
+ LongWritable key = new LongWritable();
+ Text value = new Text();
+ int numRecordsNoSplits = 0;
+ while (reader.next(key, value)) {
+ ++numRecordsNoSplits;
+ }
+ reader.close();
+
+ // count the records in the first split
+ split = new FileSplit(testFilePath, 0, firstSplitLength, (String[])null);
+ reader = new LineRecordReader(conf, split, recordDelimiterBytes);
+ int numRecordsFirstSplit = 0;
+ while (reader.next(key, value)) {
+ ++numRecordsFirstSplit;
+ }
+ reader.close();
+ assertEquals("Unexpected number of records in split",
+ numRecordsNoSplits, numRecordsFirstSplit);
+ }
+
@Test
public void testBzip2SplitEndsAtCR() throws IOException {
// the test data contains a carriage-return at the end of the first
@@ -325,6 +362,22 @@ public class TestLineRecordReader {
}
@Test
+ public void testUncompressedInputWithLargeSplitSize() throws Exception {
+ Configuration conf = new Configuration();
+ // single char delimiter
+ String inputData = "abcde +fghij+ klmno+pqrst+uvwxyz";
+ Path inputFile = createInputFile(conf, inputData);
+ conf.set("textinputformat.record.delimiter", "+");
+ // split size over max value of integer
+ long longSplitSize = (long)Integer.MAX_VALUE + 1;
+ for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
+ conf.setInt("io.file.buffer.size", bufferSize);
+ testLargeSplitRecordForFile(conf, longSplitSize, inputData.length(),
+ inputFile);
+ }
+ }
+
+ @Test
public void testUncompressedInput() throws Exception {
Configuration conf = new Configuration();
// single char delimiter, best case