You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by jl...@apache.org on 2015/01/06 22:21:25 UTC
hadoop git commit: HADOOP-11445. Bzip2Codec: Data block is skipped
when position of newly created stream is equal to start of split. Contributed
by Ankit Kamboj (cherry picked from commit
d02fb53750bc592c23ba470ae82eb6f47d9a00ec)
Repository: hadoop
Updated Branches:
refs/heads/branch-2 53ecb6358 -> 2b408d8dc
HADOOP-11445. Bzip2Codec: Data block is skipped when position of newly created stream is equal to start of split. Contributed by Ankit Kamboj
(cherry picked from commit d02fb53750bc592c23ba470ae82eb6f47d9a00ec)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2b408d8d
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2b408d8d
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2b408d8d
Branch: refs/heads/branch-2
Commit: 2b408d8dc70a9042e6185a5573a3b5f37d2c91cd
Parents: 53ecb63
Author: Jason Lowe <jl...@apache.org>
Authored: Tue Jan 6 21:19:10 2015 +0000
Committer: Jason Lowe <jl...@apache.org>
Committed: Tue Jan 6 21:20:31 2015 +0000
----------------------------------------------------------------------
hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++
.../apache/hadoop/io/compress/BZip2Codec.java | 2 +-
.../hadoop/mapred/TestLineRecordReader.java | 21 ++++++++++++++++++++
3 files changed, 25 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2b408d8d/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index f0a1aae..e747ea3 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -322,6 +322,9 @@ Release 2.7.0 - UNRELEASED
HADOOP-11459. Fix recent findbugs in ActiveStandbyElector, NetUtils
and ShellBasedIdMapping (vinayakumarb)
+ HADOOP-11445. Bzip2Codec: Data block is skipped when position of newly
+ created stream is equal to start of split (Ankit Kamboj via jlowe)
+
Release 2.6.0 - 2014-11-18
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2b408d8d/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
index 91178ec..2c5a7be 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
@@ -225,7 +225,7 @@ public class BZip2Codec implements Configurable, SplittableCompressionCodec {
// ........................................^^[We align at wrong position!]
// ...........................................................^^[While this pos is correct]
- if (in.getPos() <= start) {
+ if (in.getPos() < start) {
((Seekable)seekableIn).seek(start);
in = new BZip2CompressionInputStream(seekableIn, start, end, readMode);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2b408d8d/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
index a7a87c9..4c94e59 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java
@@ -106,6 +106,27 @@ public class TestLineRecordReader {
testSplitRecords("blockEndingInCRThenLF.txt.bz2", 136498);
}
+ //This test ensures record reader doesn't lose records when it starts
+ //exactly at the starting byte of a bz2 compressed block
+ @Test
+ public void testBzip2SplitStartAtBlockMarker() throws IOException {
+ //136504 in blockEndingInCR.txt.bz2 is the byte at which the bz2 block ends
+ //In the following test cases record readers should iterate over all the records
+ //and should not miss any record.
+
+ //Start next split exactly at the start of the block.
+ testSplitRecords("blockEndingInCR.txt.bz2", 136504);
+
+ //Start next split a byte forward in next block.
+ testSplitRecords("blockEndingInCR.txt.bz2", 136505);
+
+ //Start next split 3 bytes forward in next block.
+ testSplitRecords("blockEndingInCR.txt.bz2", 136508);
+
+ //Start next split 10 bytes before the end marker.
+ testSplitRecords("blockEndingInCR.txt.bz2", 136494);
+ }
+
// Use the LineRecordReader to read records from the file
public ArrayList<String> readRecords(URL testFileUrl, int splitSize)
throws IOException {