You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by xy...@apache.org on 2018/02/26 22:31:48 UTC
[39/59] [abbrv] hadoop git commit: HADOOP-6852. apparent bug in concatenated-bzip2 support (decoding). Contributed by Zsolt Venczel.
HADOOP-6852. apparent bug in concatenated-bzip2 support (decoding). Contributed by Zsolt Venczel.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2bc3351e
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2bc3351e
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2bc3351e
Branch: refs/heads/HDFS-7240
Commit: 2bc3351eaf240ea685bcf5042d79f1554bf89e00
Parents: 92cbbfe
Author: Sean Mackrory <ma...@apache.org>
Authored: Wed Feb 21 12:53:18 2018 -0700
Committer: Sean Mackrory <ma...@apache.org>
Committed: Wed Feb 21 12:57:14 2018 -0700
----------------------------------------------------------------------
.../hadoop-client-minicluster/pom.xml | 1 +
.../apache/hadoop/io/compress/BZip2Codec.java | 3 +-
.../mapred/TestConcatenatedCompressedInput.java | 84 +++++++++----------
.../src/test/resources/testdata/concat.bz2 | Bin 0 -> 208 bytes
.../src/test/resources/testdata/concat.gz | Bin 0 -> 148 bytes
.../testdata/testCompressThenConcat.txt.bz2 | Bin 0 -> 3056 bytes
.../testdata/testCompressThenConcat.txt.gz | Bin 0 -> 3413 bytes
.../testdata/testConcatThenCompress.txt.bz2 | Bin 0 -> 2567 bytes
.../testdata/testConcatThenCompress.txt.gz | Bin 0 -> 2734 bytes
9 files changed, 42 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-client-modules/hadoop-client-minicluster/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml
index 905d53a..a443648 100644
--- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml
+++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml
@@ -615,6 +615,7 @@
<excludes>
<exclude>testjar/*</exclude>
<exclude>testshell/*</exclude>
+ <exclude>testdata/*</exclude>
</excludes>
</filter>
<!-- Mockito tries to include its own unrelocated copy of hamcrest. :( -->
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
index 3c78cfc..99590ed 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
@@ -180,7 +180,8 @@ public class BZip2Codec implements Configurable, SplittableCompressionCodec {
new DecompressorStream(in, decompressor,
conf.getInt(IO_FILE_BUFFER_SIZE_KEY,
IO_FILE_BUFFER_SIZE_DEFAULT)) :
- new BZip2CompressionInputStream(in);
+ new BZip2CompressionInputStream(
+ in, 0L, Long.MAX_VALUE, READ_MODE.BYBLOCK);
}
/**
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java
index 977d083..af6b952 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java
@@ -18,18 +18,6 @@
package org.apache.hadoop.mapred;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.ByteArrayInputStream;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.zip.Inflater;
-
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
@@ -42,16 +30,26 @@ import org.apache.hadoop.io.compress.zlib.ZlibFactory;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.After;
-import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-@Ignore
+import java.io.ByteArrayInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.Inflater;
+
+import static org.junit.Assert.*;
+
+/**
+ * Test class for concatenated {@link CompressionInputStream}.
+ */
public class TestConcatenatedCompressedInput {
private static final Logger LOG =
LoggerFactory.getLogger(TestConcatenatedCompressedInput.class);
- private static int MAX_LENGTH = 10000;
private static JobConf defaultConf = new JobConf();
private static FileSystem localFs = null;
@@ -85,13 +83,15 @@ public class TestConcatenatedCompressedInput {
public void after() {
ZlibFactory.loadNativeZLib();
}
+
+ private static final String DEFAULT_WORK_DIR = "target/test-classes/testdata";
private static Path workDir = localFs.makeQualified(new Path(
- System.getProperty("test.build.data", "/tmp"),
+ System.getProperty("test.build.data", DEFAULT_WORK_DIR),
"TestConcatenatedCompressedInput"));
private static LineReader makeStream(String str) throws IOException {
- return new LineReader(new ByteArrayInputStream(str.getBytes("UTF-8")),
- defaultConf);
+ return new LineReader(new ByteArrayInputStream(
+ str.getBytes("UTF-8")), defaultConf);
}
private static void writeFile(FileSystem fs, Path name,
@@ -190,7 +190,8 @@ public class TestConcatenatedCompressedInput {
// copy prebuilt (correct!) version of concat.gz to HDFS
final String fn = "concat" + gzip.getDefaultExtension();
- Path fnLocal = new Path(System.getProperty("test.concat.data", "/tmp"), fn);
+ Path fnLocal = new Path(
+ System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn);
Path fnHDFS = new Path(workDir, fn);
localFs.copyFromLocalFile(fnLocal, fnHDFS);
@@ -227,7 +228,7 @@ public class TestConcatenatedCompressedInput {
@Test
public void testPrototypeInflaterGzip() throws IOException {
CompressionCodec gzip = new GzipCodec(); // used only for file extension
- localFs.delete(workDir, true); // localFs = FileSystem instance
+ localFs.delete(workDir, true); // localFs = FileSystem instance
System.out.println(COLOR_BR_BLUE + "testPrototypeInflaterGzip() using " +
"non-native/Java Inflater and manual gzip header/trailer parsing" +
@@ -235,7 +236,8 @@ public class TestConcatenatedCompressedInput {
// copy prebuilt (correct!) version of concat.gz to HDFS
final String fn = "concat" + gzip.getDefaultExtension();
- Path fnLocal = new Path(System.getProperty("test.concat.data", "/tmp"), fn);
+ Path fnLocal = new Path(
+ System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn);
Path fnHDFS = new Path(workDir, fn);
localFs.copyFromLocalFile(fnLocal, fnHDFS);
@@ -326,14 +328,16 @@ public class TestConcatenatedCompressedInput {
// copy single-member test file to HDFS
String fn1 = "testConcatThenCompress.txt" + gzip.getDefaultExtension();
- Path fnLocal1 = new Path(System.getProperty("test.concat.data","/tmp"),fn1);
+ Path fnLocal1 = new Path(
+ System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn1);
Path fnHDFS1 = new Path(workDir, fn1);
localFs.copyFromLocalFile(fnLocal1, fnHDFS1);
// copy multiple-member test file to HDFS
// (actually in "seekable gzip" format, a la JIRA PIG-42)
String fn2 = "testCompressThenConcat.txt" + gzip.getDefaultExtension();
- Path fnLocal2 = new Path(System.getProperty("test.concat.data","/tmp"),fn2);
+ Path fnLocal2 = new Path(
+ System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn2);
Path fnHDFS2 = new Path(workDir, fn2);
localFs.copyFromLocalFile(fnLocal2, fnHDFS2);
@@ -439,7 +443,8 @@ public class TestConcatenatedCompressedInput {
InputSplit[] splits = format.getSplits(jConf, 100);
assertEquals("compressed splits == 2", 2, splits.length);
FileSplit tmp = (FileSplit) splits[0];
- if (tmp.getPath().getName().equals("testCompressThenConcat.txt.gz")) {
+ if (tmp.getPath()
+ .getName().equals("testdata/testCompressThenConcat.txt.gz")) {
System.out.println(" (swapping)");
splits[0] = splits[1];
splits[1] = tmp;
@@ -481,7 +486,8 @@ public class TestConcatenatedCompressedInput {
// copy prebuilt (correct!) version of concat.bz2 to HDFS
final String fn = "concat" + bzip2.getDefaultExtension();
- Path fnLocal = new Path(System.getProperty("test.concat.data", "/tmp"), fn);
+ Path fnLocal = new Path(
+ System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn);
Path fnHDFS = new Path(workDir, fn);
localFs.copyFromLocalFile(fnLocal, fnHDFS);
@@ -531,13 +537,15 @@ public class TestConcatenatedCompressedInput {
// copy single-member test file to HDFS
String fn1 = "testConcatThenCompress.txt" + bzip2.getDefaultExtension();
- Path fnLocal1 = new Path(System.getProperty("test.concat.data","/tmp"),fn1);
+ Path fnLocal1 = new Path(
+ System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn1);
Path fnHDFS1 = new Path(workDir, fn1);
localFs.copyFromLocalFile(fnLocal1, fnHDFS1);
// copy multiple-member test file to HDFS
String fn2 = "testCompressThenConcat.txt" + bzip2.getDefaultExtension();
- Path fnLocal2 = new Path(System.getProperty("test.concat.data","/tmp"),fn2);
+ Path fnLocal2 = new Path(
+ System.getProperty("test.concat.data", DEFAULT_WORK_DIR), fn2);
Path fnHDFS2 = new Path(workDir, fn2);
localFs.copyFromLocalFile(fnLocal2, fnHDFS2);
@@ -549,21 +557,6 @@ public class TestConcatenatedCompressedInput {
assertEquals("concat bytes available", 2567, in1.available());
assertEquals("concat bytes available", 3056, in2.available());
-/*
- // FIXME
- // The while-loop below dies at the beginning of the 2nd concatenated
- // member (after 17 lines successfully read) with:
- //
- // java.io.IOException: bad block header
- // at org.apache.hadoop.io.compress.bzip2.CBZip2InputStream.initBlock(
- // CBZip2InputStream.java:527)
- //
- // It is not critical to concatenated-gzip support, HADOOP-6835, so it's
- // simply commented out for now (and HADOOP-6852 filed). If and when the
- // latter issue is resolved--perhaps by fixing an error here--this code
- // should be reenabled. Note that the doMultipleBzip2BufferSizes() test
- // below uses the same testCompressThenConcat.txt.bz2 file but works fine.
-
CompressionInputStream cin2 = bzip2.createInputStream(in2);
LineReader in = new LineReader(cin2);
Text out = new Text();
@@ -578,7 +571,6 @@ public class TestConcatenatedCompressedInput {
5346, totalBytes);
assertEquals("total uncompressed lines in concatenated test file",
84, lineNum);
- */
// test CBZip2InputStream with lots of different input-buffer sizes
doMultipleBzip2BufferSizes(jobConf);
@@ -645,7 +637,8 @@ public class TestConcatenatedCompressedInput {
// this tests both files (testCompressThenConcat, testConcatThenCompress); all
// should work with existing Java bzip2 decoder and any future native version
- private static void doSingleBzip2BufferSize(JobConf jConf) throws IOException {
+ private static void doSingleBzip2BufferSize(JobConf jConf)
+ throws IOException {
TextInputFormat format = new TextInputFormat();
format.configure(jConf);
format.setMinSplitSize(5500); // work around 256-byte/22-splits issue
@@ -654,7 +647,8 @@ public class TestConcatenatedCompressedInput {
InputSplit[] splits = format.getSplits(jConf, 100);
assertEquals("compressed splits == 2", 2, splits.length);
FileSplit tmp = (FileSplit) splits[0];
- if (tmp.getPath().getName().equals("testCompressThenConcat.txt.gz")) {
+ if (tmp.getPath()
+ .getName().equals("testdata/testCompressThenConcat.txt.gz")) {
System.out.println(" (swapping)");
splits[0] = splits[1];
splits[1] = tmp;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.bz2
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.bz2 b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.bz2
new file mode 100644
index 0000000..f31fb0c
Binary files /dev/null and b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.bz2 differ
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.gz
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.gz b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.gz
new file mode 100644
index 0000000..53d5a07
Binary files /dev/null and b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/concat.gz differ
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.bz2
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.bz2 b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.bz2
new file mode 100644
index 0000000..a21c0e2
Binary files /dev/null and b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.bz2 differ
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.gz
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.gz b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.gz
new file mode 100644
index 0000000..75e5f8c
Binary files /dev/null and b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testCompressThenConcat.txt.gz differ
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.bz2
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.bz2 b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.bz2
new file mode 100644
index 0000000..5983e52
Binary files /dev/null and b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.bz2 differ
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.gz
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.gz b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.gz
new file mode 100644
index 0000000..6e8eaa5
Binary files /dev/null and b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/resources/testdata/testConcatThenCompress.txt.gz differ
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org