You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2020/01/01 17:11:34 UTC
[commons-compress] 02/06: COMPRESS-124 : add testcases for
extracting sparse
This is an automated email from the ASF dual-hosted git repository.
bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git
commit 50569e5bfb1526c54acf0abe5a6e4d5463c5a4bd
Author: Lee <pe...@gmail.com>
AuthorDate: Fri Nov 22 17:24:10 2019 +0800
COMPRESS-124 : add testcases for extracting sparse
---
.../compress/archivers/tar/TarArchiveEntry.java | 24 +--
.../archivers/tar/TarArchiveInputStream.java | 237 ++++++++++++++++++---
.../archivers/tar/TarArchiveSparseEntry.java | 2 +
.../archivers/tar/TarArchiveSparseInputStream.java | 222 -------------------
.../archivers/tar/TarArchiveStructSparse.java | 24 +++
.../compress/archivers/tar/SparseFilesTest.java | 205 +++++++++++++++++-
.../compress/archivers/tar/TarUtilsTest.java | 12 ++
src/test/resources/oldgnu_extended_sparse.tar | Bin 0 -> 10240 bytes
8 files changed, 452 insertions(+), 274 deletions(-)
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
index 9012a3c..2aac73e 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
@@ -852,10 +852,14 @@ public class TarArchiveEntry implements ArchiveEntry, TarConstants {
/**
* Get this entry's real file size in case of a sparse file.
+ * If the file is not a sparse file, return size instead of realSize.
*
- * @return This entry's real file size.
+ * @return this entry's real file size; if the entry is not a sparse file, size is returned instead of realSize.
*/
public long getRealSize() {
+ if (!isSparse()) {
+ return size;
+ }
return realSize;
}
@@ -1077,16 +1081,13 @@ public class TarArchiveEntry implements ArchiveEntry, TarConstants {
/**
* Update the entry using a map of pax headers.
* @param headers
- * @param sparseHeaders for 0.0 PAX Format, the sparse headers may appear more than 1 time in headers map,
-* * this means it can not be read from a map, therefore the sparse headers have already
-* * been parsed to a list and was passed through parameter sparseHeaders
* @since 1.15
*/
- void updateEntryFromPaxHeaders(Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) {
+ void updateEntryFromPaxHeaders(Map<String, String> headers) {
for (final Map.Entry<String, String> ent : headers.entrySet()) {
final String key = ent.getKey();
final String val = ent.getValue();
- processPaxHeader(key, val, headers, sparseHeaders);
+ processPaxHeader(key, val, headers);
}
}
@@ -1101,10 +1102,6 @@ public class TarArchiveEntry implements ArchiveEntry, TarConstants {
processPaxHeader(key,val,extraPaxHeaders);
}
- private void processPaxHeader(String key, String val, Map<String, String> headers) {
- processPaxHeader(key, val, headers, null);
- }
-
/**
* Process one pax header, using the supplied map as source for extra headers to be used when handling
* entries for sparse files
@@ -1112,13 +1109,9 @@ public class TarArchiveEntry implements ArchiveEntry, TarConstants {
* @param key the header name.
* @param val the header value.
* @param headers map of headers used for dealing with sparse file.
- * @param sparseHeaders for 0.0 PAX Format, the sparse headers may appear more than 1 time in headers map,
- * this means it can not be read from a map, therefore the sparse headers have already
- * been parsed to a list and was passed through parameter sparseHeaders
* @since 1.15
*/
- private void processPaxHeader(String key, String val, Map<String, String> headers,
- final List<TarArchiveStructSparse> sparseHeaders) {
+ private void processPaxHeader(String key, String val, Map<String, String> headers) {
/*
* The following headers are defined for Pax.
* atime, ctime, charset: cannot use these without changing TarArchiveEntry fields
@@ -1172,7 +1165,6 @@ public class TarArchiveEntry implements ArchiveEntry, TarConstants {
break;
case "GNU.sparse.size":
fillGNUSparse0xData(headers);
- this.sparseHeaders = sparseHeaders;
break;
case "GNU.sparse.realsize":
fillGNUSparse1xData(headers);
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
index 9194db8..72b6653 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
@@ -26,16 +26,14 @@ package org.apache.commons.compress.archivers.tar;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.ArchiveUtils;
+import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.CharsetNames;
import org.apache.commons.compress.utils.IOUtils;
@@ -70,7 +68,12 @@ public class TarArchiveInputStream extends ArchiveInputStream {
/** An input stream to read from */
private final InputStream is;
- /** An input stream to read sparse file */
+ /** Input streams for reading sparse entries **/
+ private List<InputStream> sparseInputStreams;
+
+ /** the index of current input stream being read when reading sparse entries */
+ private int currentSparseInputStreamIndex;
+
private InputStream sparseInputStream;
/** The meta-data about the current entry */
@@ -86,7 +89,7 @@ public class TarArchiveInputStream extends ArchiveInputStream {
private Map<String, String> globalPaxHeaders = new HashMap<>();
// the global sparse headers, this is only used in PAX Format 0.X
- private List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();
+ private final List<TarArchiveStructSparse> globalSparseHeaders = new ArrayList<>();
private final boolean lenient;
@@ -193,6 +196,13 @@ public class TarArchiveInputStream extends ArchiveInputStream {
*/
@Override
public void close() throws IOException {
+ // Close all the input streams in sparseInputStreams
+ if(sparseInputStreams != null) {
+ for (InputStream inputStream : sparseInputStreams) {
+ inputStream.close();
+ }
+ }
+
is.close();
}
@@ -223,16 +233,10 @@ public class TarArchiveInputStream extends ArchiveInputStream {
return 0;
}
- // for sparse entries, there are actually currEntry.getRealSize() bytes to read
- long entryActualSize = entrySize;
- if (currEntry.isSparse()) {
- entryActualSize = currEntry.getRealSize();
- }
-
- if (entryActualSize - entryOffset > Integer.MAX_VALUE) {
+ if (currEntry.getRealSize() - entryOffset > Integer.MAX_VALUE) {
return Integer.MAX_VALUE;
}
- return (int) (entryActualSize - entryOffset);
+ return (int) (currEntry.getRealSize() - entryOffset);
}
@@ -258,15 +262,12 @@ public class TarArchiveInputStream extends ArchiveInputStream {
return 0;
}
- long available;
+ long available = currEntry.getRealSize() - entryOffset;
long skipped;
if(!currEntry.isSparse()) {
- available = entrySize - entryOffset;
skipped = IOUtils.skip(is, Math.min(n, available));
} else {
- // for sparse entries, there are actually currEntry.getRealSize() bytes to read
- available = currEntry.getRealSize() - entryOffset;
- skipped = IOUtils.skip(sparseInputStream, Math.min(n, available));
+ skipped = skipSparse(n);
}
count(skipped);
entryOffset += skipped;
@@ -274,6 +275,35 @@ public class TarArchiveInputStream extends ArchiveInputStream {
}
/**
+ * Skip n bytes from current input stream, if the current input stream doesn't have enough data to skip,
+ * jump to the next input stream and skip the rest bytes, keep doing this until total n bytes are skipped
+ * or the input streams are all skipped
+ *
+ * @param n bytes of data to skip
+ * @return actual bytes of data skipped
+ * @throws IOException
+ */
+ private long skipSparse(final long n) throws IOException {
+ if (sparseInputStreams.size() == 0) {
+ return is.skip(n);
+ }
+
+ long bytesSkipped = 0;
+ InputStream currentInputStream;
+
+ while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
+ currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
+ bytesSkipped += currentInputStream.skip(n - bytesSkipped);
+
+ if (bytesSkipped < n) {
+ currentSparseInputStreamIndex++;
+ }
+ }
+
+ return bytesSkipped;
+ }
+
+ /**
* Since we do not support marking just yet, we return false.
*
* @return False.
@@ -536,8 +566,8 @@ public class TarArchiveInputStream extends ArchiveInputStream {
}
// sparse headers are all done reading, we need to build
- // a new input stream using these sparse headers
- sparseInputStream = new TarArchiveSparseInputStream(sparseHeaders, is);
+ // sparse input streams using these sparse headers
+ buildSparseInputStreams();
}
/**
@@ -717,8 +747,8 @@ public class TarArchiveInputStream extends ArchiveInputStream {
}
private void applyPaxHeadersToCurrentEntry(final Map<String, String> headers, final List<TarArchiveStructSparse> sparseHeaders) {
- currEntry.updateEntryFromPaxHeaders(headers, sparseHeaders);
-
+ currEntry.updateEntryFromPaxHeaders(headers);
+ currEntry.setSparseHeaders(sparseHeaders);
}
/**
@@ -742,8 +772,8 @@ public class TarArchiveInputStream extends ArchiveInputStream {
}
// sparse headers are all done reading, we need to build
- // a new input stream using these sparse headers
- sparseInputStream = new TarArchiveSparseInputStream(currEntry.getSparseHeaders(), is);
+ // sparse input streams using these sparse headers
+ buildSparseInputStreams();
}
private boolean isDirectory() {
@@ -816,19 +846,22 @@ public class TarArchiveInputStream extends ArchiveInputStream {
throw new IllegalStateException("No current tar entry");
}
- if(!currEntry.isSparse() && entryOffset >= entrySize) {
- return -1;
- }
-
- // for sparse entries, there are actually currEntry.getRealSize() bytes to read
- if(currEntry.isSparse() && entryOffset >= currEntry.getRealSize()) {
- return -1;
+ if (!currEntry.isSparse()) {
+ if (entryOffset >= entrySize) {
+ return -1;
+ }
+ } else {
+ // for sparse entries, there are actually currEntry.getRealSize() bytes to read
+ if (entryOffset >= currEntry.getRealSize()) {
+ return -1;
+ }
}
numToRead = Math.min(numToRead, available());
- if(currEntry.isSparse()) {
- totalRead = sparseInputStream.read(buf, offset, numToRead);
+ if (currEntry.isSparse()) {
+ // for sparse entries, we need to read them in another way
+ totalRead = readSparse(buf, offset, numToRead);
} else {
totalRead = is.read(buf, offset, numToRead);
}
@@ -847,6 +880,61 @@ public class TarArchiveInputStream extends ArchiveInputStream {
}
/**
+ * For sparse tar entries, there are many "holes"(consisting of all 0) in the file. Only the non-zero data is
+ * stored in tar files, and they are stored separately. The structure of non-zero data is introduced by the
+ * sparse headers using the offset, where a block of non-zero data starts, and numbytes, the length of the
+ * non-zero data block.
+ * When reading sparse entries, the actual data is read out with "holes" and non-zero data combined together
+ * according to the sparse headers.
+ *
+ * @param buf The buffer into which to place bytes read.
+ * @param offset The offset at which to place bytes read.
+ * @param numToRead The number of bytes to read.
+ * @return The number of bytes read, or -1 at EOF.
+ * @throws IOException on error
+ */
+ private int readSparse(final byte[] buf, final int offset, int numToRead) throws IOException {
+ // if there are no actual input streams, just read from the original input stream
+ if (sparseInputStreams.size() == 0) {
+ return is.read(buf, offset, numToRead);
+ }
+
+ if(currentSparseInputStreamIndex >= sparseInputStreams.size()) {
+ return -1;
+ }
+
+ InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
+ int readLen = currentInputStream.read(buf, offset, numToRead);
+
+ // if the current input stream is the last input stream,
+ // just return the number of bytes read from current input stream
+ if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
+ return readLen;
+ }
+
+ // if EOF of the current input stream is met, open a new input stream and recursively call read
+ if (readLen == -1) {
+ currentSparseInputStreamIndex++;
+ return readSparse(buf, offset, numToRead);
+ }
+
+ // if the rest data of current input stream is not long enough, open a new input stream
+ // and recursively call read
+ if (readLen < numToRead) {
+ currentSparseInputStreamIndex++;
+ int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
+ if (readLenOfNext == -1) {
+ return readLen;
+ }
+
+ return readLen + readLenOfNext;
+ }
+
+ // if the remaining data of the current input stream is enough (which means readLen == numToRead), just return readLen
+ return readLen;
+ }
+
+ /**
* Whether this class is able to read the given entry.
*
* <p>May return false if the current entry is a sparse file.</p>
@@ -937,4 +1025,85 @@ public class TarArchiveInputStream extends ArchiveInputStream {
signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN);
}
+ /**
+ * Build the input streams consisting of all-zero input streams and non-zero input streams.
+ * When reading from the non-zero input streams, the data is actually read from the original input stream.
+ * The size of each input stream is introduced by the sparse headers.
+ *
+ * NOTE : Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the
+ * 0 size input streams because they are meaningless.
+ */
+ private void buildSparseInputStreams() throws IOException {
+ currentSparseInputStreamIndex = -1;
+ sparseInputStreams = new ArrayList<>();
+ InputStream zeroInputStream = new TarArchiveSparseZeroInputStream();
+
+ long offset = 0;
+ List<TarArchiveStructSparse> sparseHeaders = currEntry.getSparseHeaders();
+ // sort the sparse headers in case they are written in wrong order
+ if (sparseHeaders != null && sparseHeaders.size() > 1) {
+ final Comparator<TarArchiveStructSparse> sparseHeaderComparator = new Comparator<TarArchiveStructSparse>() {
+ @Override
+ public int compare(final TarArchiveStructSparse p, final TarArchiveStructSparse q) {
+ Long pOffset = p.getOffset();
+ Long qOffset = q.getOffset();
+ return pOffset.compareTo(qOffset);
+ }
+ };
+ Collections.sort(sparseHeaders, sparseHeaderComparator);
+ }
+
+ for (TarArchiveStructSparse sparseHeader : sparseHeaders) {
+ if (sparseHeader.getOffset() == 0 && sparseHeader.getNumbytes() == 0) {
+ break;
+ }
+
+ if ((sparseHeader.getOffset() - offset) < 0) {
+ throw new IOException("Corrupted struct sparse detected");
+ }
+
+ // only store the input streams with non-zero size
+ if ((sparseHeader.getOffset() - offset) > 0) {
+ sparseInputStreams.add(new BoundedInputStream(zeroInputStream, sparseHeader.getOffset() - offset));
+ }
+
+ // only store the input streams with non-zero size
+ if (sparseHeader.getNumbytes() > 0) {
+ sparseInputStreams.add(new BoundedInputStream(is, sparseHeader.getNumbytes()));
+ }
+
+ offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
+ }
+
+ if (sparseInputStreams.size() > 0) {
+ currentSparseInputStreamIndex = 0;
+ }
+ }
+
+ /**
+ * This is an input stream that always returns 0;
+ * it is used when reading the "holes" of a sparse file
+ */
+ public class TarArchiveSparseZeroInputStream extends InputStream {
+ /**
+ * Just return 0
+ * @return
+ * @throws IOException
+ */
+ @Override
+ public int read() throws IOException {
+ return 0;
+ }
+
+ /**
+ * there is nothing to do when skipping
+ *
+ * @param n bytes to skip
+ * @return bytes actually skipped
+ */
+ @Override
+ public long skip(final long n) {
+ return n;
+ }
+ }
}
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java
index 7a8e2ee..5599e06 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java
@@ -19,6 +19,7 @@
package org.apache.commons.compress.archivers.tar;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
/**
@@ -56,6 +57,7 @@ public class TarArchiveSparseEntry implements TarConstants {
*/
public TarArchiveSparseEntry(final byte[] headerBuf) throws IOException {
int offset = 0;
+ sparseHeaders = new ArrayList<>();
for(int i = 0; i < SPARSE_HEADERS_IN_EXTENSION_HEADER;i++) {
TarArchiveStructSparse sparseHeader = TarUtils.parseSparse(headerBuf,
offset + i * (SPARSE_OFFSET_LEN + SPARSE_NUMBYTES_LEN));
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseInputStream.java
deleted file mode 100644
index 7dd609b..0000000
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseInputStream.java
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.commons.compress.archivers.tar;
-
-import org.apache.commons.compress.utils.BoundedInputStream;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * For sparse tar entries, there are many "holes"(consisting of all 0) in the file. Only the non-zero data is
- * stored in tar files, and they are stored separately. The structure of non-zero data is introduced by the
- * sparse headers using the offset, where a block of non-zero data starts, and numbytes, the length of the
- * non-zero data block.
- * This class is used to construct an input stream that combines the "holes" and the non-zero data together using
- * the sparse headers. When reading from this input stream, the actual data is read out with "holes" and non-zero
- * data combined together according to the sparse headers.
- */
-public class TarArchiveSparseInputStream extends InputStream {
- /** the sparse headers describing the sparse information */
- private List<TarArchiveStructSparse> sparseHeaders;
-
- /** the input stream of the tar file */
- private InputStream inputStream;
-
- /** the input streams consisting of all-zero input streams and non-zero streams */
- private List<InputStream> inputStreams;
-
- /** the index of current input stream being read */
- private int currentInputStreamIndex = -1;
-
- public TarArchiveSparseInputStream(List<TarArchiveStructSparse> sparseHeaders, InputStream inputStream) {
- this.sparseHeaders = sparseHeaders;
- this.inputStream = inputStream;
- buildInputStreams();
-
- if (inputStreams.size() > 0) {
- currentInputStreamIndex = 0;
- }
- }
-
- @Override
- public int read() throws IOException {
- // if there are no actual input streams, just read from the original input stream
- if (inputStreams.size() == 0) {
- return inputStream.read();
- }
-
- int value = inputStreams.get(currentInputStreamIndex).read();
- if (value != -1) {
- return value;
- }
-
- if (currentInputStreamIndex == inputStreams.size() - 1) {
- return -1;
- }
-
- currentInputStreamIndex++;
- return inputStreams.get(currentInputStreamIndex).read();
- }
-
- @Override
- public int read(byte[] buf) throws IOException {
- return read(buf, 0, buf.length);
- }
-
- @Override
- public int read(byte[] buf, int offset, int len) throws IOException {
- // if there are no actual input streams, just read from the original input stream
- if (inputStreams.size() == 0) {
- return inputStream.read(buf, offset, len);
- }
-
- InputStream currentInputStream = inputStreams.get(currentInputStreamIndex);
- int readLen = currentInputStream.read(buf, offset, len);
-
- // if the current input stream is the last input stream,
- // just return the number of bytes read from current input stream
- if (currentInputStreamIndex == inputStreams.size() - 1) {
- return readLen;
- }
-
- // if EOF of current input stream is meet, open a new input stream and recursively call read
- if (readLen == -1) {
- currentInputStreamIndex++;
- return read(buf, offset, len);
- }
-
- // if the rest data of current input stream is not long enough, open a new input stream
- // and recursively call read
- if (readLen < len) {
- currentInputStreamIndex++;
- int readLenOfNext = read(buf, offset + readLen, len - readLen);
- if (readLenOfNext == -1) {
- return readLen;
- }
-
- return readLen + readLenOfNext;
- }
-
- // if the rest data of current input stream is enough(which means readLen == len), just return readLen
- return readLen;
- }
-
- /**
- * Skip n bytes from current input stream, if the current input stream doesn't have enough data to skip,
- * jump to the next input stream and skip the rest bytes, keep doing this until total n bytes are skipped
- * or the input streams are all skipped
- *
- * @param n bytes of data to skip
- * @return actual bytes of data skipped
- * @throws IOException
- */
- @Override
- public long skip(final long n) throws IOException {
- if (inputStreams.size() == 0) {
- return inputStream.skip(n);
- }
-
- long bytesSkipped = 0;
- InputStream currentInputStream;
-
- while (bytesSkipped < n && currentInputStreamIndex < inputStreams.size()) {
- currentInputStream = inputStreams.get(currentInputStreamIndex);
- bytesSkipped += currentInputStream.skip(n - bytesSkipped);
-
- if (bytesSkipped < n) {
- currentInputStreamIndex++;
- }
- }
-
- return bytesSkipped;
- }
-
- /**
- * Close all the input streams in inputStreams
- * @throws IOException
- */
- @Override
- public void close() throws IOException {
- for (InputStream inputStream : inputStreams) {
- inputStream.close();
- }
- }
-
- /**
- * Build the input streams consisting of all-zero input streams and non-zero input streams.
- * When reading from the non-zero input streams, the data is actually read from the original input stream.
- * The size of each input stream is introduced by the sparse headers.
- *
- * NOTE : Some all-zero input streams and non-zero input streams have the size of 0. We DO NOT store the
- * 0 size input streams because they are meaningless.
- */
- private void buildInputStreams() {
- inputStreams = new ArrayList<>();
- InputStream zeroInputStream = new TarArchiveSparseZeroInputStream();
-
- long offset = 0;
- for (TarArchiveStructSparse sparseHeader : sparseHeaders) {
- if (sparseHeader.getOffset() == 0 && sparseHeader.getNumbytes() == 0) {
- break;
- }
-
- // only store the input streams with non-zero size
- if ((sparseHeader.getOffset() - offset) > 0) {
- inputStreams.add(new BoundedInputStream(zeroInputStream, sparseHeader.getOffset() - offset));
- }
-
- // only store the input streams with non-zero size
- if (sparseHeader.getNumbytes() > 0) {
- inputStreams.add(new BoundedInputStream(inputStream, sparseHeader.getNumbytes()));
- }
-
- offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
- }
- }
-
- /**
- * This is an inputstream that always return 0,
- * this is used when writing the holes of a sparse file
- */
- public class TarArchiveSparseZeroInputStream extends InputStream {
- /**
- * Just return 0
- * @return
- * @throws IOException
- */
- @Override
- public int read() throws IOException {
- return 0;
- }
-
- /**
- * these's nothing need to do when skipping
- *
- * @param n bytes to skip
- * @return bytes actually skipped
- */
- @Override
- public long skip(final long n) {
- return n;
- }
- }
-}
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveStructSparse.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveStructSparse.java
index ac08e68..8221a25 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveStructSparse.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveStructSparse.java
@@ -18,6 +18,8 @@
*/
package org.apache.commons.compress.archivers.tar;
+import java.util.Objects;
+
/**
* This class represents struct sparse in a Tar archive.
* <p>
@@ -39,6 +41,28 @@ public class TarArchiveStructSparse {
this.numbytes = numbytes;
}
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ TarArchiveStructSparse that = (TarArchiveStructSparse) o;
+ return offset == that.offset &&
+ numbytes == that.numbytes;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(offset, numbytes);
+ }
+
+ @Override
+ public String toString() {
+ return "TarArchiveStructSparse{" +
+ "offset=" + offset +
+ ", numbytes=" + numbytes +
+ '}';
+ }
+
public long getOffset() {
return offset;
}
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
index 56ac227..33b0c91 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
@@ -18,15 +18,24 @@
package org.apache.commons.compress.archivers.tar;
-import static org.apache.commons.compress.AbstractTestCase.getFile;
import static org.junit.Assert.*;
+
+import org.apache.commons.compress.AbstractTestCase;
+import org.junit.Assert;
import org.junit.Test;
+import shaded.org.apache.commons.io.IOUtils;
import java.io.File;
import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+import java.util.Locale;
+
+public class SparseFilesTest extends AbstractTestCase {
-public class SparseFilesTest {
+ private final boolean isOnWindows = System.getProperty("os.name").toLowerCase(Locale.ENGLISH).contains("windows");
@Test
public void testOldGNU() throws Throwable {
@@ -40,6 +49,18 @@ public class SparseFilesTest {
assertTrue(ae.isGNUSparse());
assertFalse(ae.isPaxGNUSparse());
assertFalse(tin.canReadEntryData(ae));
+
+ List<TarArchiveStructSparse> sparseHeaders = ae.getSparseHeaders();
+ assertEquals(3, sparseHeaders.size());
+
+ assertEquals(0, sparseHeaders.get(0).getOffset());
+ assertEquals(2048, sparseHeaders.get(0).getNumbytes());
+
+ assertEquals(1050624L, sparseHeaders.get(1).getOffset());
+ assertEquals(2560, sparseHeaders.get(1).getNumbytes());
+
+ assertEquals(3101184L, sparseHeaders.get(2).getOffset());
+ assertEquals(0, sparseHeaders.get(2).getNumbytes());
} finally {
if (tin != null) {
tin.close();
@@ -63,6 +84,159 @@ public class SparseFilesTest {
}
}
+ @Test
+ public void testExtractSparseTarsOnWindows() throws IOException {
+ if (!isOnWindows) {
+ return;
+ }
+
+ final File oldGNUSparseTar = getFile("oldgnu_sparse.tar");
+ final File paxGNUSparseTar = getFile("pax_gnu_sparse.tar");
+ TarArchiveInputStream oldGNUSparseInputStream = null;
+ TarArchiveInputStream paxGNUSparseInputStream = null;
+ try {
+ // compare between old GNU and PAX 0.0
+ oldGNUSparseInputStream = new TarArchiveInputStream(new FileInputStream(oldGNUSparseTar));
+ oldGNUSparseInputStream.getNextTarEntry();
+ paxGNUSparseInputStream = new TarArchiveInputStream(new FileInputStream(paxGNUSparseTar));
+ paxGNUSparseInputStream.getNextTarEntry();
+ Assert.assertTrue(IOUtils.contentEquals(oldGNUSparseInputStream, paxGNUSparseInputStream));
+
+ // compare between old GNU and PAX 0.1
+ oldGNUSparseInputStream.close();
+ oldGNUSparseInputStream = new TarArchiveInputStream(new FileInputStream(oldGNUSparseTar));
+ oldGNUSparseInputStream.getNextTarEntry();
+ paxGNUSparseInputStream.getNextTarEntry();
+ Assert.assertTrue(IOUtils.contentEquals(oldGNUSparseInputStream, paxGNUSparseInputStream));
+
+ // compare between old GNU and PAX 1.0
+ oldGNUSparseInputStream.close();
+ oldGNUSparseInputStream = new TarArchiveInputStream(new FileInputStream(oldGNUSparseTar));
+ oldGNUSparseInputStream.getNextTarEntry();
+ paxGNUSparseInputStream.getNextTarEntry();
+ Assert.assertTrue(IOUtils.contentEquals(oldGNUSparseInputStream, paxGNUSparseInputStream));
+ } finally {
+ if (oldGNUSparseInputStream != null) {
+ oldGNUSparseInputStream.close();
+ }
+
+ if (paxGNUSparseInputStream != null) {
+ paxGNUSparseInputStream.close();
+ }
+ }
+ }
+
+ @Test
+ public void testExtractOldGNU() throws IOException, InterruptedException {
+ if (isOnWindows) {
+ return;
+ }
+
+ final File file = getFile("oldgnu_sparse.tar");
+ InputStream sparseFileInputStream = null;
+ TarArchiveInputStream tin = null;
+ try {
+ sparseFileInputStream = extractTarAndGetInputStream(file, "sparsefile");
+ tin = new TarArchiveInputStream(new FileInputStream(file));
+ tin.getNextTarEntry();
+
+ Assert.assertTrue(IOUtils.contentEquals(tin, sparseFileInputStream));
+ } finally {
+ if (sparseFileInputStream != null) {
+ sparseFileInputStream.close();
+ }
+
+ if (tin != null) {
+ tin.close();
+ }
+ }
+ }
+
+ @Test
+ public void testExtractExtendedOldGNU() throws IOException, InterruptedException {
+ if (isOnWindows) {
+ return;
+ }
+
+ final File file = getFile("oldgnu_extended_sparse.tar");
+ InputStream sparseFileInputStream = null;
+ TarArchiveInputStream tin = null;
+ try {
+ sparseFileInputStream = extractTarAndGetInputStream(file, "sparse6");
+ tin = new TarArchiveInputStream(new FileInputStream(file));
+ final TarArchiveEntry ae = tin.getNextTarEntry();
+
+ Assert.assertTrue(IOUtils.contentEquals(tin, sparseFileInputStream));
+
+ List<TarArchiveStructSparse> sparseHeaders = ae.getSparseHeaders();
+ assertEquals(7, sparseHeaders.size());
+
+ assertEquals(0, sparseHeaders.get(0).getOffset());
+ assertEquals(1024, sparseHeaders.get(0).getNumbytes());
+
+ assertEquals(10240, sparseHeaders.get(1).getOffset());
+ assertEquals(1024, sparseHeaders.get(1).getNumbytes());
+
+ assertEquals(16384, sparseHeaders.get(2).getOffset());
+ assertEquals(1024, sparseHeaders.get(2).getNumbytes());
+
+ assertEquals(24576, sparseHeaders.get(3).getOffset());
+ assertEquals(1024, sparseHeaders.get(3).getNumbytes());
+
+ assertEquals(29696, sparseHeaders.get(4).getOffset());
+ assertEquals(1024, sparseHeaders.get(4).getNumbytes());
+
+ assertEquals(36864, sparseHeaders.get(5).getOffset());
+ assertEquals(1024, sparseHeaders.get(5).getNumbytes());
+
+ assertEquals(51200, sparseHeaders.get(6).getOffset());
+ assertEquals(0, sparseHeaders.get(6).getNumbytes());
+ } finally {
+ if (sparseFileInputStream != null) {
+ sparseFileInputStream.close();
+ }
+
+ if (tin != null) {
+ tin.close();
+ }
+ }
+ }
+
+ @Test
+ public void testExtractPaxGNU() throws IOException, InterruptedException {
+ if (isOnWindows) {
+ return;
+ }
+
+ final File file = getFile("pax_gnu_sparse.tar");
+ InputStream sparseFileInputStream = null;
+ TarArchiveInputStream tin = null;
+ try {
+ sparseFileInputStream = extractTarAndGetInputStream(file, "sparsefile-0.0");
+ tin = new TarArchiveInputStream(new FileInputStream(file));
+ tin.getNextTarEntry();
+ Assert.assertTrue(IOUtils.contentEquals(tin, sparseFileInputStream));
+
+ // TODO : it's weird that I can only get a 0 size sparsefile-0.1 on my Ubuntu 16.04
+ // using "tar -xf pax_gnu_sparse.tar"
+ sparseFileInputStream = extractTarAndGetInputStream(file, "sparsefile-0.0");
+ tin.getNextTarEntry();
+ Assert.assertTrue(IOUtils.contentEquals(tin, sparseFileInputStream));
+
+ sparseFileInputStream = extractTarAndGetInputStream(file, "sparsefile-1.0");
+ tin.getNextTarEntry();
+ Assert.assertTrue(IOUtils.contentEquals(tin, sparseFileInputStream));
+ } finally {
+ if (sparseFileInputStream != null) {
+ sparseFileInputStream.close();
+ }
+
+ if (tin != null) {
+ tin.close();
+ }
+ }
+ }
+
private void assertPaxGNUEntry(final TarArchiveInputStream tin, final String suffix) throws Throwable {
final TarArchiveEntry ae = tin.getNextTarEntry();
assertEquals("sparsefile-" + suffix, ae.getName());
@@ -70,6 +244,33 @@ public class SparseFilesTest {
assertTrue(ae.isPaxGNUSparse());
assertFalse(ae.isOldGNUSparse());
assertFalse(tin.canReadEntryData(ae));
+
+ List<TarArchiveStructSparse> sparseHeaders = ae.getSparseHeaders();
+ assertEquals(3, sparseHeaders.size());
+
+ assertEquals(0, sparseHeaders.get(0).getOffset());
+ assertEquals(2048, sparseHeaders.get(0).getNumbytes());
+
+ assertEquals(1050624L, sparseHeaders.get(1).getOffset());
+ assertEquals(2560, sparseHeaders.get(1).getNumbytes());
+
+ assertEquals(3101184L, sparseHeaders.get(2).getOffset());
+ assertEquals(0, sparseHeaders.get(2).getNumbytes());
+ }
+
+ private InputStream extractTarAndGetInputStream(File tarFile, String sparseFileName) throws IOException, InterruptedException {
+ Runtime runtime = Runtime.getRuntime();
+ Process process = runtime.exec("tar -xf " + tarFile.getPath() + " -C " + resultDir.getPath());
+ // wait until the extract finishes
+ process.waitFor();
+
+ for (File file : resultDir.listFiles()) {
+ if(file.getName().equals(sparseFileName)) {
+ return new FileInputStream(file);
+ }
+ }
+
+ return null;
}
}
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
index 393c0aa..91b0ef7 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
@@ -381,4 +381,16 @@ public class TarUtilsTest {
}
}
+ @Test
+ public void testParseSparse() {
+ final long expectedOffset = 0100000;
+ final long expectedNumbytes = 0111000;
+ final byte [] buffer = new byte[] {
+ ' ', ' ', ' ', ' ', ' ', '0', '1', '0', '0', '0', '0', '0', // sparseOffset
+ ' ', ' ', ' ', ' ', ' ', '0', '1', '1', '1', '0', '0', '0'};
+ TarArchiveStructSparse sparse = TarUtils.parseSparse(buffer, 0);
+ assertEquals(sparse.getOffset(), expectedOffset);
+ assertEquals(sparse.getNumbytes(), expectedNumbytes);
+ }
+
}
diff --git a/src/test/resources/oldgnu_extended_sparse.tar b/src/test/resources/oldgnu_extended_sparse.tar
new file mode 100644
index 0000000..cba3ebc
Binary files /dev/null and b/src/test/resources/oldgnu_extended_sparse.tar differ