You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2019/11/02 16:21:42 UTC
[commons-compress] 01/03: COMPRESS-497 Handle missing endheader offset
This is an automated email from the ASF dual-hosted git repository.
bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git
commit 1cc808076dbe3047cdc4ed30655241d0e0e87c86
Author: Stefan Schlott <st...@ploing.de>
AuthorDate: Mon Oct 28 20:38:17 2019 +0100
COMPRESS-497 Handle missing endheader offset
---
.../compress/archivers/sevenz/SevenZFile.java | 76 +++++++++++++++++++--
.../compress/archivers/sevenz/SevenZFileTest.java | 5 ++
src/test/resources/bla.noendheaderoffset.7z | Bin 0 -> 512 bytes
3 files changed, 75 insertions(+), 6 deletions(-)
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
index d32536e..f85d202 100644
--- a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
@@ -441,17 +441,81 @@ public class SevenZFile implements Closeable {
archiveVersionMajor, archiveVersionMinor));
}
+ boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive"
final long startHeaderCrc = 0xffffFFFFL & buf.getInt();
- final StartHeader startHeader = readStartHeader(startHeaderCrc);
+ if (startHeaderCrc == 0) {
+ // This is an indication of a corrupt header - peek the next 20 bytes
+ long currentPosition = channel.position();
+ ByteBuffer peekBuf = ByteBuffer.allocate(20);
+ readFully(peekBuf);
+ channel.position(currentPosition);
+ // Header invalid if all data is 0
+ while (peekBuf.hasRemaining()) {
+ if (peekBuf.get()!=0) {
+ headerLooksValid = true;
+ break;
+ }
+ }
+ } else {
+ headerLooksValid = true;
+ }
+
+ if (headerLooksValid) {
+ final StartHeader startHeader = readStartHeader(startHeaderCrc);
+ return initializeArchive(startHeader, password, true);
+ } else {
+ // No valid header found - probably first file of multipart archive was removed too early. Scan for end header.
+ ByteBuffer nidBuf = ByteBuffer.allocate(1);
+ final long searchLimit = 1024 * 1024 * 1;
+ final long previousDataSize = channel.position() + 20; // Main header, plus bytes that readStartHeader would read
+ final long minPos;
+ // Determine minimal position - can't start before current position
+ if (channel.position() + searchLimit > channel.size()) {
+ minPos = channel.position();
+ } else {
+ minPos = channel.size() - searchLimit;
+ }
+ long pos = channel.size() - 1;
+ // Loop: Try from end of archive
+ while (pos > minPos) {
+ pos--;
+ channel.position(pos);
+ nidBuf.rewind();
+ channel.read(nidBuf);
+ int nid = nidBuf.array()[0];
+ // First indicator: Byte equals one of these header identifiers
+ if ((nid == NID.kEncodedHeader) || (nid == NID.kHeader)) {
+ try {
+ // Try to initialize Archive structure from here
+ final StartHeader startHeader = new StartHeader();
+ startHeader.nextHeaderOffset = pos - previousDataSize;
+ startHeader.nextHeaderSize = channel.size() - pos;
+ Archive result = initializeArchive(startHeader, password, false);
+ // Sanity check: There must be some data...
+ if (result.packSizes!=null && result.files.length>0) {
+ return result;
+ }
+ } catch (Exception ignore) {
+ // Wrong guess...
+ }
+ }
+ }
+ throw new IOException("Start header corrupt and unable to guess end header");
+ }
+ }
+
+ private Archive initializeArchive(StartHeader startHeader, final byte[] password, boolean verifyCrc) throws IOException {
assertFitsIntoInt("nextHeaderSize", startHeader.nextHeaderSize);
final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize;
channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset);
- buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN);
+ ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN);
readFully(buf);
- final CRC32 crc = new CRC32();
- crc.update(buf.array());
- if (startHeader.nextHeaderCrc != crc.getValue()) {
- throw new IOException("NextHeader CRC mismatch");
+ if (verifyCrc) {
+ final CRC32 crc = new CRC32();
+ crc.update(buf.array());
+ if (startHeader.nextHeaderCrc != crc.getValue()) {
+ throw new IOException("NextHeader CRC mismatch");
+ }
}
Archive archive = new Archive();
diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
index db4fe1d..8e904bc 100644
--- a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
@@ -395,6 +395,11 @@ public class SevenZFileTest extends AbstractTestCase {
}
}
+ @Test
+ public void test7zUnarchiveWithDefectHeader() throws Exception {
+ test7zUnarchive(getFile("bla.noendheaderoffset.7z"), SevenZMethod.LZMA);
+ }
+
private void test7zUnarchive(final File f, final SevenZMethod m, final byte[] password) throws Exception {
try (SevenZFile sevenZFile = new SevenZFile(f, password)) {
test7zUnarchive(sevenZFile, m);
diff --git a/src/test/resources/bla.noendheaderoffset.7z b/src/test/resources/bla.noendheaderoffset.7z
new file mode 100644
index 0000000..7ddb9e9
Binary files /dev/null and b/src/test/resources/bla.noendheaderoffset.7z differ