You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2019/11/02 16:21:42 UTC

[commons-compress] 01/03: COMPRESS-497 Handle missing endheader offset

This is an automated email from the ASF dual-hosted git repository.

bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git

commit 1cc808076dbe3047cdc4ed30655241d0e0e87c86
Author: Stefan Schlott <st...@ploing.de>
AuthorDate: Mon Oct 28 20:38:17 2019 +0100

    COMPRESS-497 Handle missing endheader offset
---
 .../compress/archivers/sevenz/SevenZFile.java      |  76 +++++++++++++++++++--
 .../compress/archivers/sevenz/SevenZFileTest.java  |   5 ++
 src/test/resources/bla.noendheaderoffset.7z        | Bin 0 -> 512 bytes
 3 files changed, 75 insertions(+), 6 deletions(-)

diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
index d32536e..f85d202 100644
--- a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
@@ -441,17 +441,81 @@ public class SevenZFile implements Closeable {
                     archiveVersionMajor, archiveVersionMinor));
         }
 
+        boolean headerLooksValid = false;  // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive"
         final long startHeaderCrc = 0xffffFFFFL & buf.getInt();
-        final StartHeader startHeader = readStartHeader(startHeaderCrc);
+        if (startHeaderCrc == 0) {
+            // This is an indication of a corrupt header - peek the next 20 bytes
+            long currentPosition = channel.position();
+            ByteBuffer peekBuf = ByteBuffer.allocate(20);
+            readFully(peekBuf);
+            channel.position(currentPosition);
+            // Header invalid if all data is 0
+            while (peekBuf.hasRemaining()) {
+                if (peekBuf.get()!=0) {
+                    headerLooksValid = true;
+                    break;
+                }
+            }
+        } else {
+            headerLooksValid = true;
+        }
+
+        if (headerLooksValid) {
+            final StartHeader startHeader = readStartHeader(startHeaderCrc);
+            return initializeArchive(startHeader, password, true);
+        } else {
+            // No valid header found - probably first file of multipart archive was removed too early. Scan for end header.
+            ByteBuffer nidBuf = ByteBuffer.allocate(1);
+            final long searchLimit = 1024 * 1024 * 1;
+            final long previousDataSize = channel.position() + 20;  // Main header, plus bytes that readStartHeader would read
+            final long minPos;
+            // Determine minimal position - can't start before current position
+            if (channel.position() + searchLimit > channel.size()) {
+                minPos = channel.position();
+            } else {
+                minPos = channel.size() - searchLimit;
+            }
+            long pos = channel.size() - 1;
+            // Loop: Try from end of archive
+            while (pos > minPos) {
+                pos--;
+                channel.position(pos);
+                nidBuf.rewind();
+                channel.read(nidBuf);
+                int nid = nidBuf.array()[0];
+                // First indicator: Byte equals one of these header identifiers
+                if ((nid == NID.kEncodedHeader) || (nid == NID.kHeader)) {
+                    try {
+                        // Try to initialize Archive structure from here
+                        final StartHeader startHeader = new StartHeader();
+                        startHeader.nextHeaderOffset = pos - previousDataSize;
+                        startHeader.nextHeaderSize = channel.size() - pos;
+                        Archive result = initializeArchive(startHeader, password, false);
+                        // Sanity check: There must be some data...
+                        if (result.packSizes!=null && result.files.length>0) {
+                            return result;
+                        }
+                    } catch (Exception ignore) {
+                        // Wrong guess...
+                    }
+                }
+            }
+            throw new IOException("Start header corrupt and unable to guess end header");
+        }
+    }
+
+    private Archive initializeArchive(StartHeader startHeader, final byte[] password, boolean verifyCrc) throws IOException {
         assertFitsIntoInt("nextHeaderSize", startHeader.nextHeaderSize);
         final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize;
         channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset);
-        buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN);
+        ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN);
         readFully(buf);
-        final CRC32 crc = new CRC32();
-        crc.update(buf.array());
-        if (startHeader.nextHeaderCrc != crc.getValue()) {
-            throw new IOException("NextHeader CRC mismatch");
+        if (verifyCrc) {
+            final CRC32 crc = new CRC32();
+            crc.update(buf.array());
+            if (startHeader.nextHeaderCrc != crc.getValue()) {
+                throw new IOException("NextHeader CRC mismatch");
+            }
         }
 
         Archive archive = new Archive();
diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
index db4fe1d..8e904bc 100644
--- a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
@@ -395,6 +395,11 @@ public class SevenZFileTest extends AbstractTestCase {
         }
     }
 
+    @Test
+    public void test7zUnarchiveWithDefectHeader() throws Exception {
+        test7zUnarchive(getFile("bla.noendheaderoffset.7z"), SevenZMethod.LZMA);
+    }
+
     private void test7zUnarchive(final File f, final SevenZMethod m, final byte[] password) throws Exception {
         try (SevenZFile sevenZFile = new SevenZFile(f, password)) {
             test7zUnarchive(sevenZFile, m);
diff --git a/src/test/resources/bla.noendheaderoffset.7z b/src/test/resources/bla.noendheaderoffset.7z
new file mode 100644
index 0000000..7ddb9e9
Binary files /dev/null and b/src/test/resources/bla.noendheaderoffset.7z differ