You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2019/12/15 11:10:50 UTC

[commons-compress] branch master updated (dfa9ed3 -> 26c7aff)

This is an automated email from the ASF dual-hosted git repository.

bodewig pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git.


    from dfa9ed3  COMPRESS-477 mostly cosmetic changes to #84
     new be2d130  COMPRESS-477 support for splitted zip files
     new e61d22e  COMPRESS-477 building a split/spanned zip
     new 95140bd  Merge branch 'master' of https://github.com/apache/commons-compress into COMPRESS-477-constructing
     new 21025c7  COMPRESS-477 set disk number start to long
     new 26c7aff  COMPRESS-477 add javadocs, make ZipSplitOutputStream an implementation detail

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../compress/archivers/zip/ZipArchiveEntry.java    |   1 -
 .../archivers/zip/ZipArchiveOutputStream.java      | 197 +++++++++++++++--
 .../archivers/zip/ZipSplitOutputStream.java        | 243 +++++++++++++++++++++
 .../commons/compress/archivers/ZipTestCase.java    | 135 ++++++++++++
 .../archivers/zip/ZipSplitOutputStreamTest.java    | 104 +++++++++
 5 files changed, 660 insertions(+), 20 deletions(-)
 create mode 100644 src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
 create mode 100644 src/test/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStreamTest.java


[commons-compress] 03/05: Merge branch 'master' of https://github.com/apache/commons-compress into COMPRESS-477-constructing

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git

commit 95140bd9675df0284e97706c1b48d34e339738c9
Merge: e61d22e dfa9ed3
Author: Lee <pe...@gmail.com>
AuthorDate: Mon Dec 9 10:02:20 2019 +0800

    Merge branch 'master' of https://github.com/apache/commons-compress into COMPRESS-477-constructing

 .../compress/archivers/zip/ZipArchiveEntry.java    |   12 +
 .../archivers/zip/ZipArchiveInputStream.java       |   35 +-
 .../commons/compress/archivers/zip/ZipFile.java    |  167 +-
 .../archivers/zip/ZipSplitOutputStream.java        |   12 +-
 .../zip/ZipSplitReadOnlySeekableByteChannel.java   |  250 ++
 .../commons/compress/compressors/FileNameUtil.java |   15 -
 .../commons/compress/utils/FileNameUtils.java      |   78 +
 .../utils/MultiReadOnlySeekableByteChannel.java    |   17 +
 .../archivers/zip/ZipArchiveInputStreamTest.java   |   72 +
 .../compress/archivers/zip/ZipFileTest.java        |   83 +
 .../commons/compress/utils/FileNameUtilsTest.java  |   53 +
 .../MultiReadOnlySeekableByteChannelTest.java      |    8 +-
 .../ZipSplitReadOnlySeekableByteChannelTest.java   |  181 ++
 .../split_zip_created_by_winrar/file_to_compare_1} | 2594 ++++++++++----------
 .../split_zip_created_by_winrar.z01                |  Bin 0 -> 262144 bytes
 .../split_zip_created_by_winrar.z02                |  Bin 0 -> 262144 bytes
 .../split_zip_created_by_winrar.zip                |  Bin 0 -> 50536 bytes
 .../zip_to_compare_created_by_winrar.zip           |  Bin 0 -> 574820 bytes
 .../split_zip_created_by_zip/file_to_compare_1     |   38 +
 .../split_zip_created_by_zip/file_to_compare_2     |   79 +
 .../split_zip_created_by_zip.z01                   |  Bin 0 -> 262144 bytes
 .../split_zip_created_by_zip.z02                   |  Bin 0 -> 262144 bytes
 .../split_zip_created_by_zip.zip                   |  Bin 0 -> 57763 bytes
 .../split_zip_created_by_zip_zip64.z01             |  Bin 0 -> 262144 bytes
 .../split_zip_created_by_zip_zip64.z02             |  Bin 0 -> 262144 bytes
 .../split_zip_created_by_zip_zip64.zip             |  Bin 0 -> 69177 bytes
 .../zip_to_compare_created_by_zip_zip64.zip        |  Bin 0 -> 584681 bytes
 27 files changed, 2347 insertions(+), 1347 deletions(-)

diff --cc src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
index bcf3459,53af93a..47a3edd
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
@@@ -1080,11 -1081,23 +1080,23 @@@ public class ZipArchiveEntry extends ja
          this.commentSource = commentSource;
      }
  
+     /**
+      * The number of the split segment this entry starts at.
+      *
+      * @return the number of the split segment this entry starts at.
+      * @since 1.20
+      */
 -    public long getDiskNumberStart() {
 +    public int getDiskNumberStart() {
          return diskNumberStart;
      }
  
+     /**
+      * The number of the split segment this entry starts at.
+      *
+      * @param diskNumberStart the number of the split segment this entry starts at.
+      * @since 1.20
+      */
 -    public void setDiskNumberStart(long diskNumberStart) {
 +    public void setDiskNumberStart(int diskNumberStart) {
          this.diskNumberStart = diskNumberStart;
      }
  
diff --cc src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
index 152272b,2319456..6f2a16c
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
@@@ -852,6 -860,10 +860,10 @@@ public class ZipFile implements Closeab
              if (hasRelativeHeaderOffset) {
                  ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
              }
+ 
+             if (hasDiskStart) {
 -                ze.setDiskNumberStart(z64.getDiskStartNumber().getValue());
++                ze.setDiskNumberStart(z64.getDiskStartNumber().getIntValue());
+             }
          }
      }
  
diff --cc src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
index 20ed3c1,0000000..d622604
mode 100644,000000..100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
@@@ -1,236 -1,0 +1,238 @@@
 +/*
 + *  Licensed to the Apache Software Foundation (ASF) under one or more
 + *  contributor license agreements.  See the NOTICE file distributed with
 + *  this work for additional information regarding copyright ownership.
 + *  The ASF licenses this file to You under the Apache License, Version 2.0
 + *  (the "License"); you may not use this file except in compliance with
 + *  the License.  You may obtain a copy of the License at
 + *
 + *      http://www.apache.org/licenses/LICENSE-2.0
 + *
 + *  Unless required by applicable law or agreed to in writing, software
 + *  distributed under the License is distributed on an "AS IS" BASIS,
 + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + *  See the License for the specific language governing permissions and
 + *  limitations under the License.
 + *
 + */
 +package org.apache.commons.compress.archivers.zip;
 +
- import org.apache.commons.compress.compressors.FileNameUtil;
++import org.apache.commons.compress.utils.FileNameUtils;
 +
- import java.io.*;
- import java.nio.ByteBuffer;
++import java.io.File;
++import java.io.FileOutputStream;
++import java.io.IOException;
++import java.io.OutputStream;
 +
 +public class ZipSplitOutputStream extends OutputStream {
 +    private OutputStream outputStream;
 +    private File zipFile;
 +    private final long splitSize;
 +    private int currentSplitSegmentIndex = 0;
 +    private long currentSplitSegmentBytesWritten = 0;
 +    private boolean finished = false;
 +    private final byte[] singleByte = new byte[1];
 +
 +    /**
 +     * 8.5.1 Capacities for split archives are as follows:
 +     * <p>
 +     * Maximum number of segments = 4,294,967,295 - 1
 +     * Maximum .ZIP segment size = 4,294,967,295 bytes (refer to section 8.5.6)
 +     * Minimum segment size = 64K
 +     * Maximum PKSFX segment size = 2,147,483,647 bytes
 +     */
 +    private final long ZIP_SEGMENT_MIN_SIZE = 64 * 1024L;
 +    private final long ZIP_SEGMENT_MAX_SIZE = 4294967295L;
 +
 +    /**
 +     * Create a split zip. If the zip file is smaller than the split size,
 +     * then there will only be one split zip, and its suffix is .zip,
 +     * otherwise the split segments should be like .z01, .z02, ... .z(N-1), .zip
 +     *
 +     * @param zipFile   the zip file to write to
 +     * @param splitSize the split size
 +     */
 +    public ZipSplitOutputStream(final File zipFile, final long splitSize) throws IllegalArgumentException, IOException {
 +        if (splitSize < ZIP_SEGMENT_MIN_SIZE || splitSize > ZIP_SEGMENT_MAX_SIZE) {
 +            throw new IllegalArgumentException("zip split segment size should between 64K and 4,294,967,295");
 +        }
 +
 +        this.zipFile = zipFile;
 +        this.splitSize = splitSize;
 +
 +        this.outputStream = new FileOutputStream(zipFile);
 +        // write the zip split signature 0x08074B50 to the zip file
 +        writeZipSplitSignature();
 +    }
 +
 +    /**
 +     * Some data can not be written to different split segments, for example:
 +     * <p>
 +     * 4.4.1.5  The end of central directory record and the Zip64 end
 +     * of central directory locator record MUST reside on the same
 +     * disk when splitting or spanning an archive.
 +     *
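 +     * @param unsplittableContentSize the size in bytes of the content that must not be split across segments
 +     * @throws IllegalArgumentException if the unsplittable content size is bigger than the split segment size
 +     * @throws IOException if opening a new split segment fails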
 +     */
 +    public void prepareToWriteUnsplittableContent(long unsplittableContentSize) throws IllegalArgumentException, IOException {
 +        if (unsplittableContentSize > this.splitSize) {
 +            throw new IllegalArgumentException("The unsplittable content size is bigger than the split segment size");
 +        }
 +
 +        long bytesRemainingInThisSegment = this.splitSize - this.currentSplitSegmentBytesWritten;
 +        if (bytesRemainingInThisSegment < unsplittableContentSize) {
 +            openNewSplitSegment();
 +        }
 +    }
 +
 +    @Override
 +    public void write(int i) throws IOException {
 +        singleByte[0] = (byte)(i & 0xff);
 +        write(singleByte);
 +    }
 +
 +    @Override
 +    public void write(byte[] b) throws IOException {
 +        write(b, 0, b.length);
 +    }
 +
 +    /**
 +     * Write the data to zip split segments, if the remaining space of current split segment
 +     * is not enough, then a new split segment should be created
 +     *
 +     * @param b   data to write
 +     * @param off offset of the start of data in param b
 +     * @param len the length of data to write
 +     * @throws IOException if writing to a segment or opening a new segment fails
 +     */
 +    @Override
 +    public void write(byte[] b, int off, int len) throws IOException {
 +        if (len <= 0) {
 +            return;
 +        }
 +
 +        if (currentSplitSegmentBytesWritten >= splitSize) {
 +            openNewSplitSegment();
 +            write(b, off, len);
 +        } else if (currentSplitSegmentBytesWritten + len > splitSize) {
 +            int bytesToWriteForThisSegment = (int) splitSize - (int) currentSplitSegmentBytesWritten;
 +            write(b, off, bytesToWriteForThisSegment);
 +            openNewSplitSegment();
 +            write(b, off + bytesToWriteForThisSegment, len - bytesToWriteForThisSegment);
 +        } else {
 +            outputStream.write(b, off, len);
 +            currentSplitSegmentBytesWritten += len;
 +        }
 +    }
 +
 +    @Override
 +    public void close() throws IOException {
 +        if (!finished) {
 +            finish();
 +        }
 +    }
 +
 +    /**
 +     * The last zip split segment's suffix should be .zip
 +     *
 +     * @throws IOException if the archive has already been finished or closing the stream fails
 +     */
 +    private void finish() throws IOException {
 +        if (finished) {
 +            throw new IOException("This archive has already been finished");
 +        }
 +
-         String zipFileBaseName = FileNameUtil.getBaseName(zipFile.getName());
++        String zipFileBaseName = FileNameUtils.getBaseName(zipFile.getName());
 +        File lastZipSplitSegmentFile = new File(zipFile.getParentFile(), zipFileBaseName + ".zip");
 +        outputStream.close();
 +        zipFile.renameTo(lastZipSplitSegmentFile);
 +        finished = true;
 +    }
 +
 +    /**
 +     * Create a new zip split segment and prepare to write to the new segment
 +     *
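 +     * @return the output stream writing to the newly opened split segment
 +     * @throws IOException if the new segment file already exists or an I/O error occurs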
 +     */
 +    private OutputStream openNewSplitSegment() throws IOException {
 +        File newFile;
 +        if (currentSplitSegmentIndex == 0) {
 +            outputStream.close();
 +            newFile = createNewSplitSegmentFile(1);
 +            zipFile.renameTo(newFile);
 +        }
 +
 +        newFile = createNewSplitSegmentFile(null);
 +
 +
 +        OutputStream newFileOutputStream = new FileOutputStream(newFile);
 +        outputStream.close();
 +        outputStream = newFileOutputStream;
 +        currentSplitSegmentBytesWritten = 0;
 +        zipFile = newFile;
 +        currentSplitSegmentIndex++;
 +
 +        return newFileOutputStream;
 +    }
 +
 +    /**
 +     * Write the zip split signature (0x08074B50) to the head of the first zip split segment
 +     *
 +     * @throws IOException
 +     */
 +    private void writeZipSplitSignature() throws IOException {
 +        outputStream.write(ZipArchiveOutputStream.DD_SIG);
 +        currentSplitSegmentBytesWritten += ZipArchiveOutputStream.DD_SIG.length;
 +    }
 +
 +    /**
 +     * Create the new zip split segment, the last zip segment should be .zip, and the zip split segments' suffix should be
 +     * like .z01, .z02, .z03, ... .z99, .z100, ..., .z(N-1), .zip
 +     * <p>
 +     * 8.3.3 Split ZIP files are typically written to the same location
 +     * and are subject to name collisions if the spanned name
 +     * format is used since each segment will reside on the same
 +     * drive. To avoid name collisions, split archives are named
 +     * as follows.
 +     * <p>
 +     * Segment 1   = filename.z01
 +     * Segment n-1 = filename.z(n-1)
 +     * Segment n   = filename.zip
 +     * <p>
 +     * NOTE:
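 +     * Zip split segments are numbered 1, 2, 3, ..., and we are creating a new segment,
 +     * so the new segment suffix should be (currentSplitSegmentIndex + 2)
 +     *
 +     * @param zipSplitSegmentSuffixIndex the suffix index to use, or null to use (currentSplitSegmentIndex + 2)
 +     * @return the file for the new split segment
 +     * @throws IOException if a file with the new segment's name already exists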
 +     */
 +    private File createNewSplitSegmentFile(Integer zipSplitSegmentSuffixIndex) throws IOException {
 +        int newZipSplitSegmentSuffixIndex = zipSplitSegmentSuffixIndex == null ? (currentSplitSegmentIndex + 2) : zipSplitSegmentSuffixIndex;
-         String baseName = FileNameUtil.getBaseName(zipFile.getName());
++        String baseName = FileNameUtils.getBaseName(zipFile.getName());
 +        String extension = ".z";
 +        if (newZipSplitSegmentSuffixIndex <= 9) {
 +            extension += "0" + newZipSplitSegmentSuffixIndex;
 +        } else {
 +            extension += newZipSplitSegmentSuffixIndex;
 +        }
 +
 +        File newFile = new File(zipFile.getParent(), baseName + extension);
 +
 +        if (newFile.exists()) {
 +            throw new IOException("split zip segment " + baseName + extension + " already exists");
 +        }
 +        return newFile;
 +    }
 +
 +    public int getCurrentSplitSegmentIndex() {
 +        return currentSplitSegmentIndex;
 +    }
 +
 +    public long getCurrentSplitSegmentBytesWritten() {
 +        return currentSplitSegmentBytesWritten;
 +    }
 +}


[commons-compress] 01/05: COMPRESS-477 support for splitted zip files

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git

commit be2d130862163cfa8597744140830c3f7bb3a5ed
Author: Lee <pe...@gmail.com>
AuthorDate: Thu Nov 7 14:48:03 2019 +0800

    COMPRESS-477 support for splitted zip files
    
    add support for constructing splitted zip files
---
 .../compress/archivers/zip/ZipArchiveEntry.java    |  10 +-
 .../archivers/zip/ZipArchiveOutputStream.java      | 168 ++++++++++++++--
 .../archivers/zip/ZipSplitOutputStream.java        | 219 +++++++++++++++++++++
 .../commons/compress/compressors/FileNameUtil.java |  15 ++
 4 files changed, 393 insertions(+), 19 deletions(-)

diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
index 30f8479..bcf3459 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
@@ -144,7 +144,7 @@ public class ZipArchiveEntry extends java.util.zip.ZipEntry
     private boolean isStreamContiguous = false;
     private NameSource nameSource = NameSource.NAME;
     private CommentSource commentSource = CommentSource.COMMENT;
-
+    private int diskNumberStart;
 
     /**
      * Creates a new zip entry with the specified name.
@@ -1080,6 +1080,14 @@ public class ZipArchiveEntry extends java.util.zip.ZipEntry
         this.commentSource = commentSource;
     }
 
+    public int getDiskNumberStart() {
+        return diskNumberStart;
+    }
+
+    public void setDiskNumberStart(int diskNumberStart) {
+        this.diskNumberStart = diskNumberStart;
+    }
+
     private ZipExtraField[] copyOf(final ZipExtraField[] src, final int length) {
         final ZipExtraField[] cpy = new ZipExtraField[length];
         System.arraycopy(src, 0, cpy, 0, Math.min(src.length, length));
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
index c40aab2..962e60b 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
@@ -190,6 +190,16 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
     private long cdLength = 0;
 
     /**
+     * Disk number start of central directory.
+     */
+    private long cdDiskNumberStart = 0;
+
+    /**
+     * Length of end of central directory
+     */
+    private long eocdLength = 0;
+
+    /**
      * Helper, a 0 as ZipShort.
      */
     private static final byte[] ZERO = {0, 0};
@@ -267,6 +277,17 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
     private final Calendar calendarInstance = Calendar.getInstance();
 
     /**
+     * Whether we are creating a split zip
+     */
+    private boolean isSplitZip = false;
+
+    /**
+     * Holds the number of central directory entries on each disk; this is used
+     * when writing Zip64 End Of Central Directory and End Of Central Directory
+     */
+    private final Map<Integer, Integer> numberOfCDInDiskData = new HashMap<>();
+
+    /**
      * Creates a new ZIP OutputStream filtering the underlying stream.
      * @param out the outputstream to zip
      */
@@ -306,6 +327,14 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
         streamCompressor = _streamCompressor;
     }
 
+    public ZipArchiveOutputStream(final File file, final long zipSplitSize) throws IOException {
+        def = new Deflater(level, true);
+        this.out = new ZipSplitOutputStream(file, zipSplitSize);
+        streamCompressor = StreamCompressor.create(this.out, def);
+        isSplitZip = true;
+        channel = null;
+    }
+
     /**
      * Creates a new ZIP OutputStream writing to a SeekableByteChannel.
      *
@@ -467,15 +496,41 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
             throw new IOException("This archive contains unclosed entries.");
         }
 
-        cdOffset = streamCompressor.getTotalBytesWritten();
+        long cdOverallOffset = streamCompressor.getTotalBytesWritten();
+        cdOffset = cdOverallOffset;
+        if (isSplitZip) {
+            // when creating a split zip, the offset should be
+            // the offset to the corresponding segment disk
+            ZipSplitOutputStream zipSplitOutputStream = (ZipSplitOutputStream)this.out;
+            cdOffset = zipSplitOutputStream.getCurrentSplitSegmentBytesWritten();
+            cdDiskNumberStart = zipSplitOutputStream.getCurrentSplitSegmentIndex();
+        }
         writeCentralDirectoryInChunks();
 
-        cdLength = streamCompressor.getTotalBytesWritten() - cdOffset;
+        cdLength = streamCompressor.getTotalBytesWritten() - cdOverallOffset;
+
+        // calculate the length of end of central directory, as it may be used in writeZip64CentralDirectory
+        final ByteBuffer commentData = this.zipEncoding.encode(comment);
+        final int commentLength = commentData.limit() - commentData.position();
+        eocdLength = WORD /* length of EOCD_SIG */
+                + SHORT /* number of this disk */
+                + SHORT /* disk number of start of central directory */
+                + SHORT /* total number of entries on this disk */
+                + SHORT /* total number of entries */
+                + WORD  /* size of central directory */
+                + WORD  /* offset of start of central directory */
+                + SHORT /* zip comment length */
+                + commentLength /* zip comment */;
+
         writeZip64CentralDirectory();
         writeCentralDirectoryEnd();
         metaData.clear();
         entries.clear();
         streamCompressor.close();
+        if (isSplitZip) {
+            // trigger the ZipSplitOutputStream to write the final split segment
+            out.close();
+        }
         finished = true;
     }
 
@@ -1036,7 +1091,15 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
             addUnicodeExtraFields(ze, encodable, name);
         }
 
-        final long localHeaderStart = streamCompressor.getTotalBytesWritten();
+        long localHeaderStart = streamCompressor.getTotalBytesWritten();
+        if (isSplitZip) {
+            // when creating a split zip, the offset should be
+            // the offset to the corresponding segment disk
+            ZipSplitOutputStream splitOutputStream = (ZipSplitOutputStream)this.out;
+            ze.setDiskNumberStart(splitOutputStream.getCurrentSplitSegmentIndex());
+            localHeaderStart = splitOutputStream.getCurrentSplitSegmentBytesWritten();
+        }
+
         final byte[] localHeader = createLocalFileHeader(ze, name, encodable, phased, localHeaderStart);
         metaData.put(ze, new EntryMetaData(localHeaderStart, usesDataDescriptor(ze.getMethod(), phased)));
         entry.localDataStart = localHeaderStart + LFH_CRC_OFFSET; // At crc offset
@@ -1235,6 +1298,16 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
     private byte[] createCentralFileHeader(final ZipArchiveEntry ze, final ByteBuffer name,
                                            final EntryMetaData entryMetaData,
                                            final boolean needsZip64Extra) throws IOException {
+        if(isSplitZip) {
+            int currentSplitSegment = ((ZipSplitOutputStream)this.out).getCurrentSplitSegmentIndex();
+            if(numberOfCDInDiskData.get(currentSplitSegment) == null) {
+                numberOfCDInDiskData.put(currentSplitSegment, 1);
+            } else {
+                int originalNumberOfCD = numberOfCDInDiskData.get(currentSplitSegment);
+                numberOfCDInDiskData.put(currentSplitSegment, originalNumberOfCD + 1);
+            }
+        }
+
         final byte[] extra = ze.getCentralDirectoryExtra();
 
         // file comment length
@@ -1291,7 +1364,11 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
         putShort(commentLen, buf, CFH_COMMENT_LENGTH_OFFSET);
 
         // disk number start
-        System.arraycopy(ZERO, 0, buf, CFH_DISK_NUMBER_OFFSET, SHORT);
+        if(isSplitZip) {
+            putShort(ze.getDiskNumberStart(), buf, CFH_DISK_NUMBER_OFFSET);
+        } else {
+            System.arraycopy(ZERO, 0, buf, CFH_DISK_NUMBER_OFFSET, SHORT);
+        }
 
         // internal file attributes
         putShort(ze.getInternalAttributes(), buf, CFH_INTERNAL_ATTRIBUTES_OFFSET);
@@ -1352,11 +1429,21 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
      * and {@link Zip64Mode #setUseZip64} is {@link Zip64Mode#Never}.
      */
     protected void writeCentralDirectoryEnd() throws IOException {
+        if(!hasUsedZip64) {
+            ((ZipSplitOutputStream)this.out).prepareToWriteUnsplittableContent(eocdLength);
+        }
+
         writeCounted(EOCD_SIG);
 
-        // disk numbers
-        writeCounted(ZERO);
-        writeCounted(ZERO);
+        // number of this disk
+        int numberOfThisDisk = 0;
+        if(isSplitZip) {
+            numberOfThisDisk = ((ZipSplitOutputStream)this.out).getCurrentSplitSegmentIndex();
+        }
+        writeCounted(ZipShort.getBytes(numberOfThisDisk));
+
+        // disk number of the start of central directory
+        writeCounted(ZipShort.getBytes((int)cdDiskNumberStart));
 
         // number of entries
         final int numberOfEntries = entries.size();
@@ -1370,9 +1457,15 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
                                              .ARCHIVE_TOO_BIG_MESSAGE);
         }
 
+        // total number of entries in the central directory on this disk
+        int numOfEntriesOnThisDisk = numberOfCDInDiskData.get(numberOfThisDisk) == null ? 0 : numberOfCDInDiskData.get(numberOfThisDisk);
+        final byte[] numOfEntriesOnThisDiskData = ZipShort
+                .getBytes(Math.min(numOfEntriesOnThisDisk, ZIP64_MAGIC_SHORT));
+        writeCounted(numOfEntriesOnThisDiskData);
+
+        // number of entries
         final byte[] num = ZipShort.getBytes(Math.min(numberOfEntries,
-                                                ZIP64_MAGIC_SHORT));
-        writeCounted(num);
+                ZIP64_MAGIC_SHORT));
         writeCounted(num);
 
         // length and location of CD
@@ -1408,11 +1501,20 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
             return;
         }
 
-        final long offset = streamCompressor.getTotalBytesWritten();
+        long offset = streamCompressor.getTotalBytesWritten();
+        long diskNumberStart = 0L;
+        if(isSplitZip) {
+            // when creating a split zip, the offset should be
+            // the offset to the corresponding segment disk
+            ZipSplitOutputStream zipSplitOutputStream = (ZipSplitOutputStream)this.out;
+            offset = zipSplitOutputStream.getCurrentSplitSegmentBytesWritten();
+            diskNumberStart = zipSplitOutputStream.getCurrentSplitSegmentIndex();
+        }
+
 
         writeOut(ZIP64_EOCD_SIG);
-        // size, we don't have any variable length as we don't support
-        // the extensible data sector, yet
+        // size of zip64 end of central directory, we don't have any variable length
+        // as we don't support the extensible data sector, yet
         writeOut(ZipEightByteInteger
                  .getBytes(SHORT   /* version made by */
                            + SHORT /* version needed to extract */
@@ -1428,14 +1530,25 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
         writeOut(ZipShort.getBytes(ZIP64_MIN_VERSION));
         writeOut(ZipShort.getBytes(ZIP64_MIN_VERSION));
 
-        // disk numbers - four bytes this time
-        writeOut(LZERO);
-        writeOut(LZERO);
+        // number of this disk
+        long numberOfThisDisk = 0;
+        if (isSplitZip) {
+            numberOfThisDisk = ((ZipSplitOutputStream)this.out).getCurrentSplitSegmentIndex();
+        }
+        writeOut(ZipLong.getBytes(numberOfThisDisk));
+
+        // disk number of the start of central directory
+        writeOut(ZipLong.getBytes(cdDiskNumberStart));
+
+        // total number of entries in the central directory on this disk
+        int numOfEntriesOnThisDisk = numberOfCDInDiskData.get(numberOfThisDisk) == null ? 0 : numberOfCDInDiskData.get(numberOfThisDisk);
+        final byte[] numOfEntriesOnThisDiskData = ZipEightByteInteger
+                .getBytes(Math.min(numOfEntriesOnThisDisk, ZIP64_MAGIC_SHORT));
+        writeOut(numOfEntriesOnThisDiskData);
 
         // number of entries
         final byte[] num = ZipEightByteInteger.getBytes(entries.size());
         writeOut(num);
-        writeOut(num);
 
         // length and location of CD
         writeOut(ZipEightByteInteger.getBytes(cdLength));
@@ -1443,15 +1556,34 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
 
         // no "zip64 extensible data sector" for now
 
+        if(isSplitZip) {
+            // based on the zip specification, the End Of Central Directory record and
+            // the Zip64 End Of Central Directory locator record must be on the same segment
+            long zip64EOCDLOCLength = WORD  /* length of ZIP64_EOCD_LOC_SIG */
+                    + WORD  /* disk number of ZIP64_EOCD_SIG */
+                    + DWORD /* offset of ZIP64_EOCD_SIG */
+                    + WORD  /* total number of disks */;
+
+            long unsplittableContentSize = zip64EOCDLOCLength + eocdLength;
+            ((ZipSplitOutputStream)this.out).prepareToWriteUnsplittableContent(unsplittableContentSize);
+        }
+
         // and now the "ZIP64 end of central directory locator"
         writeOut(ZIP64_EOCD_LOC_SIG);
 
         // disk number holding the ZIP64 EOCD record
-        writeOut(LZERO);
+        writeOut(ZipLong.getBytes(diskNumberStart));
         // relative offset of ZIP64 EOCD record
         writeOut(ZipEightByteInteger.getBytes(offset));
         // total number of disks
-        writeOut(ONE);
+        if(isSplitZip) {
+            // the Zip64 End Of Central Directory Locator and the End Of Central Directory must be
+            // in the same split disk, it means they must be located in the last disk
+            long totalNumberOfDisks = ((ZipSplitOutputStream)this.out).getCurrentSplitSegmentIndex() + 1;
+            writeOut(ZipLong.getBytes(totalNumberOfDisks));
+        } else {
+            writeOut(ONE);
+        }
     }
 
     /**
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
new file mode 100644
index 0000000..80a896d
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
@@ -0,0 +1,219 @@
+package org.apache.commons.compress.archivers.zip;
+
+import org.apache.commons.compress.compressors.FileNameUtil;
+
+import java.io.*;
+import java.nio.ByteBuffer;
+
+public class ZipSplitOutputStream extends OutputStream {
+    private OutputStream outputStream;
+    private File zipFile;
+    private long splitSize;
+    private int currentSplitSegmentIndex = 0;
+    private long currentSplitSegmentBytesWritten = 0;
+    private boolean finished = false;
+
+    /**
+     * 8.5.1 Capacities for split archives are as follows:
+     * <p>
+     * Maximum number of segments = 4,294,967,295 - 1
+     * Maximum .ZIP segment size = 4,294,967,295 bytes (refer to section 8.5.6)
+     * Minimum segment size = 64K
+     * Maximum PKSFX segment size = 2,147,483,647 bytes
+     */
+    private final long ZIP_SEGMENT_MIN_SIZE = 64 * 1024L;
+    private final long ZIP_SEGMENT_MAX_SIZE = 4294967295L;
+
+    /**
+     * Create a split zip. If the zip file is smaller than the split size,
+     * then there will only be one split zip, and its suffix is .zip,
+     * otherwise the split segments should be like .z01, .z02, ... .z(N-1), .zip
+     *
+     * @param zipFile   the zip file to write to
+     * @param splitSize the split size
+     */
+    public ZipSplitOutputStream(final File zipFile, final long splitSize) throws IllegalArgumentException, IOException {
+        if (splitSize < ZIP_SEGMENT_MIN_SIZE || splitSize > ZIP_SEGMENT_MAX_SIZE) {
+            throw new IllegalArgumentException("zip split segment size should between 64K and 4,294,967,295");
+        }
+
+        this.zipFile = zipFile;
+        this.splitSize = splitSize;
+
+        this.outputStream = new FileOutputStream(zipFile);
+        // write the zip split signature 0x08074B50 to the zip file
+        writeZipSplitSignature();
+    }
+
+    /**
+     * Some data can not be written to different split segments, for example:
+     * <p>
+     * 4.4.1.5  The end of central directory record and the Zip64 end
+     * of central directory locator record MUST reside on the same
+     * disk when splitting or spanning an archive.
+     *
+     * @param unsplittableContentSize
+     * @throws IllegalArgumentException
+     * @throws IOException
+     */
+    public void prepareToWriteUnsplittableContent(long unsplittableContentSize) throws IllegalArgumentException, IOException {
+        if (unsplittableContentSize > this.splitSize) {
+            throw new IllegalArgumentException("The unsplittable content size is bigger than the split segment size");
+        }
+
+        long bytesRemainingInThisSegment = this.splitSize - this.currentSplitSegmentBytesWritten;
+        if (bytesRemainingInThisSegment < unsplittableContentSize) {
+            openNewSplitSegment();
+        }
+    }
+
+    @Override
+    public void write(int i) throws IOException {
+        byte[] b = ByteBuffer.allocate(4).putInt(i).array();
+        write(b);
+    }
+
+    @Override
+    public void write(byte[] b) throws IOException {
+        write(b, 0, b.length);
+    }
+
+    /**
+     * Write the data to zip split segments, if the remaining space of current split segment
+     * is not enough, then a new split segment should be created
+     *
+     * @param b   data to write
+     * @param off offset of the start of data in param b
+     * @param len the length of data to write
+     * @throws IOException
+     */
+    @Override
+    public void write(byte[] b, int off, int len) throws IOException {
+        if (len <= 0) {
+            return;
+        }
+
+        if (currentSplitSegmentBytesWritten >= splitSize) {
+            openNewSplitSegment();
+            write(b, off, len);
+        } else if (currentSplitSegmentBytesWritten + len > splitSize) {
+            int bytesToWriteForThisSegment = (int) splitSize - (int) currentSplitSegmentBytesWritten;
+            write(b, off, bytesToWriteForThisSegment);
+            openNewSplitSegment();
+            write(b, off + bytesToWriteForThisSegment, len - bytesToWriteForThisSegment);
+        } else {
+            outputStream.write(b, off, len);
+            currentSplitSegmentBytesWritten += len;
+        }
+    }
+
+    @Override
+    public void close() throws IOException {
+        if (!finished) {
+            finish();
+        }
+    }
+
+    /**
+     * The last zip split segment's suffix should be .zip
+     *
+     * @throws IOException
+     */
+    private void finish() throws IOException {
+        if (finished) {
+            throw new IOException("This archive has already been finished");
+        }
+
+        String zipFileBaseName = FileNameUtil.getBaseName(zipFile.getName());
+        File lastZipSplitSegmentFile = new File(zipFile.getParentFile(), zipFileBaseName + ".zip");
+        outputStream.close();
+        zipFile.renameTo(lastZipSplitSegmentFile);
+        finished = true;
+    }
+
+    /**
+     * Create a new zip split segment and prepare to write to the new segment
+     *
+     * @return
+     * @throws IOException
+     */
+    private OutputStream openNewSplitSegment() throws IOException {
+        File newFile;
+        if (currentSplitSegmentIndex == 0) {
+            outputStream.close();
+            newFile = createNewSplitSegmentFile(1);
+            zipFile.renameTo(newFile);
+        }
+
+        newFile = createNewSplitSegmentFile(null);
+
+
+        OutputStream newFileOutputStream = new FileOutputStream(newFile);
+        outputStream.close();
+        outputStream = newFileOutputStream;
+        currentSplitSegmentBytesWritten = 0;
+        zipFile = newFile;
+        currentSplitSegmentIndex++;
+
+        return newFileOutputStream;
+    }
+
+    /**
+     * Write the zip split signature (0x08074B50) to the head of the first zip split segment
+     *
+     * @throws IOException
+     */
+    private void writeZipSplitSignature() throws IOException {
+        outputStream.write(ZipArchiveOutputStream.DD_SIG);
+        currentSplitSegmentBytesWritten += ZipArchiveOutputStream.DD_SIG.length;
+    }
+
+    /**
+     * Create the new zip split segment, the last zip segment should be .zip, and the zip split segments' suffix should be
+     * like .z01, .z02, .z03, ... .z99, .z100, ..., .z(N-1), .zip
+     * <p>
+     * 8.3.3 Split ZIP files are typically written to the same location
+     * and are subject to name collisions if the spanned name
+     * format is used since each segment will reside on the same
+     * drive. To avoid name collisions, split archives are named
+     * as follows.
+     * <p>
+     * Segment 1   = filename.z01
+     * Segment n-1 = filename.z(n-1)
+     * Segment n   = filename.zip
+     * <p>
+     * NOTE:
+     * The zip split segment begin from 1,2,3,... , and we're creating a new segment,
+     * so the new segment suffix should be (currentSplitSegmentIndex + 2)
+     *
+     * @param zipSplitSegmentSuffixIndex
+     * @return
+     * @throws IOException
+     */
+    private File createNewSplitSegmentFile(Integer zipSplitSegmentSuffixIndex) throws IOException {
+        int newZipSplitSegmentSuffixIndex = zipSplitSegmentSuffixIndex == null ? (currentSplitSegmentIndex + 2) : zipSplitSegmentSuffixIndex;
+        String baseName = FileNameUtil.getBaseName(zipFile.getName());
+        String extension = ".z";
+        if (newZipSplitSegmentSuffixIndex <= 9) {
+            extension += "0" + newZipSplitSegmentSuffixIndex;
+        } else {
+            extension += newZipSplitSegmentSuffixIndex;
+        }
+
+        String newFileName = zipFile.getParent() + File.separatorChar + baseName + extension;
+        File newFile = new File(newFileName);
+
+        if (newFile.exists()) {
+            throw new IOException("split zip segment " + newFileName + " already exists");
+        }
+        return newFile;
+    }
+
+    public int getCurrentSplitSegmentIndex() {
+        return currentSplitSegmentIndex;
+    }
+
+    public long getCurrentSplitSegmentBytesWritten() {
+        return currentSplitSegmentBytesWritten;
+    }
+}
diff --git a/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java b/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java
index cc69031..570b12e 100644
--- a/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java
+++ b/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java
@@ -193,4 +193,19 @@ public class FileNameUtil {
         return fileName + defaultExtension;
     }
 
+    public static String getBaseName(String filename) {
+        if (filename == null) {
+            return null;
+        }
+
+        int lastSeparatorPosition = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
+        String name = filename.substring(lastSeparatorPosition + 1);
+
+        int extensionPosition = name.lastIndexOf('.');
+        if(extensionPosition < 0) {
+            return name;
+        }
+
+        return name.substring(0, extensionPosition);
+    }
 }


[commons-compress] 02/05: COMPRESS-477 building a split/spanned zip

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git

commit e61d22ee3549e9ac7379114d9e0a40d68b5dc600
Author: Lee <pe...@gmail.com>
AuthorDate: Tue Nov 12 09:20:10 2019 +0800

    COMPRESS-477 building a split/spanned zip
    
    Add test cases for building a split/spanned zip. As the PR for extracting a split zip is not merged yet,
    there is no other way to verify in code that the constructed zip is valid.
    
    I will add those test cases as soon as the PR for extracting split zips is merged.
---
 .../archivers/zip/ZipArchiveOutputStream.java      |   4 +-
 .../archivers/zip/ZipSplitOutputStream.java        |  29 ++++--
 .../commons/compress/compressors/FileNameUtil.java |   4 +-
 .../commons/compress/archivers/ZipTestCase.java    |  64 +++++++++++++
 .../archivers/zip/ZipSplitOutputStreamTest.java    | 104 +++++++++++++++++++++
 .../zip_to_compare_created_by_zip.zip              | Bin 0 -> 582047 bytes
 6 files changed, 196 insertions(+), 9 deletions(-)

diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
index 962e60b..ebd9c75 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
@@ -1299,6 +1299,8 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
                                            final EntryMetaData entryMetaData,
                                            final boolean needsZip64Extra) throws IOException {
         if(isSplitZip) {
+            // calculate the disk number for every central file header,
+            // this will be used in writing End Of Central Directory and Zip64 End Of Central Directory
             int currentSplitSegment = ((ZipSplitOutputStream)this.out).getCurrentSplitSegmentIndex();
             if(numberOfCDInDiskData.get(currentSplitSegment) == null) {
                 numberOfCDInDiskData.put(currentSplitSegment, 1);
@@ -1429,7 +1431,7 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
      * and {@link Zip64Mode #setUseZip64} is {@link Zip64Mode#Never}.
      */
     protected void writeCentralDirectoryEnd() throws IOException {
-        if(!hasUsedZip64) {
+        if(!hasUsedZip64 && isSplitZip) {
             ((ZipSplitOutputStream)this.out).prepareToWriteUnsplittableContent(eocdLength);
         }
 
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
index 80a896d..20ed3c1 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
@@ -1,3 +1,20 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
 package org.apache.commons.compress.archivers.zip;
 
 import org.apache.commons.compress.compressors.FileNameUtil;
@@ -8,10 +25,11 @@ import java.nio.ByteBuffer;
 public class ZipSplitOutputStream extends OutputStream {
     private OutputStream outputStream;
     private File zipFile;
-    private long splitSize;
+    private final long splitSize;
     private int currentSplitSegmentIndex = 0;
     private long currentSplitSegmentBytesWritten = 0;
     private boolean finished = false;
+    private final byte[] singleByte = new byte[1];
 
     /**
      * 8.5.1 Capacities for split archives are as follows:
@@ -69,8 +87,8 @@ public class ZipSplitOutputStream extends OutputStream {
 
     @Override
     public void write(int i) throws IOException {
-        byte[] b = ByteBuffer.allocate(4).putInt(i).array();
-        write(b);
+        singleByte[0] = (byte)(i & 0xff);
+        write(singleByte);
     }
 
     @Override
@@ -200,11 +218,10 @@ public class ZipSplitOutputStream extends OutputStream {
             extension += newZipSplitSegmentSuffixIndex;
         }
 
-        String newFileName = zipFile.getParent() + File.separatorChar + baseName + extension;
-        File newFile = new File(newFileName);
+        File newFile = new File(zipFile.getParent(), baseName + extension);
 
         if (newFile.exists()) {
-            throw new IOException("split zip segment " + newFileName + " already exists");
+            throw new IOException("split zip segment " + baseName + extension + " already exists");
         }
         return newFile;
     }
diff --git a/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java b/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java
index 570b12e..6f7f557 100644
--- a/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java
+++ b/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java
@@ -18,6 +18,7 @@
  */
 package org.apache.commons.compress.compressors;
 
+import java.io.File;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Locale;
@@ -198,8 +199,7 @@ public class FileNameUtil {
             return null;
         }
 
-        int lastSeparatorPosition = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
-        String name = filename.substring(lastSeparatorPosition + 1);
+        String name = new File(filename).getName();
 
         int extensionPosition = name.lastIndexOf('.');
         if(extensionPosition < 0) {
diff --git a/src/test/java/org/apache/commons/compress/archivers/ZipTestCase.java b/src/test/java/org/apache/commons/compress/archivers/ZipTestCase.java
index 6083b41..438c527 100644
--- a/src/test/java/org/apache/commons/compress/archivers/ZipTestCase.java
+++ b/src/test/java/org/apache/commons/compress/archivers/ZipTestCase.java
@@ -647,6 +647,18 @@ public final class ZipTestCase extends AbstractTestCase {
         testInputStreamStatistics("COMPRESS-380/COMPRESS-380.zip", expected);
     }
 
+    @Test
+    public void buildSplitZipTest() throws IOException {
+        File directoryToZip = getFilesToZip();
+        File outputZipFile = new File(dir, "splitZip.zip");
+        long splitSize = 100 * 1024L; /* 100 KB */
+        final ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(outputZipFile, splitSize);
+
+        addFilesToZip(zipArchiveOutputStream, directoryToZip);
+        zipArchiveOutputStream.close();
+        // TODO: validate the created zip files when extracting split zip is merged into master
+    }
+
     private void testInputStreamStatistics(String fileName, Map<String, List<Long>> expectedStatistics)
         throws IOException, ArchiveException {
         final File input = getFile(fileName);
@@ -701,4 +713,56 @@ public final class ZipTestCase extends AbstractTestCase {
         final long b = stats.getCompressedCount();
         l.add(Arrays.asList(t, b));
     }
+
+    private File getFilesToZip() throws IOException {
+        File originalZipFile = getFile("COMPRESS-477/split_zip_created_by_zip/zip_to_compare_created_by_zip.zip");
+        ZipFile zipFile = new ZipFile(originalZipFile);
+        Enumeration<ZipArchiveEntry> zipEntries = zipFile.getEntries();
+        ZipArchiveEntry zipEntry;
+        File outputFile;
+        InputStream inputStream;
+        OutputStream outputStream;
+        byte[] buffer;
+        int readLen;
+
+        while (zipEntries.hasMoreElements()) {
+            zipEntry = zipEntries.nextElement();
+            if (zipEntry.isDirectory()) {
+                continue;
+            }
+
+            outputFile = new File(dir, zipEntry.getName());
+            if (!outputFile.getParentFile().exists()) {
+                outputFile.getParentFile().mkdirs();
+            }
+            outputFile = new File(dir, zipEntry.getName());
+
+            inputStream = zipFile.getInputStream(zipEntry);
+            outputStream = new FileOutputStream(outputFile);
+            buffer = new byte[(int)zipEntry.getSize()];
+            while((readLen = inputStream.read(buffer)) > 0) {
+                outputStream.write(buffer, 0, readLen);
+            }
+
+            inputStream.close();
+            outputStream.close();
+        }
+
+        return dir.listFiles()[0];
+    }
+
+    private void addFilesToZip(ZipArchiveOutputStream zipArchiveOutputStream, File fileToAdd) throws IOException {
+        if(fileToAdd.isDirectory()) {
+            for(File file : fileToAdd.listFiles()) {
+                addFilesToZip(zipArchiveOutputStream, file);
+            }
+        } else {
+            ZipArchiveEntry zipArchiveEntry = new ZipArchiveEntry(fileToAdd.getPath());
+            zipArchiveEntry.setMethod(ZipEntry.DEFLATED);
+
+            zipArchiveOutputStream.putArchiveEntry(zipArchiveEntry);
+            IOUtils.copy(new FileInputStream(fileToAdd), zipArchiveOutputStream);
+            zipArchiveOutputStream.closeArchiveEntry();
+        }
+    }
 }
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStreamTest.java
new file mode 100644
index 0000000..4889e58
--- /dev/null
+++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStreamTest.java
@@ -0,0 +1,104 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.zip;
+
+import org.apache.commons.compress.AbstractTestCase;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+public class ZipSplitOutputStreamTest extends AbstractTestCase {
+    @Rule
+    public ExpectedException thrown = ExpectedException.none();
+
+    @Test
+    public void throwsExceptionIfSplitSizeIsTooSmall() throws IOException {
+        thrown.expect(IllegalArgumentException.class);
+        new ZipSplitOutputStream(File.createTempFile("temp", "zip"), (64 * 1024 - 1));
+    }
+
+    @Test
+    public void throwsExceptionIfSplitSizeIsTooLarge() throws IOException {
+        thrown.expect(IllegalArgumentException.class);
+        new ZipSplitOutputStream(File.createTempFile("temp", "zip"), (4 * 1024 * 1024 * 1024L));
+    }
+
+    @Test
+    public void throwsIfUnsplittableSizeLargerThanSplitSize() throws IOException {
+        thrown.expect(IllegalArgumentException.class);
+        long splitSize = 100 * 1024;
+        ZipSplitOutputStream output = new ZipSplitOutputStream(File.createTempFile("temp", "zip"), splitSize);
+        output.prepareToWriteUnsplittableContent(splitSize + 1);
+    }
+
+    @Test
+    public void splitZipBeginsWithZipSplitSignature() throws IOException {
+        File tempFile = File.createTempFile("temp", "zip");
+        new ZipSplitOutputStream(tempFile, 100 * 1024L);
+
+        InputStream inputStream = new FileInputStream(tempFile);
+        byte[] buffer = new byte[4];
+        inputStream.read(buffer);
+
+        Assert.assertEquals(ByteBuffer.wrap(ZipArchiveOutputStream.DD_SIG).getInt(), ByteBuffer.wrap(buffer).getInt());
+    }
+
+    @Test
+    public void testCreateSplittedFiles() throws IOException {
+        File testOutputFile = new File(dir, "testCreateSplittedFiles.zip");
+        int splitSize = 100 * 1024; /* 100KB */
+        ZipSplitOutputStream zipSplitOutputStream = new ZipSplitOutputStream(testOutputFile, splitSize);
+
+        File fileToTest = getFile("COMPRESS-477/split_zip_created_by_zip/zip_to_compare_created_by_zip.zip");
+        InputStream inputStream = new FileInputStream(fileToTest);
+        byte[] buffer = new byte[4096];
+        int readLen;
+
+        while ((readLen = inputStream.read(buffer)) > 0) {
+            zipSplitOutputStream.write(buffer, 0, readLen);
+        }
+
+        inputStream.close();
+        zipSplitOutputStream.close();
+
+        File zipFile = new File(dir.getPath(), "testCreateSplittedFiles.z01");
+        Assert.assertEquals(zipFile.length(), splitSize);
+
+        zipFile = new File(dir.getPath(), "testCreateSplittedFiles.z02");
+        Assert.assertEquals(zipFile.length(), splitSize);
+
+        zipFile = new File(dir.getPath(), "testCreateSplittedFiles.z03");
+        Assert.assertEquals(zipFile.length(), splitSize);
+
+        zipFile = new File(dir.getPath(), "testCreateSplittedFiles.z04");
+        Assert.assertEquals(zipFile.length(), splitSize);
+
+        zipFile = new File(dir.getPath(), "testCreateSplittedFiles.z05");
+        Assert.assertEquals(zipFile.length(), splitSize);
+
+        zipFile = new File(dir.getPath(), "testCreateSplittedFiles.zip");
+        Assert.assertEquals(zipFile.length(), (fileToTest.length() + 4 - splitSize * 5));
+    }
+}
diff --git a/src/test/resources/COMPRESS-477/split_zip_created_by_zip/zip_to_compare_created_by_zip.zip b/src/test/resources/COMPRESS-477/split_zip_created_by_zip/zip_to_compare_created_by_zip.zip
new file mode 100644
index 0000000..b7326ec
Binary files /dev/null and b/src/test/resources/COMPRESS-477/split_zip_created_by_zip/zip_to_compare_created_by_zip.zip differ


[commons-compress] 05/05: COMPRESS-477 add javadocs, make ZipSplitOutputStream an implementation detail

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git

commit 26c7aff58aa4cdf4943cf3ddb744f680a1fbe007
Author: Stefan Bodewig <bo...@apache.org>
AuthorDate: Sun Dec 15 12:09:54 2019 +0100

    COMPRESS-477 add javadocs, make ZipSplitOutputStream an implementation detail
    
    see #86
---
 .../archivers/zip/ZipArchiveOutputStream.java      | 31 +++++++++++++++++++---
 .../archivers/zip/ZipSplitOutputStream.java        |  7 ++++-
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
index 481f2f8..345a5ad 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
@@ -61,7 +61,8 @@ import static org.apache.commons.compress.archivers.zip.ZipShort.putShort;
  *
  * <p>This class will try to use {@link
  * java.nio.channels.SeekableByteChannel} when it knows that the
- * output is going to go to a file.</p>
+ * output is going to go to a file and no split archive shall be
+ * created.</p>
  *
  * <p>If SeekableByteChannel cannot be used, this implementation will use
  * a Data Descriptor to store size and CRC information for {@link
@@ -279,7 +280,7 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
     /**
      * Whether we are creating a split zip
      */
-    private boolean isSplitZip = false;
+    private final boolean isSplitZip;
 
     /**
      * Holds the number of Central Directories on each disk, this is used
@@ -296,6 +297,7 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
         this.channel = null;
         def = new Deflater(level, true);
         streamCompressor = StreamCompressor.create(out, def);
+        isSplitZip = false;
     }
 
     /**
@@ -325,14 +327,36 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
         out = o;
         channel = _channel;
         streamCompressor = _streamCompressor;
+        isSplitZip = false;
     }
 
+    /**
+     * Creates a split ZIP Archive.
+     *
+     * <p>The files making up the archive will use Z01, Z02,
+     * ... extensions and the last part of it will be the given {@code
+     * file}.</p>
+     *
+     * <p>Even though the stream writes to a file this stream will
+     * behave as if no random access was possible. This means the
+     * sizes of stored entries need to be known before the actual
+     * entry data is written.</p>
+     *
+     * @param file the file that will become the last part of the split archive
+     * @param zipSplitSize maximum size of a single part of the split
+     * archive created by this stream. Must be between 64kB and about
+     * 4GB.
+     *
+     * @throws IOException on error
+     * @throws IllegalArgumentException if zipSplitSize is not in the required range
+     * @since 1.20
+     */
     public ZipArchiveOutputStream(final File file, final long zipSplitSize) throws IOException {
         def = new Deflater(level, true);
         this.out = new ZipSplitOutputStream(file, zipSplitSize);
         streamCompressor = StreamCompressor.create(this.out, def);
-        isSplitZip = true;
         channel = null;
+        isSplitZip = true;
     }
 
     /**
@@ -352,6 +376,7 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
         def = new Deflater(level, true);
         streamCompressor = StreamCompressor.create(channel, def);
         out = null;
+        isSplitZip = false;
     }
 
     /**
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
index d622604..0bed063 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java
@@ -24,7 +24,12 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 
-public class ZipSplitOutputStream extends OutputStream {
+/**
+ * Used internally by {@link ZipArchiveOutputStream} when creating a split archive.
+ *
+ * @since 1.20
+ */
+class ZipSplitOutputStream extends OutputStream {
     private OutputStream outputStream;
     private File zipFile;
     private final long splitSize;


[commons-compress] 04/05: COMPRESS-477 set disk number start to long

Posted by bo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git

commit 21025c71938d91bcaa1d55529087c0a082d22324
Author: Lee <pe...@gmail.com>
AuthorDate: Mon Dec 9 21:18:46 2019 +0800

    COMPRESS-477 set disk number start to long
    
    Based on bodewig's rebased branch, the disk number start should be a long variable.
---
 .../compress/archivers/zip/ZipArchiveEntry.java    |  6 +-
 .../archivers/zip/ZipArchiveOutputStream.java      |  2 +-
 .../commons/compress/archivers/zip/ZipFile.java    |  2 +-
 .../commons/compress/compressors/FileNameUtil.java |  1 -
 .../commons/compress/archivers/ZipTestCase.java    | 77 +++++++++++++++++++++-
 5 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
index 47a3edd..ac9c5e7 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveEntry.java
@@ -144,7 +144,7 @@ public class ZipArchiveEntry extends java.util.zip.ZipEntry
     private boolean isStreamContiguous = false;
     private NameSource nameSource = NameSource.NAME;
     private CommentSource commentSource = CommentSource.COMMENT;
-    private int diskNumberStart;
+    private long diskNumberStart;
 
     /**
      * Creates a new zip entry with the specified name.
@@ -1086,7 +1086,7 @@ public class ZipArchiveEntry extends java.util.zip.ZipEntry
      * @return the number of the split segment this entry starts at.
      * @since 1.20
      */
-    public int getDiskNumberStart() {
+    public long getDiskNumberStart() {
         return diskNumberStart;
     }
 
@@ -1096,7 +1096,7 @@ public class ZipArchiveEntry extends java.util.zip.ZipEntry
      * @param diskNumberStart the number of the split segment this entry starts at.
      * @since 1.20
      */
-    public void setDiskNumberStart(int diskNumberStart) {
+    public void setDiskNumberStart(long diskNumberStart) {
         this.diskNumberStart = diskNumberStart;
     }
 
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
index ebd9c75..481f2f8 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
@@ -1367,7 +1367,7 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream {
 
         // disk number start
         if(isSplitZip) {
-            putShort(ze.getDiskNumberStart(), buf, CFH_DISK_NUMBER_OFFSET);
+            putShort((int) ze.getDiskNumberStart(), buf, CFH_DISK_NUMBER_OFFSET);
         } else {
             System.arraycopy(ZERO, 0, buf, CFH_DISK_NUMBER_OFFSET, SHORT);
         }
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
index 6f2a16c..2319456 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
@@ -862,7 +862,7 @@ public class ZipFile implements Closeable {
             }
 
             if (hasDiskStart) {
-                ze.setDiskNumberStart(z64.getDiskStartNumber().getIntValue());
+                ze.setDiskNumberStart(z64.getDiskStartNumber().getValue());
             }
         }
     }
diff --git a/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java b/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java
index af50dfc..ed94885 100644
--- a/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java
+++ b/src/main/java/org/apache/commons/compress/compressors/FileNameUtil.java
@@ -18,7 +18,6 @@
  */
 package org.apache.commons.compress.compressors;
 
-import java.io.File;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Locale;
diff --git a/src/test/java/org/apache/commons/compress/archivers/ZipTestCase.java b/src/test/java/org/apache/commons/compress/archivers/ZipTestCase.java
index 438c527..ba5e114 100644
--- a/src/test/java/org/apache/commons/compress/archivers/ZipTestCase.java
+++ b/src/test/java/org/apache/commons/compress/archivers/ZipTestCase.java
@@ -27,6 +27,9 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.nio.channels.Channels;
+import java.nio.channels.SeekableByteChannel;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Enumeration;
@@ -44,6 +47,7 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
 import org.apache.commons.compress.archivers.zip.ZipFile;
 import org.apache.commons.compress.archivers.zip.ZipMethod;
+import org.apache.commons.compress.archivers.zip.ZipSplitReadOnlySeekableByteChannel;
 import org.apache.commons.compress.utils.IOUtils;
 import org.apache.commons.compress.utils.InputStreamStatistics;
 import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
@@ -647,16 +651,59 @@ public final class ZipTestCase extends AbstractTestCase {
         testInputStreamStatistics("COMPRESS-380/COMPRESS-380.zip", expected);
     }
 
-    @Test
-    public void buildSplitZipTest() throws IOException {
+    @Test(expected = IllegalArgumentException.class)
+    public void buildSplitZipWithTooSmallSizeThrowsException() throws IOException {
+        new ZipArchiveOutputStream(File.createTempFile("temp", "zip"), 64 * 1024 - 1);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void buildSplitZipWithTooLargeSizeThrowsException() throws IOException {
+        new ZipArchiveOutputStream(File.createTempFile("temp", "zip"), 4294967295L + 1);
+    }
+
+    @Test(expected = IOException.class)
+    public void buildSplitZipWithSegmentAlreadyExistThrowsException() throws IOException {
         File directoryToZip = getFilesToZip();
         File outputZipFile = new File(dir, "splitZip.zip");
         long splitSize = 100 * 1024L; /* 100 KB */
         final ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(outputZipFile, splitSize);
 
+        // create a file that has the same name of one of the created split segments
+        File sameNameFile = new File(dir, "splitZip.z01");
+        sameNameFile.createNewFile();
+
         addFilesToZip(zipArchiveOutputStream, directoryToZip);
         zipArchiveOutputStream.close();
-        // TODO: validate the created zip files when extracting split zip is merged into master
+    }
+
+    @Test
+    public void buildSplitZipTest() throws IOException {
+        File directoryToZip = getFilesToZip();
+        createTestSplitZipSegments();
+
+        File lastFile = new File(dir, "splitZip.zip");
+        SeekableByteChannel channel = ZipSplitReadOnlySeekableByteChannel.buildFromLastSplitSegment(lastFile);
+        InputStream inputStream = Channels.newInputStream(channel);
+        ZipArchiveInputStream splitInputStream = new ZipArchiveInputStream(inputStream, StandardCharsets.UTF_8.toString(), true, false, true);
+
+        ArchiveEntry entry;
+        File fileToCompare;
+        InputStream inputStreamToCompare;
+        int filesNum = countNonDirectories(directoryToZip);
+        int filesCount = 0;
+        while((entry = splitInputStream.getNextEntry()) != null) {
+            if(entry.isDirectory()) {
+                continue;
+            }
+            // compare all files one by one
+            fileToCompare = new File(entry.getName());
+            inputStreamToCompare = new FileInputStream(fileToCompare);
+            Assert.assertTrue(shaded.org.apache.commons.io.IOUtils.contentEquals(splitInputStream, inputStreamToCompare));
+            inputStreamToCompare.close();
+            filesCount++;
+        }
+        // and the number of files should equal
+        assertEquals(filesCount, filesNum);
     }
 
     private void testInputStreamStatistics(String fileName, Map<String, List<Long>> expectedStatistics)
@@ -751,6 +798,17 @@ public final class ZipTestCase extends AbstractTestCase {
         return dir.listFiles()[0];
     }
 
+    private ZipArchiveOutputStream createTestSplitZipSegments() throws IOException {
+        File directoryToZip = getFilesToZip();
+        File outputZipFile = new File(dir, "splitZip.zip");
+        long splitSize = 100 * 1024L; /* 100 KB */
+        final ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(outputZipFile, splitSize);
+
+        addFilesToZip(zipArchiveOutputStream, directoryToZip);
+        zipArchiveOutputStream.close();
+        return zipArchiveOutputStream;
+    }
+
     private void addFilesToZip(ZipArchiveOutputStream zipArchiveOutputStream, File fileToAdd) throws IOException {
         if(fileToAdd.isDirectory()) {
             for(File file : fileToAdd.listFiles()) {
@@ -765,4 +823,17 @@ public final class ZipTestCase extends AbstractTestCase {
             zipArchiveOutputStream.closeArchiveEntry();
         }
     }
+
+    private int countNonDirectories(File file) {
+        if(!file.isDirectory()) {
+            return 1;
+        }
+
+        int result = 0;
+        for (File fileInDirectory : file.listFiles()) {
+            result += countNonDirectories(fileInDirectory);
+        }
+
+        return result;
+    }
 }