You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2013/08/08 17:57:55 UTC

svn commit: r1511843 - in /commons/proper/compress/trunk/src: changes/ main/java/org/apache/commons/compress/archivers/tar/ main/java/org/apache/commons/compress/utils/ test/java/org/apache/commons/compress/archivers/tar/

Author: bodewig
Date: Thu Aug  8 15:57:55 2013
New Revision: 1511843

URL: http://svn.apache.org/r1511843
Log:
COMPRESS-234 read/skip performance improvements to TarArchiveInputStream - patch by BELUGA BEHR

Removed:
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarBuffer.java
Modified:
    commons/proper/compress/trunk/src/changes/changes.xml
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/IOUtils.java
    commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
    commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java

Modified: commons/proper/compress/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/changes/changes.xml?rev=1511843&r1=1511842&r2=1511843&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/changes/changes.xml (original)
+++ commons/proper/compress/trunk/src/changes/changes.xml Thu Aug  8 15:57:55 2013
@@ -80,9 +80,10 @@ The <action> type attribute can be add,u
               due-to="BELUGA BEHR">
         Readabilty patch to TarArchiveInputStream.
       </action>
-      <action type="update" date="2013-07-08" issue="COMPRESS-233"
+      <action type="update" date="2013-07-08" issue="COMPRESS-234"
               due-to="BELUGA BEHR">
-        Performance and readability patch to TarBuffer.
+        Performance improvements to TarArchiveInputStream, in
+        particular to the skip method.
       </action>
     </release>
     <release version="1.5" date="2013-03-14"

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java?rev=1511843&r1=1511842&r2=1511843&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java Thu Aug  8 15:57:55 2013
@@ -36,6 +36,7 @@ import org.apache.commons.compress.archi
 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
 import org.apache.commons.compress.utils.ArchiveUtils;
 import org.apache.commons.compress.utils.CharsetNames;
+import org.apache.commons.compress.utils.IOUtils;
 
 /**
  * The TarInputStream reads a UNIX tar archive as an InputStream.
@@ -45,18 +46,33 @@ import org.apache.commons.compress.utils
  * @NotThreadSafe
  */
 public class TarArchiveInputStream extends ArchiveInputStream {
+
     private static final int SMALL_BUFFER_SIZE = 256;
-    private static final int BUFFER_SIZE = 8 * 1024;
 
-    private final byte[] SKIP_BUF = new byte[BUFFER_SIZE];
     private final byte[] SMALL_BUF = new byte[SMALL_BUFFER_SIZE];
 
+    /** The size the TAR header */
+    private final int recordSize;
+
+    /** The size of a block */
+    private final int blockSize;
+
+    /** True if file has hit EOF */
     private boolean hasHitEOF;
+
+    /** Size of the current entry */
     private long entrySize;
+
+    /** How far into the entry the stream is at */
     private long entryOffset;
-    private byte[] readBuf;
-    protected final TarBuffer buffer;
+
+    /** An input stream to read from */
+    private final InputStream is;
+
+    /** The meta-data about the current entry */
     private TarArchiveEntry currEntry;
+
+    /** The encoding of the file */
     private final ZipEncoding encoding;
 
     /**
@@ -64,7 +80,7 @@ public class TarArchiveInputStream exten
      * @param is the input stream to use
      */
     public TarArchiveInputStream(InputStream is) {
-        this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
+        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
     }
 
     /**
@@ -74,7 +90,8 @@ public class TarArchiveInputStream exten
      * @since 1.4
      */
     public TarArchiveInputStream(InputStream is, String encoding) {
-        this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
+        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE,
+             encoding);
     }
 
     /**
@@ -83,7 +100,7 @@ public class TarArchiveInputStream exten
      * @param blockSize the block size to use
      */
     public TarArchiveInputStream(InputStream is, int blockSize) {
-        this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
+        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE);
     }
 
     /**
@@ -95,7 +112,7 @@ public class TarArchiveInputStream exten
      */
     public TarArchiveInputStream(InputStream is, int blockSize,
                                  String encoding) {
-        this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
+        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
     }
 
     /**
@@ -105,7 +122,7 @@ public class TarArchiveInputStream exten
      * @param recordSize the record size to use
      */
     public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
-        this(is, blockSize, recordSize, null);
+        this(is, blockSize, recordSize, null);      
     }
 
     /**
@@ -118,10 +135,11 @@ public class TarArchiveInputStream exten
      */
     public TarArchiveInputStream(InputStream is, int blockSize, int recordSize,
                                  String encoding) {
-        this.buffer = new TarBuffer(is, blockSize, recordSize);
-        this.readBuf = null;
+        this.is = is;
         this.hasHitEOF = false;
         this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
+        this.recordSize = recordSize;
+        this.blockSize = blockSize;
     }
 
     /**
@@ -130,16 +148,16 @@ public class TarArchiveInputStream exten
      */
     @Override
     public void close() throws IOException {
-        buffer.close();
+        is.close();
     }
 
     /**
-     * Get the record size being used by this stream's TarBuffer.
+     * Get the record size being used by this stream's buffer.
      *
      * @return The TarBuffer record size.
      */
     public int getRecordSize() {
-        return buffer.getRecordSize();
+        return recordSize;
     }
 
     /**
@@ -174,21 +192,14 @@ public class TarArchiveInputStream exten
      */
     @Override
     public long skip(long numToSkip) throws IOException {
-        // REVIEW
-        // This is horribly inefficient, but it ensures that we
-        // properly skip over bytes via the TarBuffer...
-        //
-        long skip = numToSkip;
-        while (skip > 0) {
-            int realSkip = (int) (skip > SKIP_BUF.length
-                                  ? SKIP_BUF.length : skip);
-            int numRead = read(SKIP_BUF, 0, realSkip);
-            if (numRead == -1) {
-                break;
-            }
-            skip -= numRead;
-        }
-        return (numToSkip - skip);
+
+        long available = (entrySize - entryOffset);
+        numToSkip = Math.min(numToSkip, available);
+
+        long skipped = IOUtils.skip(is, numToSkip); 
+        count(skipped);
+        entryOffset += skipped;
+        return skipped;
     }
 
     /**
@@ -217,18 +228,11 @@ public class TarArchiveInputStream exten
         }
 
         if (currEntry != null) {
-            long numToSkip = entrySize - entryOffset;
+            /* Skip will only go to the end of the current entry */
+            skip(Long.MAX_VALUE);
 
-            while (numToSkip > 0) {
-                long skipped = skip(numToSkip);
-                if (skipped <= 0) {
-                    throw new RuntimeException("failed to skip current tar"
-                                               + " entry");
-                }
-                numToSkip -= skipped;
-            }
-
-            readBuf = null;
+            /* skip to the end of the last record */
+            skipRecordPadding();
         }
 
         byte[] headerBuf = getRecord();
@@ -246,6 +250,7 @@ public class TarArchiveInputStream exten
             ioe.initCause(e);
             throw ioe;
         }
+
         entryOffset = 0;
         entrySize = currEntry.getSize();
 
@@ -284,8 +289,22 @@ public class TarArchiveInputStream exten
         // information, we update entrySize here so that it contains
         // the correct value.
         entrySize = currEntry.getSize();
+
         return currEntry;
     }
+    
+    /**
+     * The last record block should be written at the full size, so skip any
+     * additional space used to fill a record after an entry
+     */
+    private void skipRecordPadding() throws IOException {
+        if (this.entrySize > 0 && this.entrySize % this.recordSize != 0) {
+            long numRecords = (this.entrySize / this.recordSize) + 1;
+            long padding = (numRecords * this.recordSize) - this.entrySize;
+            long skipped = IOUtils.skip(is, padding);
+            count(skipped);
+        }
+    }
 
     /**
      * Get the next entry in this tar archive as longname data.
@@ -335,19 +354,46 @@ public class TarArchiveInputStream exten
      * @throws IOException on error
      */
     private byte[] getRecord() throws IOException {
-        byte[] headerBuf = null;
-        if (!hasHitEOF) {
-            headerBuf = buffer.readRecord();
-            hasHitEOF = buffer.isEOFRecord(headerBuf);
-            if (hasHitEOF && headerBuf != null) {
-                buffer.tryToConsumeSecondEOFRecord();
-                headerBuf = null;
-            }
+        byte[] headerBuf = readRecord();
+        hasHitEOF = isEOFRecord(headerBuf);
+        if (hasHitEOF && headerBuf != null) {
+            tryToConsumeSecondEOFRecord();
+            consumeRemainderOfLastBlock();
+            headerBuf = null;
         }
-
         return headerBuf;
     }
 
+    /**
+     * Determine if an archive record indicate End of Archive. End of
+     * archive is indicated by a record that consists entirely of null bytes.
+     *
+     * @param record The record data to check.
+     * @return true if the record data is an End of Archive
+     */
+    protected boolean isEOFRecord(byte[] record) {
+        return record == null || ArchiveUtils.isArrayZero(record, recordSize);
+    }
+    
+    /**
+     * Read a record from the input stream and return the data.
+     *
+     * @return The record data or null if EOF has been hit.
+     * @throws IOException on error
+     */
+    protected byte[] readRecord() throws IOException {
+
+        byte[] record = new byte[recordSize];
+
+        int readNow = is.read(record);
+        count(readNow);
+        if (readNow != recordSize) {
+            return null;
+        }
+
+        return record;
+    }
+
     private void paxHeaders() throws IOException{
         Map<String, String> headers = parsePaxHeaders(this);
         getNextEntry(); // Get the actual file entry
@@ -468,10 +514,43 @@ public class TarArchiveInputStream exten
         }
     }
 
+    /**
+     * Returns the next Archive Entry in this Stream.
+     *
+     * @return the next entry,
+     *         or {@code null} if there are no more entries
+     * @throws IOException if the next entry could not be read
+     */
     @Override
     public ArchiveEntry getNextEntry() throws IOException {
         return getNextTarEntry();
     }
+    
+    /**
+     * Tries to read the next record rewinding the stream if it is not a EOF record.
+     *
+     * <p>This is meant to protect against cases where a tar
+     * implementation has written only one EOF record when two are
+     * expected.  Actually this won't help since a non-conforming
+     * implementation likely won't fill full blocks consisting of - by
+     * default - ten records either so we probably have already read
+     * beyond the archive anyway.</p>
+     */
+    private void tryToConsumeSecondEOFRecord() throws IOException {
+        boolean shouldReset = true;
+        boolean marked = is.markSupported();
+        if (marked) {
+            is.mark(recordSize);
+        }
+        try {
+            shouldReset = !isEOFRecord(readRecord());
+        } finally {
+            if (shouldReset && marked) {
+                pushedBackBytes(recordSize);
+            	is.reset();
+            }
+        }
+    }
 
     /**
      * Reads bytes from the current tar archive entry.
@@ -488,69 +567,23 @@ public class TarArchiveInputStream exten
      */
     @Override
     public int read(byte[] buf, int offset, int numToRead) throws IOException {
-        int totalRead = 0;
+    	int totalRead = 0;
 
-        if (entryOffset >= entrySize) {
+        if (hasHitEOF || entryOffset >= entrySize) {
             return -1;
         }
 
-        if ((numToRead + entryOffset) > entrySize) {
-            numToRead = (int) (entrySize - entryOffset);
-        }
-
-        if (readBuf != null) {
-            int sz = (numToRead > readBuf.length) ? readBuf.length
-                : numToRead;
-
-            System.arraycopy(readBuf, 0, buf, offset, sz);
-
-            if (sz >= readBuf.length) {
-                readBuf = null;
-            } else {
-                int newLen = readBuf.length - sz;
-                byte[] newBuf = new byte[newLen];
-
-                System.arraycopy(readBuf, sz, newBuf, 0, newLen);
-
-                readBuf = newBuf;
-            }
-
-            totalRead += sz;
-            numToRead -= sz;
-            offset += sz;
-        }
-
-        while (numToRead > 0) {
-            byte[] rec = buffer.readRecord();
-
-            if (rec == null) {
-                // Unexpected EOF!
-                throw new IOException("unexpected EOF with " + numToRead
-                                      + " bytes unread. Occured at byte: " + getBytesRead());
-            }
-            count(rec.length);
-            int sz = numToRead;
-            int recLen = rec.length;
-
-            if (recLen > sz) {
-                System.arraycopy(rec, 0, buf, offset, sz);
-
-                readBuf = new byte[recLen - sz];
-
-                System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
-            } else {
-                sz = recLen;
-
-                System.arraycopy(rec, 0, buf, offset, recLen);
-            }
-
-            totalRead += sz;
-            numToRead -= sz;
-            offset += sz;
+        numToRead = Math.min(numToRead, available());
+        
+        totalRead = is.read(buf, offset, numToRead);
+        count(totalRead);
+        
+        if (totalRead == -1) {
+            hasHitEOF = true;
+        } else {
+            entryOffset += (long) totalRead;
         }
 
-        entryOffset += totalRead;
-
         return totalRead;
     }
 
@@ -568,7 +601,12 @@ public class TarArchiveInputStream exten
         return false;
     }
 
-    protected final TarArchiveEntry getCurrentEntry() {
+    /**
+     * Get the current TAR Archive Entry that this input stream is processing
+     * 
+     * @return The current Archive Entry
+     */
+    public ArchiveEntry getCurrentEntry() {
         return currEntry;
     }
 
@@ -585,6 +623,19 @@ public class TarArchiveInputStream exten
     }
 
     /**
+     * This method is invoked once the end of the archive is hit, it
+     * tries to consume the remaining bytes under the assumption that
+     * the tool creating this archive has padded the last block.
+     */
+    private void consumeRemainderOfLastBlock() throws IOException {
+        long bytesReadOfLastBlock = getBytesRead() % blockSize;
+        if (bytesReadOfLastBlock > 0) {
+            long skipped = IOUtils.skip(is, blockSize - bytesReadOfLastBlock);
+            count(skipped);
+        }
+    }
+
+    /**
      * Checks if the signature matches what is expected for a tar file.
      *
      * @param signature

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java?rev=1511843&r1=1511842&r2=1511843&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java Thu Aug  8 15:57:55 2013
@@ -67,9 +67,11 @@ public class TarArchiveOutputStream exte
     private final byte[]    recordBuf;
     private int       assemLen;
     private final byte[]    assemBuf;
-    protected final TarBuffer buffer;
     private int       longFileMode = LONGFILE_ERROR;
     private int       bigNumberMode = BIGNUMBER_ERROR;
+    private int recordsWritten;
+    private final int recordsPerBlock;
+    private final int recordSize;
 
     private boolean closed = false;
 
@@ -92,7 +94,7 @@ public class TarArchiveOutputStream exte
      * @param os the output stream to use
      */
     public TarArchiveOutputStream(OutputStream os) {
-        this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
+        this(os, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
     }
 
     /**
@@ -102,7 +104,7 @@ public class TarArchiveOutputStream exte
      * @since 1.4
      */
     public TarArchiveOutputStream(OutputStream os, String encoding) {
-        this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
+        this(os, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding);
     }
 
     /**
@@ -111,7 +113,7 @@ public class TarArchiveOutputStream exte
      * @param blockSize the block size to use
      */
     public TarArchiveOutputStream(OutputStream os, int blockSize) {
-        this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE);
+        this(os, blockSize, TarConstants.DEFAULT_RCDSIZE);
     }
 
     /**
@@ -123,7 +125,7 @@ public class TarArchiveOutputStream exte
      */
     public TarArchiveOutputStream(OutputStream os, int blockSize,
                                   String encoding) {
-        this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
+        this(os, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
     }
 
     /**
@@ -149,10 +151,11 @@ public class TarArchiveOutputStream exte
         out = new CountingOutputStream(os);
         this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
 
-        this.buffer = new TarBuffer(out, blockSize, recordSize);
         this.assemLen = 0;
         this.assemBuf = new byte[recordSize];
         this.recordBuf = new byte[recordSize];
+        this.recordSize = recordSize;
+        this.recordsPerBlock = blockSize / recordSize;
     }
 
     /**
@@ -217,7 +220,8 @@ public class TarArchiveOutputStream exte
         }
         writeEOFRecord();
         writeEOFRecord();
-        buffer.flushBlock();
+        padAsNeeded();
+        out.flush();
         finished = true;
     }
 
@@ -227,12 +231,11 @@ public class TarArchiveOutputStream exte
      */
     @Override
     public void close() throws IOException {
-        if(!finished) {
+        if (!finished) {
             finish();
         }
 
         if (!closed) {
-            buffer.close();
             out.close();
             closed = true;
         }
@@ -244,7 +247,7 @@ public class TarArchiveOutputStream exte
      * @return The TarBuffer record size.
      */
     public int getRecordSize() {
-        return buffer.getRecordSize();
+        return this.recordSize;
     }
 
     /**
@@ -317,7 +320,7 @@ public class TarArchiveOutputStream exte
 
         entry.writeEntryHeader(recordBuf, encoding,
                                bigNumberMode == BIGNUMBER_STAR);
-        buffer.writeRecord(recordBuf);
+        writeRecord(recordBuf);
 
         currBytes = 0;
 
@@ -353,7 +356,7 @@ public class TarArchiveOutputStream exte
                 assemBuf[i] = 0;
             }
 
-            buffer.writeRecord(assemBuf);
+            writeRecord(assemBuf);
 
             currBytes += assemLen;
             assemLen = 0;
@@ -407,7 +410,7 @@ public class TarArchiveOutputStream exte
                                  assemLen);
                 System.arraycopy(wBuf, wOffset, recordBuf,
                                  assemLen, aLen);
-                buffer.writeRecord(recordBuf);
+                writeRecord(recordBuf);
 
                 currBytes += recordBuf.length;
                 wOffset += aLen;
@@ -438,7 +441,7 @@ public class TarArchiveOutputStream exte
                 break;
             }
 
-            buffer.writeRecord(wBuf, wOffset);
+            writeRecord(wBuf, wOffset);
 
             int num = recordBuf.length;
 
@@ -512,7 +515,7 @@ public class TarArchiveOutputStream exte
      */
     private void writeEOFRecord() throws IOException {
         Arrays.fill(recordBuf, (byte) 0);
-        buffer.writeRecord(recordBuf);
+        writeRecord(recordBuf);
     }
 
     @Override
@@ -528,6 +531,55 @@ public class TarArchiveOutputStream exte
         }
         return new TarArchiveEntry(inputFile, entryName);
     }
+    
+    /**
+     * Write an archive record to the archive.
+     *
+     * @param record The record data to write to the archive.
+     * @throws IOException on error
+     */
+    private void writeRecord(byte[] record) throws IOException {
+        if (record.length != recordSize) {
+            throw new IOException("record to write has length '"
+                                  + record.length
+                                  + "' which is not the record size of '"
+                                  + recordSize + "'");
+        }
+
+        out.write(record);
+        recordsWritten++;
+    }
+    
+    /**
+     * Write an archive record to the archive, where the record may be
+     * inside of a larger array buffer. The buffer must be "offset plus
+     * record size" long.
+     *
+     * @param buf The buffer containing the record data to write.
+     * @param offset The offset of the record data within buf.
+     * @throws IOException on error
+     */
+    private void writeRecord(byte[] buf, int offset) throws IOException {
+ 
+        if ((offset + recordSize) > buf.length) {
+            throw new IOException("record has length '" + buf.length
+                                  + "' with offset '" + offset
+                                  + "' which is less than the record size of '"
+                                  + recordSize + "'");
+        }
+
+        out.write(buf, offset, recordSize);
+        recordsWritten++;
+    }
+
+    private void padAsNeeded() throws IOException {
+        int start = recordsWritten % recordsPerBlock;
+        if (start != 0) {
+            for (int i = start; i < recordsPerBlock; i++) {
+                writeEOFRecord();
+            }
+        }
+    }
 
     private void addPaxHeadersForBigNumbers(Map<String, String> paxHeaders,
                                             TarArchiveEntry entry) {

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java?rev=1511843&r1=1511842&r2=1511843&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java Thu Aug  8 15:57:55 2013
@@ -27,6 +27,12 @@ package org.apache.commons.compress.arch
 // CheckStyle:InterfaceIsTypeCheck OFF (bc)
 public interface TarConstants {
 
+    /** Default record size */
+    int DEFAULT_RCDSIZE = (512);
+
+    /** Default block size */
+    int DEFAULT_BLKSIZE = (DEFAULT_RCDSIZE * 20);
+
     /**
      * GNU format as per before tar 1.12.
      */

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java?rev=1511843&r1=1511842&r2=1511843&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java Thu Aug  8 15:57:55 2013
@@ -230,5 +230,22 @@ public class ArchiveUtils {
             final byte[] buffer2, final int offset2, final int length2){
         return isEqual(buffer1, offset1, length1, buffer2, offset2, length2, true);
     }
-
+    
+    /**
+     * Returns true if the first N bytes of an array are all zero
+     * 
+     * @param a
+     *            The array to check
+     * @param size
+     *            The number of characters to check (not the size of the array)
+     * @return true if the first N bytes are zero
+     */
+    public static boolean isArrayZero(byte[] a, int size) {
+        for (int i = 0; i < size; i++) {
+            if (a[i] != 0) {
+                return false;
+            }
+        }
+        return true;
+    }
 }

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/IOUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/IOUtils.java?rev=1511843&r1=1511842&r2=1511843&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/IOUtils.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/IOUtils.java Thu Aug  8 15:57:55 2013
@@ -70,7 +70,30 @@ public final class IOUtils {
         }
         return count;
     }
+    
+    /**
+     * Skips the given number of bytes by repeatedly invoking skip on
+     * the given input stream if necessary.
+     *
+     * <p>This method will only skip less than the requested number of
+     * bytes if the end of the input stream has been reached.</p>
 
+     * @param input stream to skip bytes in
+     * @param numToSkip the number of bytes to skip
+     * @return the number of bytes actually skipped
+     * @throws IOException
+     */
+    public static long skip(InputStream input, long numToSkip) throws IOException {
+        long available = numToSkip;
+        while (numToSkip > 0) {
+            long skipped = input.skip(numToSkip);
+            if (skipped == 0) {
+                break;
+            }
+            numToSkip -= skipped;
+        }
+        return (available - numToSkip);
+    }
 
     // toByteArray(InputStream) copied from:
     // commons/proper/io/trunk/src/main/java/org/apache/commons/io/IOUtils.java?revision=1428941

Modified: commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java?rev=1511843&r1=1511842&r2=1511843&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java (original)
+++ commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java Thu Aug  8 15:57:55 2013
@@ -27,8 +27,8 @@ import static org.junit.Assert.fail;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.FileInputStream;
-import java.io.InputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.Calendar;
 import java.util.Date;
 import java.util.Map;
@@ -41,7 +41,8 @@ public class TarArchiveInputStreamTest {
 
     @Test
     public void readSimplePaxHeader() throws Exception {
-        final TarArchiveInputStream tais = new TarArchiveInputStream(null);
+        final InputStream is = new ByteArrayInputStream(new byte[1]);
+        final TarArchiveInputStream tais = new TarArchiveInputStream(is);
         Map<String, String> headers = tais
             .parsePaxHeaders(new ByteArrayInputStream("30 atime=1321711775.972059463\n"
                                                       .getBytes(CharsetNames.UTF_8)));
@@ -52,7 +53,8 @@ public class TarArchiveInputStreamTest {
 
     @Test
     public void readPaxHeaderWithEmbeddedNewline() throws Exception {
-        final TarArchiveInputStream tais = new TarArchiveInputStream(null);
+        final InputStream is = new ByteArrayInputStream(new byte[1]);
+        final TarArchiveInputStream tais = new TarArchiveInputStream(is);
         Map<String, String> headers = tais
             .parsePaxHeaders(new ByteArrayInputStream("28 comment=line1\nline2\nand3\n"
                                                       .getBytes(CharsetNames.UTF_8)));
@@ -66,7 +68,8 @@ public class TarArchiveInputStreamTest {
         String ae = "\u00e4";
         String line = "11 path="+ ae + "\n";
         assertEquals(11, line.getBytes(CharsetNames.UTF_8).length);
-        final TarArchiveInputStream tais = new TarArchiveInputStream(null);
+        final InputStream is = new ByteArrayInputStream(new byte[1]);
+        final TarArchiveInputStream tais = new TarArchiveInputStream(is);
         Map<String, String> headers = tais
             .parsePaxHeaders(new ByteArrayInputStream(line.getBytes(CharsetNames.UTF_8)));
         assertEquals(1, headers.size());

Modified: commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java?rev=1511843&r1=1511842&r2=1511843&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java (original)
+++ commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java Thu Aug  8 15:57:55 2013
@@ -482,7 +482,7 @@ public class TarArchiveOutputStreamTest 
         tos.closeArchiveEntry();
         tos.close();
         // test1.xml is small enough to fit into the default blockv size
-        assertEquals(TarBuffer.DEFAULT_BLKSIZE, f.length());
+        assertEquals(TarConstants.DEFAULT_BLKSIZE, f.length());
     }
 
 }



Re: svn commit: r1511843 - in /commons/proper/compress/trunk/src: changes/ main/java/org/apache/commons/compress/archivers/tar/ main/java/org/apache/commons/compress/utils/ test/java/org/apache/commons/compress/archivers/tar/

Posted by sebb <se...@gmail.com>.
On 8 August 2013 16:57,  <bo...@apache.org> wrote:
> Author: bodewig
> Date: Thu Aug  8 15:57:55 2013
> New Revision: 1511843
>
> URL: http://svn.apache.org/r1511843
> Log:
> COMPRESS-234 read/skip performance improvements to TarArchiveInputStream - patch by BELUGA BEHR
>
> Removed:
>     commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarBuffer.java
> Modified:
>     commons/proper/compress/trunk/src/changes/changes.xml
>     commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
>     commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java
>     commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java
>     commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java
>     commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/IOUtils.java
>     commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
>     commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java
>
> Modified: commons/proper/compress/trunk/src/changes/changes.xml
> URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/changes/changes.xml?rev=1511843&r1=1511842&r2=1511843&view=diff
> ==============================================================================
> --- commons/proper/compress/trunk/src/changes/changes.xml (original)
> +++ commons/proper/compress/trunk/src/changes/changes.xml Thu Aug  8 15:57:55 2013
> @@ -80,9 +80,10 @@ The <action> type attribute can be add,u
>                due-to="BELUGA BEHR">
>          Readabilty patch to TarArchiveInputStream.
>        </action>
> -      <action type="update" date="2013-07-08" issue="COMPRESS-233"
> +      <action type="update" date="2013-07-08" issue="COMPRESS-234"
>                due-to="BELUGA BEHR">
> -        Performance and readability patch to TarBuffer.
> +        Performance improvements to TarArchiveInputStream, in
> +        particular to the skip method.
>        </action>
>      </release>
>      <release version="1.5" date="2013-03-14"
>
> Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
> URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java?rev=1511843&r1=1511842&r2=1511843&view=diff
> ==============================================================================
> --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java (original)
> +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java Thu Aug  8 15:57:55 2013
> @@ -36,6 +36,7 @@ import org.apache.commons.compress.archi
>  import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
>  import org.apache.commons.compress.utils.ArchiveUtils;
>  import org.apache.commons.compress.utils.CharsetNames;
> +import org.apache.commons.compress.utils.IOUtils;
>
>  /**
>   * The TarInputStream reads a UNIX tar archive as an InputStream.
> @@ -45,18 +46,33 @@ import org.apache.commons.compress.utils
>   * @NotThreadSafe
>   */
>  public class TarArchiveInputStream extends ArchiveInputStream {
> +
>      private static final int SMALL_BUFFER_SIZE = 256;
> -    private static final int BUFFER_SIZE = 8 * 1024;
>
> -    private final byte[] SKIP_BUF = new byte[BUFFER_SIZE];
>      private final byte[] SMALL_BUF = new byte[SMALL_BUFFER_SIZE];
>
> +    /** The size the TAR header */
> +    private final int recordSize;
> +
> +    /** The size of a block */
> +    private final int blockSize;
> +
> +    /** True if file has hit EOF */
>      private boolean hasHitEOF;
> +
> +    /** Size of the current entry */
>      private long entrySize;
> +
> +    /** How far into the entry the stream is at */
>      private long entryOffset;
> -    private byte[] readBuf;
> -    protected final TarBuffer buffer;
> +
> +    /** An input stream to read from */
> +    private final InputStream is;
> +
> +    /** The meta-data about the current entry */
>      private TarArchiveEntry currEntry;
> +
> +    /** The encoding of the file */
>      private final ZipEncoding encoding;
>
>      /**
> @@ -64,7 +80,7 @@ public class TarArchiveInputStream exten
>       * @param is the input stream to use
>       */
>      public TarArchiveInputStream(InputStream is) {
> -        this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
> +        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
>      }
>
>      /**
> @@ -74,7 +90,8 @@ public class TarArchiveInputStream exten
>       * @since 1.4
>       */
>      public TarArchiveInputStream(InputStream is, String encoding) {
> -        this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
> +        this(is, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE,
> +             encoding);
>      }
>
>      /**
> @@ -83,7 +100,7 @@ public class TarArchiveInputStream exten
>       * @param blockSize the block size to use
>       */
>      public TarArchiveInputStream(InputStream is, int blockSize) {
> -        this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
> +        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE);
>      }
>
>      /**
> @@ -95,7 +112,7 @@ public class TarArchiveInputStream exten
>       */
>      public TarArchiveInputStream(InputStream is, int blockSize,
>                                   String encoding) {
> -        this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
> +        this(is, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
>      }
>
>      /**
> @@ -105,7 +122,7 @@ public class TarArchiveInputStream exten
>       * @param recordSize the record size to use
>       */
>      public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
> -        this(is, blockSize, recordSize, null);
> +        this(is, blockSize, recordSize, null);
>      }
>
>      /**
> @@ -118,10 +135,11 @@ public class TarArchiveInputStream exten
>       */
>      public TarArchiveInputStream(InputStream is, int blockSize, int recordSize,
>                                   String encoding) {
> -        this.buffer = new TarBuffer(is, blockSize, recordSize);
> -        this.readBuf = null;
> +        this.is = is;
>          this.hasHitEOF = false;
>          this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
> +        this.recordSize = recordSize;
> +        this.blockSize = blockSize;
>      }
>
>      /**
> @@ -130,16 +148,16 @@ public class TarArchiveInputStream exten
>       */
>      @Override
>      public void close() throws IOException {
> -        buffer.close();
> +        is.close();
>      }
>
>      /**
> -     * Get the record size being used by this stream's TarBuffer.
> +     * Get the record size being used by this stream's buffer.
>       *
>       * @return The TarBuffer record size.
>       */
>      public int getRecordSize() {
> -        return buffer.getRecordSize();
> +        return recordSize;
>      }
>
>      /**
> @@ -174,21 +192,14 @@ public class TarArchiveInputStream exten
>       */
>      @Override
>      public long skip(long numToSkip) throws IOException {
> -        // REVIEW
> -        // This is horribly inefficient, but it ensures that we
> -        // properly skip over bytes via the TarBuffer...
> -        //
> -        long skip = numToSkip;
> -        while (skip > 0) {
> -            int realSkip = (int) (skip > SKIP_BUF.length
> -                                  ? SKIP_BUF.length : skip);
> -            int numRead = read(SKIP_BUF, 0, realSkip);
> -            if (numRead == -1) {
> -                break;
> -            }
> -            skip -= numRead;
> -        }
> -        return (numToSkip - skip);
> +
> +        long available = (entrySize - entryOffset);
> +        numToSkip = Math.min(numToSkip, available);
> +
> +        long skipped = IOUtils.skip(is, numToSkip);
> +        count(skipped);
> +        entryOffset += skipped;
> +        return skipped;
>      }
>
>      /**
> @@ -217,18 +228,11 @@ public class TarArchiveInputStream exten
>          }
>
>          if (currEntry != null) {
> -            long numToSkip = entrySize - entryOffset;
> +            /* Skip will only go to the end of the current entry */
> +            skip(Long.MAX_VALUE);
>
> -            while (numToSkip > 0) {
> -                long skipped = skip(numToSkip);
> -                if (skipped <= 0) {
> -                    throw new RuntimeException("failed to skip current tar"
> -                                               + " entry");
> -                }
> -                numToSkip -= skipped;
> -            }
> -
> -            readBuf = null;
> +            /* skip to the end of the last record */
> +            skipRecordPadding();
>          }
>
>          byte[] headerBuf = getRecord();
> @@ -246,6 +250,7 @@ public class TarArchiveInputStream exten
>              ioe.initCause(e);
>              throw ioe;
>          }
> +
>          entryOffset = 0;
>          entrySize = currEntry.getSize();
>
> @@ -284,8 +289,22 @@ public class TarArchiveInputStream exten
>          // information, we update entrySize here so that it contains
>          // the correct value.
>          entrySize = currEntry.getSize();
> +
>          return currEntry;
>      }
> +
> +    /**
> +     * The last record block should be written at the full size, so skip any
> +     * additional space used to fill a record after an entry
> +     */
> +    private void skipRecordPadding() throws IOException {
> +        if (this.entrySize > 0 && this.entrySize % this.recordSize != 0) {
> +            long numRecords = (this.entrySize / this.recordSize) + 1;
> +            long padding = (numRecords * this.recordSize) - this.entrySize;
> +            long skipped = IOUtils.skip(is, padding);
> +            count(skipped);
> +        }
> +    }
>
>      /**
>       * Get the next entry in this tar archive as longname data.
> @@ -335,19 +354,46 @@ public class TarArchiveInputStream exten
>       * @throws IOException on error
>       */
>      private byte[] getRecord() throws IOException {
> -        byte[] headerBuf = null;
> -        if (!hasHitEOF) {
> -            headerBuf = buffer.readRecord();
> -            hasHitEOF = buffer.isEOFRecord(headerBuf);
> -            if (hasHitEOF && headerBuf != null) {
> -                buffer.tryToConsumeSecondEOFRecord();
> -                headerBuf = null;
> -            }
> +        byte[] headerBuf = readRecord();
> +        hasHitEOF = isEOFRecord(headerBuf);
> +        if (hasHitEOF && headerBuf != null) {
> +            tryToConsumeSecondEOFRecord();
> +            consumeRemainderOfLastBlock();
> +            headerBuf = null;
>          }
> -
>          return headerBuf;
>      }
>
> +    /**
> +     * Determine if an archive record indicate End of Archive. End of
> +     * archive is indicated by a record that consists entirely of null bytes.
> +     *
> +     * @param record The record data to check.
> +     * @return true if the record data is an End of Archive
> +     */
> +    protected boolean isEOFRecord(byte[] record) {
> +        return record == null || ArchiveUtils.isArrayZero(record, recordSize);
> +    }
> +
> +    /**
> +     * Read a record from the input stream and return the data.
> +     *
> +     * @return The record data or null if EOF has been hit.
> +     * @throws IOException on error
> +     */
> +    protected byte[] readRecord() throws IOException {
> +
> +        byte[] record = new byte[recordSize];
> +
> +        int readNow = is.read(record);

The read() method is not guaranteed to read the full buffer in a
single invocation.
Probably needs to be wrapped in a loop until count is zero.

> +        count(readNow);
> +        if (readNow != recordSize) {
> +            return null;
> +        }
> +
> +        return record;
> +    }
> +
>      private void paxHeaders() throws IOException{
>          Map<String, String> headers = parsePaxHeaders(this);
>          getNextEntry(); // Get the actual file entry
> @@ -468,10 +514,43 @@ public class TarArchiveInputStream exten
>          }
>      }
>
> +    /**
> +     * Returns the next Archive Entry in this Stream.
> +     *
> +     * @return the next entry,
> +     *         or {@code null} if there are no more entries
> +     * @throws IOException if the next entry could not be read
> +     */
>      @Override
>      public ArchiveEntry getNextEntry() throws IOException {
>          return getNextTarEntry();
>      }
> +
> +    /**
> +     * Tries to read the next record rewinding the stream if it is not a EOF record.
> +     *
> +     * <p>This is meant to protect against cases where a tar
> +     * implementation has written only one EOF record when two are
> +     * expected.  Actually this won't help since a non-conforming
> +     * implementation likely won't fill full blocks consisting of - by
> +     * default - ten records either so we probably have already read
> +     * beyond the archive anyway.</p>
> +     */
> +    private void tryToConsumeSecondEOFRecord() throws IOException {
> +        boolean shouldReset = true;
> +        boolean marked = is.markSupported();
> +        if (marked) {
> +            is.mark(recordSize);
> +        }
> +        try {
> +            shouldReset = !isEOFRecord(readRecord());
> +        } finally {
> +            if (shouldReset && marked) {
> +                pushedBackBytes(recordSize);
> +               is.reset();
> +            }
> +        }
> +    }
>
>      /**
>       * Reads bytes from the current tar archive entry.
> @@ -488,69 +567,23 @@ public class TarArchiveInputStream exten
>       */
>      @Override
>      public int read(byte[] buf, int offset, int numToRead) throws IOException {
> -        int totalRead = 0;
> +       int totalRead = 0;
>
> -        if (entryOffset >= entrySize) {
> +        if (hasHitEOF || entryOffset >= entrySize) {
>              return -1;
>          }
>
> -        if ((numToRead + entryOffset) > entrySize) {
> -            numToRead = (int) (entrySize - entryOffset);
> -        }
> -
> -        if (readBuf != null) {
> -            int sz = (numToRead > readBuf.length) ? readBuf.length
> -                : numToRead;
> -
> -            System.arraycopy(readBuf, 0, buf, offset, sz);
> -
> -            if (sz >= readBuf.length) {
> -                readBuf = null;
> -            } else {
> -                int newLen = readBuf.length - sz;
> -                byte[] newBuf = new byte[newLen];
> -
> -                System.arraycopy(readBuf, sz, newBuf, 0, newLen);
> -
> -                readBuf = newBuf;
> -            }
> -
> -            totalRead += sz;
> -            numToRead -= sz;
> -            offset += sz;
> -        }
> -
> -        while (numToRead > 0) {
> -            byte[] rec = buffer.readRecord();
> -
> -            if (rec == null) {
> -                // Unexpected EOF!
> -                throw new IOException("unexpected EOF with " + numToRead
> -                                      + " bytes unread. Occured at byte: " + getBytesRead());
> -            }
> -            count(rec.length);
> -            int sz = numToRead;
> -            int recLen = rec.length;
> -
> -            if (recLen > sz) {
> -                System.arraycopy(rec, 0, buf, offset, sz);
> -
> -                readBuf = new byte[recLen - sz];
> -
> -                System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
> -            } else {
> -                sz = recLen;
> -
> -                System.arraycopy(rec, 0, buf, offset, recLen);
> -            }
> -
> -            totalRead += sz;
> -            numToRead -= sz;
> -            offset += sz;
> +        numToRead = Math.min(numToRead, available());
> +
> +        totalRead = is.read(buf, offset, numToRead);

Ditto

> +        count(totalRead);
> +
> +        if (totalRead == -1) {
> +            hasHitEOF = true;
> +        } else {
> +            entryOffset += (long) totalRead;
>          }
>
> -        entryOffset += totalRead;
> -
>          return totalRead;
>      }
>
> @@ -568,7 +601,12 @@ public class TarArchiveInputStream exten
>          return false;
>      }
>
> -    protected final TarArchiveEntry getCurrentEntry() {
> +    /**
> +     * Get the current TAR Archive Entry that this input stream is processing
> +     *
> +     * @return The current Archive Entry
> +     */
> +    public ArchiveEntry getCurrentEntry() {
>          return currEntry;
>      }
>
> @@ -585,6 +623,19 @@ public class TarArchiveInputStream exten
>      }
>
>      /**
> +     * This method is invoked once the end of the archive is hit, it
> +     * tries to consume the remaining bytes under the assumption that
> +     * the tool creating this archive has padded the last block.
> +     */
> +    private void consumeRemainderOfLastBlock() throws IOException {
> +        long bytesReadOfLastBlock = getBytesRead() % blockSize;
> +        if (bytesReadOfLastBlock > 0) {
> +            long skipped = IOUtils.skip(is, blockSize - bytesReadOfLastBlock);
> +            count(skipped);
> +        }
> +    }
> +
> +    /**
>       * Checks if the signature matches what is expected for a tar file.
>       *
>       * @param signature
>
> Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java
> URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java?rev=1511843&r1=1511842&r2=1511843&view=diff
> ==============================================================================
> --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java (original)
> +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java Thu Aug  8 15:57:55 2013
> @@ -67,9 +67,11 @@ public class TarArchiveOutputStream exte
>      private final byte[]    recordBuf;
>      private int       assemLen;
>      private final byte[]    assemBuf;
> -    protected final TarBuffer buffer;
>      private int       longFileMode = LONGFILE_ERROR;
>      private int       bigNumberMode = BIGNUMBER_ERROR;
> +    private int recordsWritten;
> +    private final int recordsPerBlock;
> +    private final int recordSize;
>
>      private boolean closed = false;
>
> @@ -92,7 +94,7 @@ public class TarArchiveOutputStream exte
>       * @param os the output stream to use
>       */
>      public TarArchiveOutputStream(OutputStream os) {
> -        this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
> +        this(os, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE);
>      }
>
>      /**
> @@ -102,7 +104,7 @@ public class TarArchiveOutputStream exte
>       * @since 1.4
>       */
>      public TarArchiveOutputStream(OutputStream os, String encoding) {
> -        this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
> +        this(os, TarConstants.DEFAULT_BLKSIZE, TarConstants.DEFAULT_RCDSIZE, encoding);
>      }
>
>      /**
> @@ -111,7 +113,7 @@ public class TarArchiveOutputStream exte
>       * @param blockSize the block size to use
>       */
>      public TarArchiveOutputStream(OutputStream os, int blockSize) {
> -        this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE);
> +        this(os, blockSize, TarConstants.DEFAULT_RCDSIZE);
>      }
>
>      /**
> @@ -123,7 +125,7 @@ public class TarArchiveOutputStream exte
>       */
>      public TarArchiveOutputStream(OutputStream os, int blockSize,
>                                    String encoding) {
> -        this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
> +        this(os, blockSize, TarConstants.DEFAULT_RCDSIZE, encoding);
>      }
>
>      /**
> @@ -149,10 +151,11 @@ public class TarArchiveOutputStream exte
>          out = new CountingOutputStream(os);
>          this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
>
> -        this.buffer = new TarBuffer(out, blockSize, recordSize);
>          this.assemLen = 0;
>          this.assemBuf = new byte[recordSize];
>          this.recordBuf = new byte[recordSize];
> +        this.recordSize = recordSize;
> +        this.recordsPerBlock = blockSize / recordSize;
>      }
>
>      /**
> @@ -217,7 +220,8 @@ public class TarArchiveOutputStream exte
>          }
>          writeEOFRecord();
>          writeEOFRecord();
> -        buffer.flushBlock();
> +        padAsNeeded();
> +        out.flush();
>          finished = true;
>      }
>
> @@ -227,12 +231,11 @@ public class TarArchiveOutputStream exte
>       */
>      @Override
>      public void close() throws IOException {
> -        if(!finished) {
> +        if (!finished) {
>              finish();
>          }
>
>          if (!closed) {
> -            buffer.close();
>              out.close();
>              closed = true;
>          }
> @@ -244,7 +247,7 @@ public class TarArchiveOutputStream exte
>       * @return The TarBuffer record size.
>       */
>      public int getRecordSize() {
> -        return buffer.getRecordSize();
> +        return this.recordSize;
>      }
>
>      /**
> @@ -317,7 +320,7 @@ public class TarArchiveOutputStream exte
>
>          entry.writeEntryHeader(recordBuf, encoding,
>                                 bigNumberMode == BIGNUMBER_STAR);
> -        buffer.writeRecord(recordBuf);
> +        writeRecord(recordBuf);
>
>          currBytes = 0;
>
> @@ -353,7 +356,7 @@ public class TarArchiveOutputStream exte
>                  assemBuf[i] = 0;
>              }
>
> -            buffer.writeRecord(assemBuf);
> +            writeRecord(assemBuf);
>
>              currBytes += assemLen;
>              assemLen = 0;
> @@ -407,7 +410,7 @@ public class TarArchiveOutputStream exte
>                                   assemLen);
>                  System.arraycopy(wBuf, wOffset, recordBuf,
>                                   assemLen, aLen);
> -                buffer.writeRecord(recordBuf);
> +                writeRecord(recordBuf);
>
>                  currBytes += recordBuf.length;
>                  wOffset += aLen;
> @@ -438,7 +441,7 @@ public class TarArchiveOutputStream exte
>                  break;
>              }
>
> -            buffer.writeRecord(wBuf, wOffset);
> +            writeRecord(wBuf, wOffset);
>
>              int num = recordBuf.length;
>
> @@ -512,7 +515,7 @@ public class TarArchiveOutputStream exte
>       */
>      private void writeEOFRecord() throws IOException {
>          Arrays.fill(recordBuf, (byte) 0);
> -        buffer.writeRecord(recordBuf);
> +        writeRecord(recordBuf);
>      }
>
>      @Override
> @@ -528,6 +531,55 @@ public class TarArchiveOutputStream exte
>          }
>          return new TarArchiveEntry(inputFile, entryName);
>      }
> +
> +    /**
> +     * Write an archive record to the archive.
> +     *
> +     * @param record The record data to write to the archive.
> +     * @throws IOException on error
> +     */
> +    private void writeRecord(byte[] record) throws IOException {
> +        if (record.length != recordSize) {
> +            throw new IOException("record to write has length '"
> +                                  + record.length
> +                                  + "' which is not the record size of '"
> +                                  + recordSize + "'");
> +        }
> +
> +        out.write(record);
> +        recordsWritten++;
> +    }
> +
> +    /**
> +     * Write an archive record to the archive, where the record may be
> +     * inside of a larger array buffer. The buffer must be "offset plus
> +     * record size" long.
> +     *
> +     * @param buf The buffer containing the record data to write.
> +     * @param offset The offset of the record data within buf.
> +     * @throws IOException on error
> +     */
> +    private void writeRecord(byte[] buf, int offset) throws IOException {
> +
> +        if ((offset + recordSize) > buf.length) {
> +            throw new IOException("record has length '" + buf.length
> +                                  + "' with offset '" + offset
> +                                  + "' which is less than the record size of '"
> +                                  + recordSize + "'");
> +        }
> +
> +        out.write(buf, offset, recordSize);
> +        recordsWritten++;
> +    }
> +
> +    private void padAsNeeded() throws IOException {
> +        int start = recordsWritten % recordsPerBlock;
> +        if (start != 0) {
> +            for (int i = start; i < recordsPerBlock; i++) {
> +                writeEOFRecord();
> +            }
> +        }
> +    }
>
>      private void addPaxHeadersForBigNumbers(Map<String, String> paxHeaders,
>                                              TarArchiveEntry entry) {
>
> Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java
> URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java?rev=1511843&r1=1511842&r2=1511843&view=diff
> ==============================================================================
> --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java (original)
> +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java Thu Aug  8 15:57:55 2013
> @@ -27,6 +27,12 @@ package org.apache.commons.compress.arch
>  // CheckStyle:InterfaceIsTypeCheck OFF (bc)
>  public interface TarConstants {
>
> +    /** Default record size */
> +    int DEFAULT_RCDSIZE = (512);
> +
> +    /** Default block size */
> +    int DEFAULT_BLKSIZE = (DEFAULT_RCDSIZE * 20);
> +
>      /**
>       * GNU format as per before tar 1.12.
>       */
>
> Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java
> URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java?rev=1511843&r1=1511842&r2=1511843&view=diff
> ==============================================================================
> --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java (original)
> +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java Thu Aug  8 15:57:55 2013
> @@ -230,5 +230,22 @@ public class ArchiveUtils {
>              final byte[] buffer2, final int offset2, final int length2){
>          return isEqual(buffer1, offset1, length1, buffer2, offset2, length2, true);
>      }
> -
> +
> +    /**
> +     * Returns true if the first N bytes of an array are all zero
> +     *
> +     * @param a
> +     *            The array to check
> +     * @param size
> +     *            The number of characters to check (not the size of the array)
> +     * @return true if the first N bytes are zero
> +     */
> +    public static boolean isArrayZero(byte[] a, int size) {
> +        for (int i = 0; i < size; i++) {
> +            if (a[i] != 0) {
> +                return false;
> +            }
> +        }
> +        return true;
> +    }
>  }
>
> Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/IOUtils.java
> URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/IOUtils.java?rev=1511843&r1=1511842&r2=1511843&view=diff
> ==============================================================================
> --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/IOUtils.java (original)
> +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/IOUtils.java Thu Aug  8 15:57:55 2013
> @@ -70,7 +70,30 @@ public final class IOUtils {
>          }
>          return count;
>      }
> +
> +    /**
> +     * Skips the given number of bytes by repeatedly invoking skip on
> +     * the given input stream if necessary.
> +     *
> +     * <p>This method will only skip less than the requested number of
> +     * bytes if the end of the input stream has been reached.</p>
>
> +     * @param input stream to skip bytes in
> +     * @param numToSkip the number of bytes to skip
> +     * @return the number of bytes actually skipped
> +     * @throws IOException
> +     */
> +    public static long skip(InputStream input, long numToSkip) throws IOException {
> +        long available = numToSkip;
> +        while (numToSkip > 0) {
> +            long skipped = input.skip(numToSkip);
> +            if (skipped == 0) {
> +                break;
> +            }
> +            numToSkip -= skipped;
> +        }
> +        return (available - numToSkip);
> +    }
>
>      // toByteArray(InputStream) copied from:
>      // commons/proper/io/trunk/src/main/java/org/apache/commons/io/IOUtils.java?revision=1428941
>
> Modified: commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
> URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java?rev=1511843&r1=1511842&r2=1511843&view=diff
> ==============================================================================
> --- commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java (original)
> +++ commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java Thu Aug  8 15:57:55 2013
> @@ -27,8 +27,8 @@ import static org.junit.Assert.fail;
>  import java.io.ByteArrayInputStream;
>  import java.io.ByteArrayOutputStream;
>  import java.io.FileInputStream;
> -import java.io.InputStream;
>  import java.io.IOException;
> +import java.io.InputStream;
>  import java.util.Calendar;
>  import java.util.Date;
>  import java.util.Map;
> @@ -41,7 +41,8 @@ public class TarArchiveInputStreamTest {
>
>      @Test
>      public void readSimplePaxHeader() throws Exception {
> -        final TarArchiveInputStream tais = new TarArchiveInputStream(null);
> +        final InputStream is = new ByteArrayInputStream(new byte[1]);
> +        final TarArchiveInputStream tais = new TarArchiveInputStream(is);
>          Map<String, String> headers = tais
>              .parsePaxHeaders(new ByteArrayInputStream("30 atime=1321711775.972059463\n"
>                                                        .getBytes(CharsetNames.UTF_8)));
> @@ -52,7 +53,8 @@ public class TarArchiveInputStreamTest {
>
>      @Test
>      public void readPaxHeaderWithEmbeddedNewline() throws Exception {
> -        final TarArchiveInputStream tais = new TarArchiveInputStream(null);
> +        final InputStream is = new ByteArrayInputStream(new byte[1]);
> +        final TarArchiveInputStream tais = new TarArchiveInputStream(is);
>          Map<String, String> headers = tais
>              .parsePaxHeaders(new ByteArrayInputStream("28 comment=line1\nline2\nand3\n"
>                                                        .getBytes(CharsetNames.UTF_8)));
> @@ -66,7 +68,8 @@ public class TarArchiveInputStreamTest {
>          String ae = "\u00e4";
>          String line = "11 path="+ ae + "\n";
>          assertEquals(11, line.getBytes(CharsetNames.UTF_8).length);
> -        final TarArchiveInputStream tais = new TarArchiveInputStream(null);
> +        final InputStream is = new ByteArrayInputStream(new byte[1]);
> +        final TarArchiveInputStream tais = new TarArchiveInputStream(is);
>          Map<String, String> headers = tais
>              .parsePaxHeaders(new ByteArrayInputStream(line.getBytes(CharsetNames.UTF_8)));
>          assertEquals(1, headers.size());
>
> Modified: commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java
> URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java?rev=1511843&r1=1511842&r2=1511843&view=diff
> ==============================================================================
> --- commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java (original)
> +++ commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java Thu Aug  8 15:57:55 2013
> @@ -482,7 +482,7 @@ public class TarArchiveOutputStreamTest
>          tos.closeArchiveEntry();
>          tos.close();
>          // test1.xml is small enough to fit into the default blockv size
> -        assertEquals(TarBuffer.DEFAULT_BLKSIZE, f.length());
> +        assertEquals(TarConstants.DEFAULT_BLKSIZE, f.length());
>      }
>
>  }
>
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org