You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2012/03/18 18:16:52 UTC

svn commit: r1302170 - in /commons/proper/compress/trunk/src: main/java/org/apache/commons/compress/archivers/tar/ main/java/org/apache/commons/compress/archivers/zip/ test/java/org/apache/commons/compress/archivers/

Author: bodewig
Date: Sun Mar 18 17:16:51 2012
New Revision: 1302170

URL: http://svn.apache.org/viewvc?rev=1302170&view=rev
Log:
infrastructure for non-ASCII encoding of file names in tar.  COMPRESS-183

Modified:
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
    commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java?rev=1302170&r1=1302169&r2=1302170&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java Sun Mar 18 17:16:51 2012
@@ -19,11 +19,13 @@
 package org.apache.commons.compress.archivers.tar;
 
 import java.io.File;
+import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Date;
 import java.util.Locale;
 
 import org.apache.commons.compress.archivers.ArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipEncoding;
 
 /**
  * This class represents an entry in a Tar archive. It consists
@@ -177,7 +179,7 @@ public class TarArchiveEntry implements 
     /**
      * Construct an empty entry and prepares the header values.
      */
-    private TarArchiveEntry () {
+    private TarArchiveEntry() {
         this.magic = MAGIC_POSIX;
         this.version = VERSION_POSIX;
         this.name = "";
@@ -307,8 +309,30 @@ public class TarArchiveEntry implements 
      * @throws IllegalArgumentException if any of the numeric fields have an invalid format
      */
     public TarArchiveEntry(byte[] headerBuf) {
+        this(headerBuf, TarUtils.DEFAULT_ENCODING); // null would NPE in TarUtils.parseName
+    }
+
+    /**
+     * Construct an entry from an archive's header bytes. File is set
+     * to null.
+     *
+     * @param headerBuf The header bytes from a tar archive entry.
+     * @param encoding encoding to use for file names
+     * @since Commons Compress 1.4
+     * @throws IllegalArgumentException if any of the numeric fields have an invalid format
+     */
+    public TarArchiveEntry(byte[] headerBuf, ZipEncoding encoding) {
         this();
-        parseTarHeader(headerBuf);
+        try {
+            parseTarHeader(headerBuf, encoding);
+        } catch (IOException ex) {
+            try {
+                parseTarHeader(headerBuf, encoding, true);
+            } catch (IOException ex2) {
+                // impossible: old-style parseName handles IOException itself
+                throw new RuntimeException(ex2);
+            }
+        }
     }
 
     /**
@@ -865,9 +889,39 @@ public class TarArchiveEntry implements 
      * @throws IllegalArgumentException if any of the numeric fields have an invalid format
      */
     public void parseTarHeader(byte[] header) {
+        try {
+            parseTarHeader(header, TarUtils.DEFAULT_ENCODING);
+        } catch (IOException ex) {
+            try {
+                parseTarHeader(header, TarUtils.DEFAULT_ENCODING, true);
+            } catch (IOException ex2) {
+                // not possible: old-style parseName handles IOException itself
+                throw new RuntimeException(ex2);
+            }
+        }
+    }
+
+    /**
+     * Parse an entry's header information from a header buffer.
+     *
+     * @param header The tar entry header buffer to get information from.
+     * @param encoding encoding to use for file names
+     * @since Commons Compress 1.4
+     * @throws IllegalArgumentException if a numeric field has an invalid format
+     * @throws IOException if decoding a name with the given encoding fails
+     */
+    public void parseTarHeader(byte[] header, ZipEncoding encoding)
+        throws IOException {
+        parseTarHeader(header, encoding, false);
+    }
+
+    private void parseTarHeader(byte[] header, ZipEncoding encoding,
+                                final boolean oldStyle)
+        throws IOException {
         int offset = 0;
 
-        name = TarUtils.parseName(header, offset, NAMELEN);
+        name = oldStyle ? TarUtils.parseName(header, offset, NAMELEN)
+            : TarUtils.parseName(header, offset, NAMELEN, encoding);
         offset += NAMELEN;
         mode = (int) TarUtils.parseOctalOrBinary(header, offset, MODELEN);
         offset += MODELEN;
@@ -881,15 +935,18 @@ public class TarArchiveEntry implements 
         offset += MODTIMELEN;
         offset += CHKSUMLEN;
         linkFlag = header[offset++];
-        linkName = TarUtils.parseName(header, offset, NAMELEN);
+        linkName = oldStyle ? TarUtils.parseName(header, offset, NAMELEN)
+            : TarUtils.parseName(header, offset, NAMELEN, encoding);
         offset += NAMELEN;
         magic = TarUtils.parseName(header, offset, MAGICLEN);
         offset += MAGICLEN;
         version = TarUtils.parseName(header, offset, VERSIONLEN);
         offset += VERSIONLEN;
-        userName = TarUtils.parseName(header, offset, UNAMELEN);
+        userName = oldStyle ? TarUtils.parseName(header, offset, UNAMELEN)
+            : TarUtils.parseName(header, offset, UNAMELEN, encoding);
         offset += UNAMELEN;
-        groupName = TarUtils.parseName(header, offset, GNAMELEN);
+        groupName = oldStyle ? TarUtils.parseName(header, offset, GNAMELEN)
+            : TarUtils.parseName(header, offset, GNAMELEN, encoding);
         offset += GNAMELEN;
         devMajor = (int) TarUtils.parseOctalOrBinary(header, offset, DEVLEN);
         offset += DEVLEN;
@@ -913,7 +970,9 @@ public class TarArchiveEntry implements 
         }
         case FORMAT_POSIX:
         default: {
-            String prefix = TarUtils.parseName(header, offset, PREFIXLEN);
+            String prefix = oldStyle
+                ? TarUtils.parseName(header, offset, PREFIXLEN)
+                : TarUtils.parseName(header, offset, PREFIXLEN, encoding);
             // SunOS tar -E does not add / to directory names, so fix
             // up to be consistent
             if (isDirectory() && !name.endsWith("/")){

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java?rev=1302170&r1=1302169&r2=1302170&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java Sun Mar 18 17:16:51 2012
@@ -33,6 +33,8 @@ import java.util.Map.Entry;
 
 import org.apache.commons.compress.archivers.ArchiveEntry;
 import org.apache.commons.compress.archivers.ArchiveInputStream;
+import org.apache.commons.compress.archivers.zip.ZipEncoding;
+import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
 import org.apache.commons.compress.utils.ArchiveUtils;
 
 /**
@@ -52,6 +54,7 @@ public class TarArchiveInputStream exten
     private byte[] readBuf;
     protected final TarBuffer buffer;
     private TarArchiveEntry currEntry;
+    private final ZipEncoding encoding;
 
     /**
      * Constructor for TarInputStream.
@@ -64,6 +67,16 @@ public class TarArchiveInputStream exten
     /**
      * Constructor for TarInputStream.
      * @param is the input stream to use
+     * @param encoding name of the encoding to use for file names
+     * @since Commons Compress 1.4
+     */
+    public TarArchiveInputStream(InputStream is, String encoding) {
+        this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
+    }
+
+    /**
+     * Constructor for TarInputStream.
+     * @param is the input stream to use
      * @param blockSize the block size to use
      */
     public TarArchiveInputStream(InputStream is, int blockSize) {
@@ -74,12 +87,38 @@ public class TarArchiveInputStream exten
      * Constructor for TarInputStream.
      * @param is the input stream to use
      * @param blockSize the block size to use
+     * @param encoding name of the encoding to use for file names
+     * @since Commons Compress 1.4
+     */
+    public TarArchiveInputStream(InputStream is, int blockSize,
+                                 String encoding) {
+        this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
+    }
+
+    /**
+     * Constructor for TarInputStream.
+     * @param is the input stream to use
+     * @param blockSize the block size to use
      * @param recordSize the record size to use
      */
     public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
+        this(is, blockSize, recordSize, null);
+    }
+
+    /**
+     * Constructor for TarInputStream.
+     * @param is the input stream to use
+     * @param blockSize the block size to use
+     * @param recordSize the record size to use
+     * @param encoding name of the encoding to use for file names
+     * @since Commons Compress 1.4
+     */
+    public TarArchiveInputStream(InputStream is, int blockSize, int recordSize,
+                                 String encoding) {
         this.buffer = new TarBuffer(is, blockSize, recordSize);
         this.readBuf = null;
         this.hasHitEOF = false;
+        this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
     }
 
     /**
@@ -196,7 +235,7 @@ public class TarArchiveInputStream exten
         }
 
         try {
-            currEntry = new TarArchiveEntry(headerBuf);
+            currEntry = new TarArchiveEntry(headerBuf, encoding);
         } catch (IllegalArgumentException e) {
             IOException ioe = new IOException("Error detected parsing the header");
             ioe.initCause(e);

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java?rev=1302170&r1=1302169&r2=1302170&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java Sun Mar 18 17:16:51 2012
@@ -26,6 +26,8 @@ import java.util.HashMap;
 import java.util.Map;
 import org.apache.commons.compress.archivers.ArchiveEntry;
 import org.apache.commons.compress.archivers.ArchiveOutputStream;
+import org.apache.commons.compress.archivers.zip.ZipEncoding;
+import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
 import org.apache.commons.compress.utils.ArchiveUtils;
 import org.apache.commons.compress.utils.CountingOutputStream;
 
@@ -77,6 +79,8 @@ public class TarArchiveOutputStream exte
     
     private final OutputStream out;
 
+    private final ZipEncoding encoding;
+
     /**
      * Constructor for TarInputStream.
      * @param os the output stream to use
@@ -88,6 +92,16 @@ public class TarArchiveOutputStream exte
     /**
      * Constructor for TarInputStream.
      * @param os the output stream to use
+     * @param encoding name of the encoding to use for file names
+     * @since Commons Compress 1.4
+     */
+    public TarArchiveOutputStream(OutputStream os, String encoding) {
+        this(os, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE, encoding);
+    }
+
+    /**
+     * Constructor for TarArchiveOutputStream.
+     * @param os the output stream to use
      * @param blockSize the block size to use
      */
     public TarArchiveOutputStream(OutputStream os, int blockSize) {
@@ -98,10 +112,36 @@ public class TarArchiveOutputStream exte
      * Constructor for TarInputStream.
      * @param os the output stream to use
      * @param blockSize the block size to use
+     * @param encoding name of the encoding to use for file names
+     * @since Commons Compress 1.4
+     */
+    public TarArchiveOutputStream(OutputStream os, int blockSize,
+                                  String encoding) {
+        this(os, blockSize, TarBuffer.DEFAULT_RCDSIZE, encoding);
+    }
+
+    /**
+     * Constructor for TarArchiveOutputStream.
+     * @param os the output stream to use
+     * @param blockSize the block size to use
      * @param recordSize the record size to use
      */
     public TarArchiveOutputStream(OutputStream os, int blockSize, int recordSize) {
+        this(os, blockSize, recordSize, null);
+    }
+
+    /**
+     * Constructor for TarArchiveOutputStream.
+     * @param os the output stream to use
+     * @param blockSize the block size to use
+     * @param recordSize the record size to use
+     * @param encoding name of the encoding to use for file names
+     * @since Commons Compress 1.4
+     */
+    public TarArchiveOutputStream(OutputStream os, int blockSize,
+                                  int recordSize, String encoding) {
         out = new CountingOutputStream(os);
+        this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
 
         this.buffer = new TarBuffer(out, blockSize, recordSize);
         this.assemLen = 0;

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java?rev=1302170&r1=1302169&r2=1302170&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java Sun Mar 18 17:16:51 2012
@@ -18,7 +18,11 @@
  */
 package org.apache.commons.compress.archivers.tar;
 
+import java.io.IOException;
 import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import org.apache.commons.compress.archivers.zip.ZipEncoding;
+import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
 
 /**
  * This class provides static utility methods to work with byte streams.
@@ -30,6 +34,9 @@ public class TarUtils {
 
     private static final int BYTE_MASK = 255;
 
+    static final ZipEncoding DEFAULT_ENCODING =
+        ZipEncodingHelper.getZipEncoding(null);
+
     /** Private constructor to prevent instantiation of this utility class. */
     private TarUtils(){    
     }
@@ -211,6 +218,19 @@ public class TarUtils {
      * @return The entry name.
      */
     public static String parseName(byte[] buffer, final int offset, final int length) {
+        try {
+            return parseName(buffer, offset, length, DEFAULT_ENCODING);
+        } catch (IOException ex) {
+            return parseNameFallback(buffer, offset, length);
+        }
+    }
+
+    /*
+     * Used if default encoding cannot decode name and no explicit
+     * encoding has been specified.
+     */
+    private static String parseNameFallback(byte[] buffer, final int offset,
+                                            final int length) {
         StringBuffer result = new StringBuffer(length);
         int          end = offset + length;
 
@@ -226,7 +246,38 @@ public class TarUtils {
     }
 
     /**
-     * Copy a name (StringBuffer) into a buffer.
+     * Parse an entry name from a buffer.
+     * Trailing NULs are stripped; the remaining bytes
+     * (at most length) are decoded with the given encoding.
+     *
+     * @param buffer The buffer from which to parse.
+     * @param offset The offset into the buffer from which to parse.
+     * @param length The maximum number of bytes to parse.
+     * @param encoding the encoding to use for file names
+     * @since Commons Compress 1.4
+     * @return The entry name.
+     */
+    public static String parseName(byte[] buffer, final int offset,
+                                   final int length,
+                                   final ZipEncoding encoding)
+        throws IOException {
+
+        int len = length;
+        for (; len > 0; len--) {
+            if (buffer[offset + len - 1] != 0) {
+                break;
+            }
+        }
+        if (len > 0) {
+            byte[] b = new byte[len];
+            System.arraycopy(buffer, offset, b, 0, len);
+            return encoding.decode(b);
+        }
+        return "";
+    }
+
+    /**
+     * Copy a name into a buffer.
      * Copies characters from the name into the buffer
      * starting at the specified offset. 
      * If the buffer is longer than the name, the buffer
@@ -241,6 +292,20 @@ public class TarUtils {
      * @return The updated offset, i.e. offset + length
      */
     public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) {
+        try {
+            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
+        } catch (IOException ex) {
+            return formatNameBytesFallback(name, buf, offset, length);
+        }
+    }
+
+    /*
+     * Used if default encoding cannot format name and no explicit encoding
+     * has been specified.
+     */
+    private static int formatNameBytesFallback(String name, byte[] buf,
+                                               final int offset,
+                                               final int length) {
         int i;
 
         // copy until end of input or output is reached.
@@ -257,6 +322,43 @@ public class TarUtils {
     }
 
     /**
+     * Copy a name into a buffer.
+     * Copies characters from the name into the buffer
+     * starting at the specified offset. 
+     * If the buffer is longer than the name, the buffer
+     * is filled with trailing NULs.
+     * If the name is longer than the buffer,
+     * the output is truncated.
+     *
+     * @param name The header name from which to copy the characters.
+     * @param buf The buffer where the name is to be stored.
+     * @param offset The starting offset into the buffer
+     * @param length The maximum number of header bytes to copy.
+     * @param encoding the encoding to use for file names
+     * @since Commons Compress 1.4
+     * @return The updated offset, i.e. offset + length
+     */
+    public static int formatNameBytes(String name, byte[] buf, final int offset,
+                                      final int length,
+                                      final ZipEncoding encoding)
+        throws IOException {
+        int len = name.length();
+        ByteBuffer b = encoding.encode(name);
+        while (b.limit() > length && len > 0) {
+            b = encoding.encode(name.substring(0, --len));
+        }
+        final int limit = b.limit();
+        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
+
+        // Pad any remaining output bytes with NUL
+        for (int i = limit; i < length; ++i) {
+            buf[offset + i] = 0;
+        }
+
+        return offset + length;
+    }
+
+    /**
      * Fill buffer with unsigned octal number, padded with leading zeroes.
      * 
      * @param value number to convert to octal - treated as unsigned

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java?rev=1302170&r1=1302169&r2=1302170&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java Sun Mar 18 17:16:51 2012
@@ -41,7 +41,7 @@ import java.nio.ByteBuffer;
  * <p>All implementations should implement this interface in a
  * reentrant way.</p>
  */
-interface ZipEncoding {
+public interface ZipEncoding {
     /**
      * Check, whether the given string may be losslessly encoded using this
      * encoding.

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java?rev=1302170&r1=1302169&r2=1302170&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java Sun Mar 18 17:16:51 2012
@@ -27,7 +27,7 @@ import java.util.Map;
 /**
  * Static helper functions for robustly encoding filenames in zip files. 
  */
-abstract class ZipEncodingHelper {
+public abstract class ZipEncodingHelper {
 
     /**
      * A class, which holds the high characters of a simple encoding
@@ -207,7 +207,7 @@ abstract class ZipEncodingHelper {
      *             the platform's default encoding.
      * @return A zip encoding for the given encoding name.
      */
-    static ZipEncoding getZipEncoding(String name) {
+    public static ZipEncoding getZipEncoding(String name) {
  
         // fallback encoding is good enough for utf-8.
         if (isUTF8(name)) {

Modified: commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java?rev=1302170&r1=1302169&r2=1302170&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java (original)
+++ commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/TarTestCase.java Sun Mar 18 17:16:51 2012
@@ -117,7 +117,8 @@ public final class TarTestCase extends A
     public void testCOMPRESS114() throws Exception {
         final File input = getFile("COMPRESS-114.tar");
         final InputStream is = new FileInputStream(input);
-        final ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream("tar", is);
+        final ArchiveInputStream in = new TarArchiveInputStream(is,
+                                                                "iso-8859-1");
         TarArchiveEntry entry = (TarArchiveEntry)in.getNextEntry();
         assertEquals("3\u00b1\u00b1\u00b1F06\u00b1W2345\u00b1ZB\u00b1la\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1\u00b1BLA", entry.getName());
         entry = (TarArchiveEntry)in.getNextEntry();