You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2012/03/24 06:32:31 UTC

svn commit: r1304709 - in /commons/proper/compress/trunk/src: changes/changes.xml main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java

Author: bodewig
Date: Sat Mar 24 05:32:31 2012
New Revision: 1304709

URL: http://svn.apache.org/viewvc?rev=1304709&view=rev
Log:
optionally use PAX headers when writing non-ASCII file names.  COMPRESS-183

Modified:
    commons/proper/compress/trunk/src/changes/changes.xml
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java
    commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java

Modified: commons/proper/compress/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/changes/changes.xml?rev=1304709&r1=1304708&r2=1304709&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/changes/changes.xml (original)
+++ commons/proper/compress/trunk/src/changes/changes.xml Sat Mar 24 05:32:31 2012
@@ -46,6 +46,17 @@ The <action> type attribute can be add,u
   <body>
     <release version="1.4" date="unreleased"
              description="Release 1.4">
+      <action issue="COMPRESS-183" type="fix" date="2012-03-24">
+        The tar package now allows the encoding of file names to be
+        specified and can optionally use PAX extension headers to
+        write non-ASCII file names.
+        The stream classes now write (or expect to read) archives that
+        use the platform's native encoding for file names.  Apache
+        Commons Compress 1.3 used to strip everything but the lower
+        eight bits of each character which effectively only worked for
+        ASCII and ISO-8859-1 file names.
+        This new default behavior is a breaking change.
+      </action> 
       <action issue="COMPRESS-184" type="fix" date="2012-03-23">
         TarArchiveInputStream failed to parse PAX headers that
         contained non-ASCII characters.

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java?rev=1304709&r1=1304708&r2=1304709&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java Sat Mar 24 05:32:31 2012
@@ -81,6 +81,10 @@ public class TarArchiveOutputStream exte
 
     private final ZipEncoding encoding;
 
+    private boolean addPaxHeadersForNonAsciiNames = false;
+    private static final ZipEncoding ASCII =
+        ZipEncodingHelper.getZipEncoding("ASCII");
+
     /**
      * Constructor for TarInputStream.
      * @param os the output stream to use
@@ -172,6 +176,13 @@ public class TarArchiveOutputStream exte
         this.bigNumberMode = bigNumberMode;
     }
 
+    /**
+     * Sets whether non-ASCII file names get a PAX extension header (default: off).
+     * @since Apache Commons Compress 1.4
+     */
+    public void setAddPaxHeadersForNonAsciiNames(boolean b) {
+        addPaxHeadersForNonAsciiNames = b;
+    }
 
     @Deprecated
     @Override
@@ -254,11 +265,14 @@ public class TarArchiveOutputStream exte
         }
         TarArchiveEntry entry = (TarArchiveEntry) archiveEntry;
         Map<String, String> paxHeaders = new HashMap<String, String>();
-        final byte[] nameBytes = encoding.encode(entry.getName()).array();
+        final String entryName = entry.getName();
+        final byte[] nameBytes = encoding.encode(entryName).array();
+        boolean paxHeaderContainsPath = false;
         if (nameBytes.length >= TarConstants.NAMELEN) {
 
             if (longFileMode == LONGFILE_POSIX) {
-                paxHeaders.put("path", entry.getName());
+                paxHeaders.put("path", entryName);
+                paxHeaderContainsPath = true;
             } else if (longFileMode == LONGFILE_GNU) {
                 // create a TarEntry for the LongLink, the contents
                 // of which are the entry's name
@@ -271,7 +285,7 @@ public class TarArchiveOutputStream exte
                 write(0); // NUL terminator
                 closeArchiveEntry();
             } else if (longFileMode != LONGFILE_TRUNCATE) {
-                throw new RuntimeException("file name '" + entry.getName()
+                throw new RuntimeException("file name '" + entryName
                                            + "' is too long ( > "
                                            + TarConstants.NAMELEN + " bytes)");
             }
@@ -283,8 +297,13 @@ public class TarArchiveOutputStream exte
             failForBigNumbers(entry);
         }
 
+        if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath
+            && !ASCII.canEncode(entryName)) {
+            paxHeaders.put("path", entryName);
+        }
+
         if (paxHeaders.size() > 0) {
-            writePaxHeaders(entry.getName(), paxHeaders);
+            writePaxHeaders(entryName, paxHeaders);
         }
 
         entry.writeEntryHeader(recordBuf, encoding,
@@ -298,7 +317,7 @@ public class TarArchiveOutputStream exte
         } else {
             currSize = entry.getSize();
         }
-        currName = entry.getName();
+        currName = entryName;
         haveUnclosedEntry = true;
     }
 
@@ -426,7 +445,7 @@ public class TarArchiveOutputStream exte
      */
     void writePaxHeaders(String entryName,
                          Map<String, String> headers) throws IOException {
-        String name = "./PaxHeaders.X/" + entryName;
+        String name = "./PaxHeaders.X/" + stripTo7Bits(entryName);
         if (name.length() >= TarConstants.NAMELEN) {
             name = name.substring(0, TarConstants.NAMELEN - 1);
         }
@@ -461,6 +480,27 @@ public class TarArchiveOutputStream exte
         closeArchiveEntry();
     }
 
+    /**
+     * Reduces a name to 7-bit characters for use in the name of the
+     * PAX header's own entry.
+     *
+     * @param name the name to reduce
+     * @return the name with every character masked to its lower
+     * seven bits; characters that become NUL are dropped
+     */
+    private String stripTo7Bits(String name) {
+        final int length = name.length();
+        // method-local buffer, no synchronization needed
+        final StringBuilder result = new StringBuilder(length);
+        for (int i = 0; i < length; i++) {
+            final char stripped = (char) (name.charAt(i) & 0x7F);
+            if (stripped != 0) { // would be read as Trailing null
+                result.append(stripped);
+            }
+        }
+        return result.toString();
+    }
+
     /**
      * Write an EOF (end of archive) record to the tar archive.
      * An EOF record consists of a record of all zeros.

Modified: commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java?rev=1304709&r1=1304708&r2=1304709&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java (original)
+++ commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java Sat Mar 24 05:32:31 2012
@@ -274,4 +274,34 @@ public class TarArchiveOutputStreamTest 
         }
     }
 
+    /**
+     * Writes an entry whose name is not ASCII with PAX headers for
+     * non-ASCII names enabled, then checks the "path" PAX record and
+     * that the name survives reading the archive back.
+     */
+    public void testWriteNonAsciiPathNamePaxHeader() throws Exception {
+        String n = "\u00e4";
+        TarArchiveEntry t = new TarArchiveEntry(n);
+        t.setSize(10 * 1024);
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        TarArchiveOutputStream tos = new TarArchiveOutputStream(bos);
+        tos.setAddPaxHeadersForNonAsciiNames(true);
+        tos.putArchiveEntry(t);
+        tos.write(new byte[10 * 1024]);
+        tos.closeArchiveEntry();
+        tos.close();
+        byte[] data = bos.toByteArray();
+        // the record is 11 bytes long as the name takes two bytes in UTF-8
+        assertEquals("11 path=" + n + "\n",
+                     new String(data, 512, 11, "UTF-8"));
+        TarArchiveInputStream tin =
+            new TarArchiveInputStream(new ByteArrayInputStream(data));
+        try {
+            TarArchiveEntry e = tin.getNextTarEntry();
+            assertEquals(n, e.getName());
+        } finally {
+            tin.close();
+        }
+    }
+
 }
\ No newline at end of file