You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@ant.apache.org by bo...@apache.org on 2009/02/27 18:01:00 UTC

svn commit: r748593 - in /ant/core/trunk: ./ docs/manual/CoreTasks/ src/main/org/apache/tools/ant/taskdefs/ src/main/org/apache/tools/zip/

Author: bodewig
Date: Fri Feb 27 17:00:59 2009
New Revision: 748593

URL: http://svn.apache.org/viewvc?rev=748593&view=rev
Log:
provide options for enhanced encoding support in ZIP, document it.  Many thanks to Wolfgang Glas who provided most of the test input as well as patches the new support is based on.

Modified:
    ant/core/trunk/WHATSNEW
    ant/core/trunk/docs/manual/CoreTasks/ear.html
    ant/core/trunk/docs/manual/CoreTasks/jar.html
    ant/core/trunk/docs/manual/CoreTasks/unzip.html
    ant/core/trunk/docs/manual/CoreTasks/war.html
    ant/core/trunk/docs/manual/CoreTasks/zip.html
    ant/core/trunk/src/main/org/apache/tools/ant/taskdefs/Expand.java
    ant/core/trunk/src/main/org/apache/tools/ant/taskdefs/Zip.java
    ant/core/trunk/src/main/org/apache/tools/zip/ZipFile.java   (contents, props changed)

Modified: ant/core/trunk/WHATSNEW
URL: http://svn.apache.org/viewvc/ant/core/trunk/WHATSNEW?rev=748593&r1=748592&r2=748593&view=diff
==============================================================================
--- ant/core/trunk/WHATSNEW (original)
+++ ant/core/trunk/WHATSNEW Fri Feb 27 17:00:59 2009
@@ -702,6 +702,10 @@
  * CBZip2OutputStream now has a finish method separate from close.
    Bugzilla Report 42713.
 
+ * the <zip> and <unzip> family of tasks has new options to deal with
+   file name and comment encoding.  Please see the zip tasks'
+   documentation for details.
+
 Changes from Ant 1.7.0 TO Ant 1.7.1
 =============================================
 

Modified: ant/core/trunk/docs/manual/CoreTasks/ear.html
URL: http://svn.apache.org/viewvc/ant/core/trunk/docs/manual/CoreTasks/ear.html?rev=748593&r1=748592&r2=748593&view=diff
==============================================================================
--- ant/core/trunk/docs/manual/CoreTasks/ear.html (original)
+++ ant/core/trunk/docs/manual/CoreTasks/ear.html Fri Feb 27 17:00:59 2009
@@ -83,7 +83,9 @@
     <td valign="top">The character encoding to use for filenames
       inside the archive.  Defaults to UTF8. <strong>It is not
       recommended to change this value as the created archive will most
-      likely be unreadable for Java otherwise.</strong></td>
+      likely be unreadable for Java otherwise.</strong>
+      <br/>See also the <a href="zip.html#encoding">discussion in the
+      zip task page</a></td>
     <td align="center" valign="top">No</td>
   </tr>
   <tr>
@@ -197,6 +199,25 @@
     </td>
     <td valign="top" align="center">No, default is false</td>
   </tr>
+  <tr>
+    <td valign="top">useLanguageEncodingFlag</td>
+    <td valign="top">Whether to set the language encoding flag if the
+      encoding is UTF-8.  This setting doesn't have any effect if the
+      encoding is not UTF-8.
+      <em>Since Ant 1.8.0</em>.
+      <br/>See also the <a href="zip.html#encoding">discussion in the
+      zip task page</a></td>
+    <td valign="top" align="center">No, default is true</td>
+  </tr>
+  <tr>
+    <td valign="top">createUnicodeExtraFields</td>
+    <td valign="top">Whether to create unicode extra fields to store
+      the file names a second time inside the entry's metadata.
+      <em>Since Ant 1.8.0</em>.
+      <br/>See also the <a href="zip.html#encoding">discussion in the
+      zip task page</a></td>
+    <td valign="top" align="center">No, default is false</td>
+  </tr>
 </table>
 
 <h3>Nested elements</h3>

Modified: ant/core/trunk/docs/manual/CoreTasks/jar.html
URL: http://svn.apache.org/viewvc/ant/core/trunk/docs/manual/CoreTasks/jar.html?rev=748593&r1=748592&r2=748593&view=diff
==============================================================================
--- ant/core/trunk/docs/manual/CoreTasks/jar.html (original)
+++ ant/core/trunk/docs/manual/CoreTasks/jar.html Fri Feb 27 17:00:59 2009
@@ -125,8 +125,10 @@
     <td valign="top">encoding</td>
     <td valign="top">The character encoding to use for filenames
       inside the archive.  Defaults to UTF8. <strong>It is not
-      recommended to change this value as the created archive will most
-      likely be unreadable for Java otherwise.</strong></td>
+      recommended to change this value as the created archive will
+      most likely be unreadable for Java otherwise.</strong>
+      <br/>See also the <a href="zip.html#encoding">discussion in the
+      zip task page</a></td>
     <td align="center" valign="top">No</td>
   </tr>
   <tr>
@@ -251,6 +253,25 @@
     </td>
     <td valign="top" align="center">No, default is false</td>
   </tr>
+  <tr>
+    <td valign="top">useLanguageEncodingFlag</td>
+    <td valign="top">Whether to set the language encoding flag if the
+      encoding is UTF-8.  This setting doesn't have any effect if the
+      encoding is not UTF-8.
+      <em>Since Ant 1.8.0</em>.
+      <br/>See also the <a href="zip.html#encoding">discussion in the
+      zip task page</a></td>
+    <td valign="top" align="center">No, default is true</td>
+  </tr>
+  <tr>
+    <td valign="top">createUnicodeExtraFields</td>
+    <td valign="top">Whether to create unicode extra fields to store
+      the file names a second time inside the entry's metadata.
+      <em>Since Ant 1.8.0</em>.
+      <br/>See also the <a href="zip.html#encoding">discussion in the
+      zip task page</a></td>
+    <td valign="top" align="center">No, default is false</td>
+  </tr>
 </table>
 
 <h3>Nested elements</h3>

Modified: ant/core/trunk/docs/manual/CoreTasks/unzip.html
URL: http://svn.apache.org/viewvc/ant/core/trunk/docs/manual/CoreTasks/unzip.html?rev=748593&r1=748592&r2=748593&view=diff
==============================================================================
--- ant/core/trunk/docs/manual/CoreTasks/unzip.html (original)
+++ ant/core/trunk/docs/manual/CoreTasks/unzip.html Fri Feb 27 17:00:59 2009
@@ -107,7 +107,9 @@
     href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.<br>
     Defaults to &quot;UTF8&quot;, use the magic value
     <code>native-encoding</code> for the platform's default character
-    encoding.</td>
+    encoding.
+      <br/>See also the <a href="zip.html#encoding">discussion in the
+      zip task page</a></td>
     <td align="center" valign="top">No</td>
   </tr>
   <tr>
@@ -125,6 +127,16 @@
       any).  <em>since Ant 1.8.0</em></td>
     <td valign="top" align="center">No, defaults to false</td>
   </tr>
+  <tr>
+    <td valign="top">scanForUnicodeExtraFields</td>
+    <td valign="top"><b>Note:</b> This attribute is not available for
+    the <code>untar</code> task.<br>
+      If the archive contains unicode extra fields then use them to set
+      the file names, ignoring the specified encoding.
+      <br/>See also the <a href="zip.html#encoding">discussion in the
+      zip task page</a></td>
+    <td align="center" valign="top">No, defaults to true</td>
+  </tr>
 </table>
 <h3>Examples</h3>
 <pre>

Modified: ant/core/trunk/docs/manual/CoreTasks/war.html
URL: http://svn.apache.org/viewvc/ant/core/trunk/docs/manual/CoreTasks/war.html?rev=748593&r1=748592&r2=748593&view=diff
==============================================================================
--- ant/core/trunk/docs/manual/CoreTasks/war.html (original)
+++ ant/core/trunk/docs/manual/CoreTasks/war.html Fri Feb 27 17:00:59 2009
@@ -116,7 +116,9 @@
     <td valign="top">The character encoding to use for filenames
       inside the archive.  Defaults to UTF8. <strong>It is not
       recommended to change this value as the created archive will most
-      likely be unreadable for Java otherwise.</strong></td>
+      likely be unreadable for Java otherwise.</strong>
+      <br/>See also the <a href="zip.html#encoding">discussion in the
+      zip task page</a></td>
     <td align="center" valign="top">No</td>
   </tr>
   <tr>
@@ -214,6 +216,25 @@
     </td>
     <td valign="top" align="center">No, default is false</td>
   </tr>
+  <tr>
+    <td valign="top">useLanguageEncodingFlag</td>
+    <td valign="top">Whether to set the language encoding flag if the
+      encoding is UTF-8.  This setting doesn't have any effect if the
+      encoding is not UTF-8.
+      <em>Since Ant 1.8.0</em>.
+      <br/>See also the <a href="zip.html#encoding">discussion in the
+      zip task page</a></td>
+    <td valign="top" align="center">No, default is true</td>
+  </tr>
+  <tr>
+    <td valign="top">createUnicodeExtraFields</td>
+    <td valign="top">Whether to create unicode extra fields to store
+      the file names a second time inside the entry's metadata.
+      <em>Since Ant 1.8.0</em>.
+      <br/>See also the <a href="zip.html#encoding">discussion in the
+      zip task page</a></td>
+    <td valign="top" align="center">No, default is false</td>
+  </tr>
 </table>
 
 <h3>Nested elements</h3>

Modified: ant/core/trunk/docs/manual/CoreTasks/zip.html
URL: http://svn.apache.org/viewvc/ant/core/trunk/docs/manual/CoreTasks/zip.html?rev=748593&r1=748592&r2=748593&view=diff
==============================================================================
--- ant/core/trunk/docs/manual/CoreTasks/zip.html (original)
+++ ant/core/trunk/docs/manual/CoreTasks/zip.html Fri Feb 27 17:00:59 2009
@@ -74,7 +74,8 @@
 but causes problems if you try to open them from within Java and your
 filenames contain non US-ASCII characters. Use the encoding attribute
 and set it to UTF8 to create zip files that can safely be read by
-Java.</p>
+Java.  For a more complete discussion,
+see <a href="#encoding">below</a></p>
 
 <p>Starting with Ant 1.5.2, <code>&lt;zip&gt;</code> can store Unix permissions
 inside the archive (see description of the filemode and dirmode
@@ -149,7 +150,8 @@
     <td valign="top">The character encoding to use for filenames
     inside the zip file.  For a list of possible values see <a
     href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
-    Defaults to the platform's default character encoding.</td>
+    Defaults to the platform's default character encoding.
+      <br/>See also the <a href="#encoding">discussion below</a></td>
     <td align="center" valign="top">No</td>
   </tr>
   <tr>
@@ -241,7 +243,127 @@
     </td>
     <td valign="top" align="center">No, default is false</td>
   </tr>
+  <tr>
+    <td valign="top">useLanguageEncodingFlag</td>
+    <td valign="top">Whether to set the language encoding flag if the
+      encoding is UTF-8.  This setting doesn't have any effect if the
+      encoding is not UTF-8.
+      <em>Since Ant 1.8.0</em>.
+      <br/>See also the <a href="#encoding">discussion below</a></td>
+    <td align="center" valign="top">No, default is true</td>
+  </tr>
+  <tr>
+    <td valign="top">createUnicodeExtraFields</td>
+    <td valign="top">Whether to create unicode extra fields to store
+      the file names a second time inside the entry's metadata.
+      Defaults to false.  <em>Since Ant 1.8.0</em>.
+      <br/>See also the <a href="#encoding">discussion below</a></td>
+    <td align="center" valign="top">No, default is false</td>
+  </tr>
 </table>
+
+<h3><a name="encoding">Encoding of File Names</a></h3>
+
+<p>Traditionally the ZIP archive format uses CodePage 437 as encoding
+  for file names, which is not sufficient for many international
+  character sets.</p>
+
+<p>Over time different archivers have chosen different ways to work
+  around the limitation - the <code>java.util.zip</code> package
+  simply uses UTF-8 as its encoding for example.</p>
+
+<p>Ant has been offering the encoding attribute of the zip and unzip
+  task as a way to explicitly specify the encoding to use (or expect)
+  since Ant 1.4.  It defaults to the platform's default encoding for
+  zip and UTF-8 for jar and other jar-like tasks (war, ear, ...) as
+  well as the unzip family of tasks.</p>
+
+<p>More recent versions of the ZIP specification introduce something
+  called the &quot;language encoding flag&quot; which can be used to
+  signal that a file name has been encoded using UTF-8.  Starting with
+  Ant 1.8.0 all zip-/jar- and similar archives written by Ant will set
+  this flag, if the encoding has been set to UTF-8.  Our
+  interoperability tests with existing archivers didn't show any ill
+  effects (in fact, most archivers ignore the flag to date), but you
+  can turn off the "language encoding flag" by setting the attribute
+  <code>useLanguageEncodingFlag</code> to <code>false</code> on the
+  zip-task if you should encounter problems.</p>
+
+<p>The unzip task (and similar tasks) will recognize the language
+  encoding flag and ignore the encoding set on the task if it has been
+  found.</p>
+
+<p>The InfoZIP developers have introduced new ZIP extra fields that
+  can be used to add an additional UTF-8 encoded file name to the
+  entry's metadata.  Most archivers ignore these extra fields.  The
+  zip family of tasks supports an
+  option <code>createUnicodeExtraFields</code> since Ant 1.8.0 which
+  makes Ant write these extra fields; it defaults to false since it
+  creates a bigger archive.</p>
+
+<p>The unzip-task will recognize the unicode extra fields by default
+  and read the file name information from them, unless you set the
+  optional attribute <code>scanForUnicodeExtraFields</code> to
+  false.</p>
+
+<h4>Recommendations for Interoperability</h4>
+
+<p>The optimal setting of flags depends on the archivers you expect as
+  consumers/producers of the ZIP archives.  Below are some test
+  results which may be superseded by later versions of each
+  tool.</p>
+
+<ul>
+  <li>The java.util.zip package used by the jar executable or to read
+    jars from your CLASSPATH reads and writes UTF-8 names, it doesn't
+    set or recognize any flags or unicode extra fields.</li>
+
+  <li>7Zip writes CodePage 437 by default but uses UTF-8 and the
+    language encoding flag when writing entries that cannot be encoded
+    as CodePage 437.  It recognizes the language encoding flag when
+    reading and ignores the unicode extra fields.</li>
+
+  <li>WinZIP writes CodePage 437 and uses unicode extra fields by
+    default.  It recognizes the unicode extra field when reading and
+    ignores the language encoding flag.</li>
+
+  <li>Windows' "compressed folder" feature doesn't recognize any flag
+    or extra field and creates archives using the platform's default
+    encoding - and expects archives to be in that encoding when reading
+    them.</li>
+
+  <li>InfoZIP based tools can recognize and write both, it is a
+    compile time option and depends on the platform so your mileage
+    may vary.</li>
+
+  <li>PKWARE zip tools recognize both and prefer the language encoding
+    flag.  They create archives using CodePage 437 if possible and UTF-8
+    plus the language encoding flag for file names that cannot be
+    encoded as CodePage 437.</li>
+</ul>
+
+<p>So, what to do?</p>
+
+<p>If you are creating jars, then java.util.zip is your main
+  consumer.  We recommend you set the encoding to UTF-8 and keep the
+  language encoding flag enabled.  The flag won't help or hurt
+  java.util.zip but archivers that support it will show the correct
+  file names.</p>
+
+<p>For maximum interop it is probably best to set the encoding to
+  UTF-8, enable the language encoding flag and create unicode extra
+  fields when writing ZIPs.  Such archives should be extracted
+  correctly by java.util.zip, 7Zip, WinZIP, PKWARE tools and most
+  likely InfoZIP tools.  They will be unusable with Windows'
+  "compressed folders" feature and bigger than archives without the
+  unicode extra fields, though.</p>
+
+<p>If Windows' "compressed folders" is your primary consumer, then
+  your best option is to explicitly set the encoding to the target
+  platform.  You may want to enable creation of unicode extra fields
+  so the tools that support them will extract the file names
+  correctly.</p>
+
 <h3>Parameters specified as nested elements</h3>
 
 <h4>any resource collection</h4>

Modified: ant/core/trunk/src/main/org/apache/tools/ant/taskdefs/Expand.java
URL: http://svn.apache.org/viewvc/ant/core/trunk/src/main/org/apache/tools/ant/taskdefs/Expand.java?rev=748593&r1=748592&r2=748593&view=diff
==============================================================================
--- ant/core/trunk/src/main/org/apache/tools/ant/taskdefs/Expand.java (original)
+++ ant/core/trunk/src/main/org/apache/tools/ant/taskdefs/Expand.java Fri Feb 27 17:00:59 2009
@@ -68,6 +68,7 @@
     private boolean resourcesSpecified = false;
     private boolean failOnEmptyArchive = false;
     private boolean stripAbsolutePathSpec = false;
+    private boolean scanForUnicodeExtraFields = true;
 
     private static final String NATIVE_ENCODING = "native-encoding";
 
@@ -166,7 +167,7 @@
                     getLocation());
         }
         try {
-            zf = new ZipFile(srcF, encoding);
+            zf = new ZipFile(srcF, encoding, scanForUnicodeExtraFields);
             boolean empty = true;
             Enumeration e = zf.getEntries();
             while (e.hasMoreElements()) {
@@ -453,4 +454,12 @@
         stripAbsolutePathSpec = b;
     }
 
+    /**
+     * Whether unicode extra fields will be used if present.
+     *
+     * @since Ant 1.8.0
+     */
+    public void setScanForUnicodeExtraFields(boolean b) {
+        scanForUnicodeExtraFields = b;
+    }
 }

Modified: ant/core/trunk/src/main/org/apache/tools/ant/taskdefs/Zip.java
URL: http://svn.apache.org/viewvc/ant/core/trunk/src/main/org/apache/tools/ant/taskdefs/Zip.java?rev=748593&r1=748592&r2=748593&view=diff
==============================================================================
--- ant/core/trunk/src/main/org/apache/tools/ant/taskdefs/Zip.java (original)
+++ ant/core/trunk/src/main/org/apache/tools/ant/taskdefs/Zip.java Fri Feb 27 17:00:59 2009
@@ -175,6 +175,20 @@
     private boolean preserve0Permissions = false;
 
     /**
+     * Whether to set the language encoding flag when creating the archive.
+     *
+     * @since Ant 1.8.0
+     */
+    private boolean useLanguageEncodingFlag = true;
+
+    /**
+     * Whether to create unicode extra fields when creating the archive.
+     *
+     * @since Ant 1.8.0
+     */
+    private boolean createUnicodeExtraFields = false;
+
+    /**
      * This is the name/location of where to
      * create the .zip file.
      * @param zipFile the path of the zipFile
@@ -453,6 +467,38 @@
     }
 
     /**
+     * Whether to set the language encoding flag.
+     * @since Ant 1.8.0
+     */
+    public void setUseLanguageEncodingFlag(boolean b) {
+        useLanguageEncodingFlag = b;
+    }
+
+    /**
+     * Whether the language encoding flag will be used.
+     * @since Ant 1.8.0
+     */
+    public boolean getUseLanguageEnodingFlag() {
+        return useLanguageEncodingFlag;
+    }
+
+    /**
+     * Whether Unicode extra fields will be created.
+     * @since Ant 1.8.0
+     */
+    public void setCreateUnicodeExtraFields(boolean b) {
+        createUnicodeExtraFields = b;
+    }
+
+    /**
+     * Whether Unicode extra fields will be created.
+     * @since Ant 1.8.0
+     */
+    public boolean getCreateUnicodeExtraFields() {
+        return createUnicodeExtraFields;
+    }
+
+    /**
      * validate and build
      * @throws BuildException on error
      */
@@ -540,6 +586,8 @@
                     zOut = new ZipOutputStream(zipFile);
 
                     zOut.setEncoding(encoding);
+                    zOut.setUseLanguageEncodingFlag(useLanguageEncodingFlag);
+                    zOut.setCreateUnicodeExtraFields(createUnicodeExtraFields);
                     zOut.setMethod(doCompress
                         ? ZipOutputStream.DEFLATED : ZipOutputStream.STORED);
                     zOut.setLevel(level);

Modified: ant/core/trunk/src/main/org/apache/tools/zip/ZipFile.java
URL: http://svn.apache.org/viewvc/ant/core/trunk/src/main/org/apache/tools/zip/ZipFile.java?rev=748593&r1=748592&r2=748593&view=diff
==============================================================================
--- ant/core/trunk/src/main/org/apache/tools/zip/ZipFile.java (original)
+++ ant/core/trunk/src/main/org/apache/tools/zip/ZipFile.java Fri Feb 27 17:00:59 2009
@@ -134,7 +134,7 @@
 
     /**
      * Opens the given file for reading, assuming the specified
-     * encoding for file names and ignoring unicode extra fields.
+     * encoding for file names, scanning unicode extra fields.
      *
      * @param name name of the archive.
      * @param encoding the encoding to use for file names
@@ -142,12 +142,12 @@
      * @throws IOException if an error occurs while reading the file.
      */
     public ZipFile(String name, String encoding) throws IOException {
-        this(new File(name), encoding, false);
+        this(new File(name), encoding, true);
     }
 
     /**
      * Opens the given file for reading, assuming the specified
-     * encoding for file names and ignoring unicode extra fields.
+     * encoding for file names and scanning for unicode extra fields.
      *
      * @param f the archive.
      * @param encoding the encoding to use for file names, use null
@@ -156,7 +156,7 @@
      * @throws IOException if an error occurs while reading the file.
      */
     public ZipFile(File f, String encoding) throws IOException {
-        this(f, encoding, false);
+        this(f, encoding, true);
     }
 
     /**

Propchange: ant/core/trunk/src/main/org/apache/tools/zip/ZipFile.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Feb 27 17:00:59 2009
@@ -1 +1 @@
-/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java:745920,746933,748133
+/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java:745920,746933,748133,748556