You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2009/03/02 17:09:20 UTC
svn commit: r749342 - in /commons/sandbox/compress/trunk/src:
main/java/org/apache/commons/compress/archivers/zip/
test/java/org/apache/commons/compress/archivers/zip/
Author: bodewig
Date: Mon Mar 2 16:09:20 2009
New Revision: 749342
URL: http://svn.apache.org/viewvc?rev=749342&view=rev
Log:
Improved encoding support for ZIPs, now with Cp437 for JDK 1.4. Submitted by Wolfgang Glas. SANDBOX-176
Added:
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java (with props)
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java (with props)
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEnoding.java (with props)
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java (with props)
commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/TestZipEncodings.java (with props)
Modified:
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/AbstractUnicodeExtraField.java
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnicodeCommentExtraField.java
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnicodePathExtraField.java
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java
Modified: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/AbstractUnicodeExtraField.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/AbstractUnicodeExtraField.java?rev=749342&r1=749341&r2=749342&view=diff
==============================================================================
--- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/AbstractUnicodeExtraField.java (original)
+++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/AbstractUnicodeExtraField.java Mon Mar 2 16:09:20 2009
@@ -38,11 +38,25 @@
* encoding of the orginal zip entry.
*
* @param text The file name or comment.
- * @param zipEncoding The encoding of the filenames in the zip
- * file, usually <code>"CP437"</code>.
+ * @param bytes The encoded form of the filename or comment in the zip
+ * file.
+ * @param off The offset of the encoded filename or comment in
+ * <code>bytes</code>.
+ * @param len The length of the encoded filename or comment in
+ * <code>bytes</code>.
*/
- protected AbstractUnicodeExtraField(String text, String zipEncoding) {
- this(text, ZipEncodingHelper.encodeName(text, zipEncoding));
+ protected AbstractUnicodeExtraField(String text, byte[] bytes, int off,
+ int len) {
+ CRC32 crc32 = new CRC32();
+ crc32.update(bytes, off, len);
+ nameCRC32 = crc32.getValue();
+
+ try {
+ unicodeName = text.getBytes("UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException("FATAL: UTF-8 encoding not supported.",
+ e);
+ }
}
/**
@@ -50,20 +64,12 @@
* encoding of the orginal zip entry.
*
* @param text The file name or comment.
- * @param zipEncoding The encoding of the filenames in the zip
- * file, usually <code>"CP437"</code>.
+ * @param bytes The encoded form of the filename or comment in the zip
+ * file.
*/
protected AbstractUnicodeExtraField(String text, byte[] bytes) {
- CRC32 crc32 = new CRC32();
- crc32.update(bytes);
- nameCRC32 = crc32.getValue();
- try {
- unicodeName = text.getBytes("UTF-8");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException("FATAL: UTF-8 encoding not supported.",
- e);
- }
+ this(text, bytes, 0, bytes.length);
}
private void assembleData() {
Added: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java?rev=749342&view=auto
==============================================================================
--- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java (added)
+++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java Mon Mar 2 16:09:20 2009
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.commons.compress.archivers.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * A fallback ZipEncoding, which uses a java.io means to encode names.
+ *
+ * <p>This implementation is not favorable for encodings other than
+ * utf-8, because java.io encodes unmappable characters as question
+ * marks leading to unreadable ZIP entries on some operating
+ * systems.</p>
+ *
+ * <p>Furthermore this implementation is unable to tell, whether a
+ * given name can be safely encoded or not.</p>
+ *
+ * <p>This implementation acts as a last resort implementation, when
+ * neither {@link Simple8BitZipEnoding} nor {@link NioZipEncoding} is
+ * available.</p>
+ *
+ * <p>The methods of this class are reentrant.</p>
+ */
+class FallbackZipEncoding implements ZipEncoding {
+ private final String charset;
+
+ /**
+ * Construct a fallback zip encoding, which uses the platform's
+ * default charset.
+ */
+ public FallbackZipEncoding() {
+ this.charset = null;
+ }
+
+ /**
+ * Construct a fallback zip encoding, which uses the given charset.
+ *
+ * @param charset The name of the charset or <code>null</code> for
+ * the platform's default character set.
+ */
+ public FallbackZipEncoding(String charset) {
+ this.charset = charset;
+ }
+
+ /**
+ * @see
+ * org.apache.commons.compress.archivers.zip.ZipEncoding#canEncode(java.lang.String)
+ */
+ public boolean canEncode(String name) {
+ return true;
+ }
+
+ /**
+ * @see
+ * org.apache.commons.compress.archivers.zip.ZipEncoding#encode(java.lang.String)
+ */
+ public ByteBuffer encode(String name) throws IOException {
+ if (this.charset == null) {
+ return ByteBuffer.wrap(name.getBytes());
+ } else {
+ return ByteBuffer.wrap(name.getBytes(this.charset));
+ }
+ }
+
+ /**
+ * @see
+ * org.apache.commons.compress.archivers.zip.ZipEncoding#decode(byte[])
+ */
+ public String decode(byte[] data) throws IOException {
+ if (this.charset == null) {
+ return new String(data);
+ } else {
+ return new String(data,this.charset);
+ }
+ }
+}
Propchange: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java?rev=749342&view=auto
==============================================================================
--- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java (added)
+++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java Mon Mar 2 16:09:20 2009
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.commons.compress.archivers.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+/**
+ * A ZipEncoding, which uses a java.nio {@link
+ * java.nio.charset.Charset Charset} to encode names.
+ *
+ * <p>This implementation works for all cases under java-1.5 or
+ * later. However, in java-1.4, some charsets don't have a java-nio
+ * implementation, most notably the default ZIP encoding Cp437.</p>
+ *
+ * <p>The methods of this class are reentrant.</p>
+ */
+class NioZipEncoding implements ZipEncoding {
+ private final Charset charset;
+
+ /**
+ * Construct an NIO based zip encoding, which wraps the given
+ * charset.
+ *
+ * @param charset The NIO charset to wrap.
+ */
+ public NioZipEncoding(Charset charset) {
+ this.charset = charset;
+ }
+
+ /**
+ * @see
+ * org.apache.commons.compress.archivers.zip.ZipEncoding#canEncode(java.lang.String)
+ */
+ public boolean canEncode(String name) {
+ CharsetEncoder enc = this.charset.newEncoder();
+ enc.onMalformedInput(CodingErrorAction.REPORT);
+ enc.onUnmappableCharacter(CodingErrorAction.REPORT);
+
+ return enc.canEncode(name);
+ }
+
+ /**
+ * @see
+ * org.apache.commons.compress.archivers.zip.ZipEncoding#encode(java.lang.String)
+ */
+ public ByteBuffer encode(String name) {
+ CharsetEncoder enc = this.charset.newEncoder();
+
+ enc.onMalformedInput(CodingErrorAction.REPORT);
+ enc.onUnmappableCharacter(CodingErrorAction.REPORT);
+
+ CharBuffer cb = CharBuffer.wrap(name);
+ ByteBuffer out = ByteBuffer.allocate(name.length()
+ + (name.length() + 1) / 2);
+
+ while (cb.remaining() > 0) {
+ CoderResult res = enc.encode(cb, out,true);
+
+ if (res.isUnmappable() || res.isMalformed()) {
+
+ // write the unmappable characters in utf-16
+ // pseudo-URL encoding style to ByteBuffer.
+ if (res.length() * 6 > out.remaining()) {
+ out = ZipEncodingHelper.growBuffer(out, out.position()
+ + res.length() * 6);
+ }
+
+ for (int i=0; i<res.length(); ++i) {
+ ZipEncodingHelper.appendSurrogate(out,cb.get());
+ }
+
+ } else if (res.isOverflow()) {
+
+ out = ZipEncodingHelper.growBuffer(out, 0);
+
+ } else if (res.isUnderflow()) {
+
+ enc.flush(out);
+ break;
+
+ }
+ }
+
+ out.limit(out.position());
+ out.rewind();
+ return out;
+ }
+
+ /**
+ * @see
+ * org.apache.commons.compress.archivers.zip.ZipEncoding#decode(byte[])
+ */
+ public String decode(byte[] data) throws IOException {
+ return this.charset.newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT)
+ .decode(ByteBuffer.wrap(data)).toString();
+ }
+}
Propchange: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEnoding.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEnoding.java?rev=749342&view=auto
==============================================================================
--- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEnoding.java (added)
+++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEnoding.java Mon Mar 2 16:09:20 2009
@@ -0,0 +1,261 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.commons.compress.archivers.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * This ZipEncoding implementation implements a simple 8bit character
+ * set, which meets the following restrictions:
+ *
+ * <ul>
+ * <li>Characters 0x0000 to 0x007f are encoded as the corresponding
+ * byte values 0x00 to 0x7f.</li>
+ * <li>All byte codes from 0x80 to 0xff are mapped to a unique unicode
+ * character in the range 0x0080 to 0x7fff. (No support for
+ * UTF-16 surrogates.)</li>
+ * </ul>
+ *
+ * <p>These restrictions most notably apply to the most prominent
+ * omissions of java-1.4's {@link java.nio.charset.Charset Charset}
+ * implementation, Cp437 and Cp850.</p>
+ *
+ * <p>The methods of this class are reentrant.</p>
+ */
+class Simple8BitZipEnoding implements ZipEncoding {
+
+ /**
+ * A character entity, which is put to the reverse mapping table
+ * of a simple encoding.
+ */
+ private static final class Simple8BitChar implements Comparable {
+ public final char unicode;
+ public final byte code;
+
+ Simple8BitChar(byte code, char unicode) {
+ this.code = code;
+ this.unicode = unicode;
+ }
+
+ public int compareTo(Object o) {
+ Simple8BitChar a = (Simple8BitChar) o;
+
+ return this.unicode - a.unicode;
+ }
+
+ public String toString() {
+ return "0x" + Integer.toHexString(0xffff & (int) unicode)
+ + "->0x" + Integer.toHexString(0xff & (int) code);
+ }
+ }
+
+ /**
+ * The characters for byte values of 128 to 255 stored as an array of
+ * 128 chars.
+ */
+ private final char[] highChars;
+
+ /**
+ * A list of {@link Simple8BitChar} objects sorted by the unicode
+ * field. This list is used to binary search reverse mapping of
+ * unicode characters with a character code greater than 127.
+ */
+ private final List reverseMapping;
+
+ /**
+ * @param highChars The characters for byte values of 128 to 255
+ * stored as an array of 128 chars.
+ */
+ public Simple8BitZipEnoding(char[] highChars) {
+ this.highChars = highChars;
+ this.reverseMapping = new ArrayList(this.highChars.length);
+
+ byte code = 127;
+
+ for (int i = 0; i < this.highChars.length; ++i) {
+ this.reverseMapping.add(new Simple8BitChar(++code,
+ this.highChars[i]));
+ }
+
+ Collections.sort(this.reverseMapping);
+ }
+
+ /**
+ * Return the character code for a given encoded byte.
+ *
+ * @param b The byte to decode.
+ * @return The associated character value.
+ */
+ public char decodeByte(byte b) {
+ // code 0-127
+ if (b >= 0) {
+ return (char) b;
+ }
+
+ // byte is signed, so 128 == -128 and 255 == -1
+ return this.highChars[128 + (int) b];
+ }
+
+ /**
+ * @param c The character to encode.
+ * @return Whether the given unicode character is covered by this encoding.
+ */
+ public boolean canEncodeChar(char c) {
+
+ if (c >= 0 && c < 128) {
+ return true;
+ }
+
+ Simple8BitChar r = this.encodeHighChar(c);
+ return r != null;
+ }
+
+ /**
+ * Pushes the encoded form of the given character to the given byte buffer.
+ *
+ * @param bb The byte buffer to write to.
+ * @param c The character to encode.
+ * @return Whether the given unicode character is covered by this encoding.
+ * If <code>false</code> is returned, nothing is pushed to the
+ * byte buffer.
+ */
+ public boolean pushEncodedChar(ByteBuffer bb, char c) {
+
+ if (c >= 0 && c < 128) {
+ bb.put((byte) c);
+ return true;
+ }
+
+ Simple8BitChar r = this.encodeHighChar(c);
+ if (r == null) {
+ return false;
+ }
+ bb.put(r.code);
+ return true;
+ }
+
+ /**
+ * @param c A unicode character in the range from 0x0080 to 0x7fff
+ * @return A Simple8BitChar, if this character is covered by this encoding.
+ * A <code>null</code> value is returned, if this character is not
+ * covered by this encoding.
+ */
+ private Simple8BitChar encodeHighChar(char c) {
+ // for performance and simplicity, yet another reincarnation of
+ // binary search...
+ int i0 = 0;
+ int i1 = this.reverseMapping.size();
+
+ while (i1 > i0) {
+
+ int i = i0 + (i1 - i0) / 2;
+
+ Simple8BitChar m = (Simple8BitChar) this.reverseMapping.get(i);
+
+ if (m.unicode == c) {
+ return m;
+ }
+
+ if (m.unicode < c) {
+ i0 = i + 1;
+ } else {
+ i1 = i;
+ }
+ }
+
+ if (i0 >= this.reverseMapping.size()) {
+ return null;
+ }
+
+ Simple8BitChar r = (Simple8BitChar) this.reverseMapping.get(i0);
+
+ if (r.unicode != c) {
+ return null;
+ }
+
+ return r;
+ }
+
+ /**
+ * @see
+ * org.apache.commons.compress.archivers.zip.ZipEncoding#canEncode(java.lang.String)
+ */
+ public boolean canEncode(String name) {
+
+ for (int i=0;i<name.length();++i) {
+
+ char c = name.charAt(i);
+
+ if (!this.canEncodeChar(c)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * @see
+ * org.apache.commons.compress.archivers.zip.ZipEncoding#encode(java.lang.String)
+ */
+ public ByteBuffer encode(String name) {
+ ByteBuffer out = ByteBuffer.allocate(name.length()
+ + 6 + (name.length() + 1) / 2);
+
+ for (int i=0;i<name.length();++i) {
+
+ char c = name.charAt(i);
+
+ if (out.remaining() < 6) {
+ out = ZipEncodingHelper.growBuffer(out,out.position() + 6);
+ }
+
+ if (!this.pushEncodedChar(out,c)) {
+
+ ZipEncodingHelper.appendSurrogate(out,c);
+ }
+ }
+
+ out.limit(out.position());
+ out.rewind();
+ return out;
+ }
+
+ /**
+ * @see
+ * org.apache.commons.compress.archivers.zip.ZipEncoding#decode(byte[])
+ */
+ public String decode(byte[] data) throws IOException {
+ char [] ret = new char[data.length];
+
+ for (int i=0;i<data.length;++i) {
+ ret[i] = this.decodeByte(data[i]);
+ }
+
+ return new String(ret);
+ }
+
+
+}
Propchange: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEnoding.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnicodeCommentExtraField.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnicodeCommentExtraField.java?rev=749342&r1=749341&r2=749342&view=diff
==============================================================================
--- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnicodeCommentExtraField.java (original)
+++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnicodeCommentExtraField.java Mon Mar 2 16:09:20 2009
@@ -42,15 +42,18 @@
}
/**
- * Assemble as unicode comment extension from the comment and
- * encoding of the original zip entry.
+ * Assemble as unicode comment extension from the comment given as
+ * text as well as the encoded bytes actually written to the archive.
*
- * @param comment The file comment
- * @param zipEncoding The encoding of the comment in the zip file,
- * usually <code>"CP437"</code>.
+ * @param text The file comment
+ * @param bytes the bytes actually written to the archive
+ * @param off The offset of the encoded comment in <code>bytes</code>.
+ * @param len The length of the encoded comment in
+ * <code>bytes</code>.
*/
- public UnicodeCommentExtraField(String comment, String zipEncoding) {
- super(comment, zipEncoding);
+ public UnicodeCommentExtraField(String text, byte[] bytes, int off,
+ int len) {
+ super(text, bytes, off, len);
}
/**
Modified: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnicodePathExtraField.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnicodePathExtraField.java?rev=749342&r1=749341&r2=749342&view=diff
==============================================================================
--- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnicodePathExtraField.java (original)
+++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnicodePathExtraField.java Mon Mar 2 16:09:20 2009
@@ -42,20 +42,22 @@
}
/**
- * Assemble as unicode path extension from the name and encoding
- * of the original zip entry.
+ * Assemble as unicode path extension from the name given as
+ * text as well as the encoded bytes actually written to the archive.
*
* @param name The file name
- * @param zipEncoding The encoding of the filename in the zip
- * file, usually <code>"CP437"</code>.
+ * @param bytes the bytes actually written to the archive
+ * @param off The offset of the encoded filename in <code>bytes</code>.
+ * @param len The length of the encoded filename in
+ * <code>bytes</code>.
*/
- public UnicodePathExtraField(String name, String zipEncoding) {
- super(name, zipEncoding);
+ public UnicodePathExtraField(String text, byte[] bytes, int off, int len) {
+ super(text, bytes, off, len);
}
/**
* Assemble as unicode path extension from the name given as
- * text as well as the bytes actually written to the archive.
+ * text as well as the encoded bytes actually written to the archive.
*
* @param name The file name
* @param bytes the bytes actually written to the archive
Modified: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java?rev=749342&r1=749341&r2=749342&view=diff
==============================================================================
--- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java (original)
+++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java Mon Mar 2 16:09:20 2009
@@ -22,7 +22,7 @@
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
-import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
@@ -87,14 +87,9 @@
public static final int STORED = java.util.zip.ZipEntry.STORED;
/**
- * name of the encoding UTF-8
- */
- static final String UTF8 = "UTF8";
-
- /**
* default encoding for file names and comment.
*/
- static final String DEFAULT_ENCODING = UTF8;
+ static final String DEFAULT_ENCODING = ZipEncodingHelper.UTF8;
/**
* General purpose flag, which indicates that filenames are
@@ -188,7 +183,16 @@
*/
private String encoding = DEFAULT_ENCODING;
- // CheckStyle:VisibilityModifier OFF - bc
+ /**
+ * The zip encoding to use for filenames and the file comment.
+ *
+ * This field is of internal use and will be set in {@link
+ * #setEncoding(String)}.
+ */
+ private ZipEncoding zipEncoding =
+ ZipEncodingHelper.getZipEncoding(DEFAULT_ENCODING);
+
+ // CheckStyle:VisibilityModifier OFF - bc
/**
* This Deflater object is used for output.
@@ -263,8 +267,8 @@
}
/**
- * This method indicates whether this archive is writing to a seekable stream (i.e., to a random
- * access file).
+ * This method indicates whether this archive is writing to a
+ * seekable stream (i.e., to a random access file).
*
* <p>For seekable streams, you don't need to calculate the CRC or
* uncompressed size for {@link #STORED} entries before
@@ -286,7 +290,8 @@
*/
public void setEncoding(final String encoding) {
this.encoding = encoding;
- useEFS &= isUTF8(encoding);
+ this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
+ useEFS &= ZipEncodingHelper.isUTF8(encoding);
}
/**
@@ -305,7 +310,7 @@
* <p>Defaults to true.</p>
*/
public void setUseLanguageEncodingFlag(boolean b) {
- useEFS = b && isUTF8(encoding);
+ useEFS = b && ZipEncodingHelper.isUTF8(encoding);
}
/**
@@ -452,13 +457,14 @@
*
* <p>Default is Deflater.DEFAULT_COMPRESSION.</p>
* @param level the compression level.
- * @throws IllegalArgumentException if an invalid compression level is specified.
+ * @throws IllegalArgumentException if an invalid compression
+ * level is specified.
*/
public void setLevel(int level) {
if (level < Deflater.DEFAULT_COMPRESSION
|| level > Deflater.BEST_COMPRESSION) {
- throw new IllegalArgumentException(
- "Invalid compression level: " + level);
+ throw new IllegalArgumentException("Invalid compression level: "
+ + level);
}
hasCompressionLevelChanged = (this.level != level);
this.level = level;
@@ -606,13 +612,31 @@
*/
protected void writeLocalFileHeader(ZipArchiveEntry ze) throws IOException {
- byte[] name = getBytes(ze.getName());
+ boolean encodable = this.zipEncoding.canEncode(ze.getName());
+ ByteBuffer name = this.zipEncoding.encode(ze.getName());
+
if (createUnicodeExtraFields) {
- ze.addExtraField(new UnicodePathExtraField(ze.getName(), name));
+
+ /* if (!encodable) { -- FIXME decide what to do */
+ ze.addExtraField(new UnicodePathExtraField(ze.getName(),
+ name.array(),
+ name.arrayOffset(),
+ name.limit()));
+ /* } */
+
String comm = ze.getComment();
if (comm != null && !"".equals(comm)) {
- byte[] commentB = getBytes(comm);
- ze.addExtraField(new UnicodeCommentExtraField(comm, commentB));
+
+ boolean commentEncodable = this.zipEncoding.canEncode(comm);
+
+ /* if (!commentEncodable) { -- FIXME decide what to do */
+ ByteBuffer commentB = this.zipEncoding.encode(comm);
+ ze.addExtraField(new UnicodeCommentExtraField(comm,
+ commentB.array(),
+ commentB.arrayOffset(),
+ commentB.limit())
+ );
+ /* } */
}
}
@@ -653,7 +677,7 @@
// CheckStyle:MagicNumber ON
// file name length
- writeOut(ZipShort.getBytes(name.length));
+ writeOut(ZipShort.getBytes(name.limit()));
written += SHORT;
// extra field length
@@ -662,8 +686,8 @@
written += SHORT;
// file name
- writeOut(name);
- written += name.length;
+ writeOut(name.array(), name.arrayOffset(), name.limit());
+ written += name.limit();
// extra field
writeOut(extra);
@@ -727,8 +751,8 @@
// CheckStyle:MagicNumber ON
// file name length
- byte[] name = getBytes(ze.getName());
- writeOut(ZipShort.getBytes(name.length));
+ ByteBuffer name = this.zipEncoding.encode(ze.getName());
+ writeOut(ZipShort.getBytes(name.limit()));
written += SHORT;
// extra field length
@@ -741,8 +765,8 @@
if (comm == null) {
comm = "";
}
- byte[] commentB = getBytes(comm);
- writeOut(ZipShort.getBytes(commentB.length));
+ ByteBuffer commentB = this.zipEncoding.encode(comm);
+ writeOut(ZipShort.getBytes(commentB.limit()));
written += SHORT;
// disk number start
@@ -762,16 +786,16 @@
written += WORD;
// file name
- writeOut(name);
- written += name.length;
+ writeOut(name.array(), name.arrayOffset(), name.limit());
+ written += name.limit();
// extra field
writeOut(extra);
written += extra.length;
// file comment
- writeOut(commentB);
- written += commentB.length;
+ writeOut(commentB.array(), commentB.arrayOffset(), commentB.limit());
+ written += commentB.limit();
}
/**
@@ -795,9 +819,9 @@
writeOut(ZipLong.getBytes(cdOffset));
// ZIP file comment
- byte[] data = getBytes(comment);
- writeOut(ZipShort.getBytes(data.length));
- writeOut(data);
+ ByteBuffer data = this.zipEncoding.encode(comment);
+ writeOut(ZipShort.getBytes(data.limit()));
+ writeOut(data.array(), data.arrayOffset(), data.limit());
}
/**
@@ -841,31 +865,6 @@
}
/**
- * Retrieve the bytes for the given String in the encoding set for
- * this Stream.
- * @param name the string to get bytes from
- * @return the bytes as a byte array
- * @throws ZipException on error
- */
- protected byte[] getBytes(String name) throws ZipException {
- if (encoding == null) {
- return name.getBytes();
- } else {
- try {
- return ZipEncodingHelper.encodeName(name, encoding);
- } catch (java.nio.charset.UnsupportedCharsetException ex) {
- // Java 1.4's NIO doesn't recognize a few names that
- // String.getBytes does
- try {
- return name.getBytes(encoding);
- } catch (UnsupportedEncodingException uee) {
- throw new ZipException(uee.getMessage());
- }
- }
- }
- }
-
- /**
* Write bytes to output or random access file.
* @param data the byte array to write
* @throws IOException on error
@@ -910,19 +909,6 @@
}
}
- /**
- * Whether a given encoding - or the platform's default encoding
- * if the parameter is null - is UTF-8.
- */
- static boolean isUTF8(String encoding) {
- if (encoding == null) {
- // check platform's default encoding
- encoding = System.getProperty("file.encoding");
- }
- return UTF8.equalsIgnoreCase(encoding)
- || "utf-8".equalsIgnoreCase(encoding);
- }
-
private void writeVersionNeededToExtractAndGeneralPurposeBits(final int
zipMethod)
throws IOException {
Added: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java?rev=749342&view=auto
==============================================================================
--- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java (added)
+++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java Mon Mar 2 16:09:20 2009
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.commons.compress.archivers.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+
+/**
+ * An interface for encoders that do a pretty encoding of ZIP
+ * filenames.
+ *
+ * <p>There are mostly two implementations, one that uses java.nio
+ * {@link java.nio.charset.Charset Charset} and one implementation,
+ * which copes with simple 8 bit charsets, because java-1.4 did not
+ * support Cp437 in java.nio.</p>
+ *
+ * <p>The main reason for defining an own encoding layer comes from
+ * the problems with {@link java.lang.String#getBytes(String)
+ * String.getBytes}, which encodes unknown characters as ASCII
+ * question marks ('?'), which is per definition an invalid filename
+ * character under some operating systems (Windows, e.g.) leading to
+ * ignored ZIP entries.</p>
+ *
+ * <p>All implementations should implement this interface in a
+ * reentrant way.</p>
+ */
+interface ZipEncoding {
+ /**
+ * Check, whether the given string may be losslessly encoded using this
+ * encoding.
+ *
+ * @param name A filename or ZIP comment.
+ * @return Whether the given name may be encoded without any losses.
+ */
+ boolean canEncode(String name);
+
+ /**
+ * Encode a filename or a comment to a byte array suitable for
+ * storing it to a serialized zip entry.
+ *
+ * <p>Examples for CP 437 (in pseudo-notation, right hand side is
+ * C-style notation):</p>
+ * <pre>
+ * encode("\u20AC_for_Dollar.txt") = "%U20AC_for_Dollar.txt"
+ * encode("\u00D6lf\u00E4sser.txt") = "\231lf\204sser.txt"
+ * </pre>
+ *
+ * @param name A filename or ZIP comment.
+ * @return A byte buffer with a backing array containing the
+ * encoded name. Unmappable characters or malformed
+ * character sequences are mapped to a sequence of utf-16
+ * words encoded in the format <code>%Uxxxx</code>. It is
+ * assumed, that the byte buffer is positioned at the
+ * beginning of the encoded result, the byte buffer has a
+ * backing array and the limit of the byte buffer points
+ * to the end of the encoded result.
+ * @throws IOException
+ */
+ ByteBuffer encode(String name) throws IOException;
+
+ /**
+ * @param data The byte values to decode.
+ * @return The decoded string.
+ * @throws IOException
+ */
+ String decode(byte [] data) throws IOException;
+}
Propchange: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncoding.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java?rev=749342&r1=749341&r2=749342&view=diff
==============================================================================
--- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java (original)
+++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java Mon Mar 2 16:09:20 2009
@@ -19,11 +19,10 @@
package org.apache.commons.compress.archivers.zip;
import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
import java.nio.charset.Charset;
-import java.nio.charset.CharsetEncoder;
-import java.nio.charset.CoderResult;
-import java.nio.charset.CodingErrorAction;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.HashMap;
+import java.util.Map;
/**
* Static helper functions for robustly encoding filenames in zip files.
@@ -31,6 +30,109 @@
abstract class ZipEncodingHelper {
/**
+ * A class, which holds the high characters of a simple encoding
+ * and lazily instantiates a Simple8BitZipEnoding instance in a
+ * thread-safe manner.
+ */
+ private static class SimpleEncodingHolder {
+
+ private final char [] highChars;
+ private Simple8BitZipEnoding encoding;
+
+ /**
+ * Instantiate a simple encoding holder.
+ *
+ * @param highChars The characters for byte codes 128 to 255.
+ *
+ * @see Simple8BitZipEnoding#Simple8BitZipEnoding(char[])
+ */
+ SimpleEncodingHolder(char [] highChars) {
+ this.highChars = highChars;
+ }
+
+ /**
+ * @return The associated {@link Simple8BitZipEnoding}, which
+ * is instantiated if not done so far.
+ */
+ public synchronized Simple8BitZipEnoding getEncoding() {
+ if (this.encoding == null) {
+ this.encoding = new Simple8BitZipEnoding(this.highChars);
+ }
+ return this.encoding;
+ }
+ }
+
+ private static final Map simpleEncodings;
+
+ static {
+ simpleEncodings = new HashMap();
+
+ char[] cp437_high_chars =
+ new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
+ 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
+ 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
+ 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
+ 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5,
+ 0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
+ 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310,
+ 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
+ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561,
+ 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557,
+ 0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534,
+ 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
+ 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
+ 0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559,
+ 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518,
+ 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
+ 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3,
+ 0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4,
+ 0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1,
+ 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
+ 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2,
+ 0x25a0, 0x00a0 };
+
+ SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars);
+
+ simpleEncodings.put("CP437",cp437);
+ simpleEncodings.put("Cp437",cp437);
+ simpleEncodings.put("cp437",cp437);
+ simpleEncodings.put("IBM437",cp437);
+ simpleEncodings.put("ibm437",cp437);
+
+ char[] cp850_high_chars =
+ new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
+ 0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
+ 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
+ 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
+ 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8,
+ 0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
+ 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae,
+ 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
+ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1,
+ 0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557,
+ 0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534,
+ 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
+ 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
+ 0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb,
+ 0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518,
+ 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
+ 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5,
+ 0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9,
+ 0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1,
+ 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
+ 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2,
+ 0x25a0, 0x00a0 };
+
+ SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars);
+
+ simpleEncodings.put("CP850",cp850);
+ simpleEncodings.put("Cp850",cp850);
+ simpleEncodings.put("cp850",cp850);
+ simpleEncodings.put("IBM850",cp850);
+ simpleEncodings.put("ibm850",cp850);
+ }
+
+ /**
* Grow a byte buffer, so it has a minimal capacity or at least
* the double capacity of the original buffer
*
@@ -53,7 +155,7 @@
return on;
}
-
+
/**
* The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
* ASCII bytes.
@@ -65,131 +167,79 @@
};
/**
- * Encode a filename or a comment to a byte array suitable for
- * storing it to a serialized zip entry.
+ * Append <code>%Uxxxx</code> to the given byte buffer.
+ * The caller must assure, that <code>bb.remaining()>=6</code>.
*
- * Examples (in pseudo-notation, right hand side is C-style notation):
- * <pre>
- * encodeName("\u20AC_for_Dollar.txt","CP437") = "%U20AC_for_Dollar.txt"
- * encodeName("\u00D6lf\u00E4sser.txt","CP437") = "\231lf\204sser.txt"
- * </pre>
- *
- * @param name The filename or comment with possible non-ASCII
- * unicode characters. Must not be null.
- * @param encoding A valid encoding name. The standard zip
- * encoding is <code>"CP437"</code>,
- * <code>"UTF-8"</code> is supported in ZIP file
- * version <code>6.3</code> or later. If null,
- * will use the platform's {@link
- * java.lang.String#getBytes default encoding}.
- * @return A byte array containing the mapped file
- * name. Unmappable characters or malformed character
- * sequences are mapped to a sequence of utf-16 words
- * encoded in the format <code>%Uxxxx</code>.
+ * @param bb The byte buffer to write to.
+ * @param c The character to write.
*/
- static final byte[] encodeName(String name, String encoding) {
- if (encoding == null) {
- return name.getBytes();
- }
+ static void appendSurrogate(ByteBuffer bb, char c) {
- Charset cs = Charset.forName(encoding);
- CharsetEncoder enc = cs.newEncoder();
+ bb.put((byte) '%');
+ bb.put((byte) 'U');
- enc.onMalformedInput(CodingErrorAction.REPORT);
- enc.onUnmappableCharacter(CodingErrorAction.REPORT);
-
- CharBuffer cb = CharBuffer.wrap(name);
- ByteBuffer out = ByteBuffer.allocate(name.length()
- + (name.length() + 1) / 2);
-
- while (cb.remaining() > 0) {
- CoderResult res = enc.encode(cb, out,true);
+ bb.put(HEX_DIGITS[(c >> 12)&0x0f]);
+ bb.put(HEX_DIGITS[(c >> 8)&0x0f]);
+ bb.put(HEX_DIGITS[(c >> 4)&0x0f]);
+ bb.put(HEX_DIGITS[c & 0x0f]);
+ }
- if (res.isUnmappable() || res.isMalformed()) {
- // write the unmappable characters in utf-16
- // pseudo-URL encoding style to ByteBuffer.
- if (res.length() * 6 > out.remaining()) {
- out = growBuffer(out,out.position() + res.length() * 6);
- }
+ /**
+ * name of the encoding UTF-8
+ */
+ static final String UTF8 = "UTF8";
- for (int i=0; i<res.length(); ++i) {
- out.put((byte) '%');
- out.put((byte) 'U');
+ /**
+ * the zip encoding instance used for UTF-8
+ */
+ static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8);
- char c = cb.get();
+ /**
+ * Instantiates a zip encoding.
+ *
+ * @param name The name of the zip encoding. Specify <code>null</code> for
+ * the platform's default encoding.
+ * @return A zip encoding for the given encoding name.
+ */
+ static ZipEncoding getZipEncoding(String name) {
+
+ // fallback encoding is good enough for utf-8.
+ if (isUTF8(name)) {
+ return UTF8_ZIP_ENCODING;
+ }
- out.put(HEX_DIGITS[(c >> 12)&0x0f]);
- out.put(HEX_DIGITS[(c >> 8)&0x0f]);
- out.put(HEX_DIGITS[(c >> 4)&0x0f]);
- out.put(HEX_DIGITS[c & 0x0f]);
- }
+ if (name == null) {
+ return new FallbackZipEncoding();
+ }
- } else if (res.isOverflow()) {
+ SimpleEncodingHolder h =
+ (SimpleEncodingHolder) simpleEncodings.get(name);
- out = growBuffer(out, 0);
+ if (h!=null) {
+ return h.getEncoding();
+ }
- } else if (res.isUnderflow()) {
+ try {
- enc.flush(out);
- break;
+ Charset cs = Charset.forName(name);
+ return new NioZipEncoding(cs);
- }
+ } catch (UnsupportedCharsetException e) {
+ return new FallbackZipEncoding(name);
}
-
- byte [] ret = new byte[out.position()];
- out.rewind();
- out.get(ret);
-
- return ret;
}
/**
- * Return, whether a filename or a comment may be encoded to a
- * byte array suitable for storing it to a serialized zip entry
- * without any losses.
- *
- * Examples (in pseudo-notation, right hand side is C-style notation):
- * <pre>
- * canEncodeName("\u20AC_for_Dollar.txt","CP437") = false
- * canEncodeName("\u20AC_for_Dollar.txt","UTF-8") = true
- * canEncodeName("\u00D6lf\u00E4sser.txt","CP437") = true
- * </pre>
- *
- * @param name The filename or comment with possible non-ASCII
- * unicode characters.
- * @param encoding A valid encoding name. The standard zip
- * encoding is <code>"CP437"</code>,
- * <code>"UTF-8"</code> is supported in ZIP file
- * version <code>6.3</code> or later.
- * @return Whether the given encoding may encode the given name.
+ * Whether a given encoding - or the platform's default encoding
+ * if the parameter is null - is UTF-8.
*/
- static final boolean canEncodeName(String name, String encoding) {
-
- Charset cs = Charset.forName(encoding);
-
- CharsetEncoder enc = cs.newEncoder();
- enc.onMalformedInput(CodingErrorAction.REPORT);
- enc.onUnmappableCharacter(CodingErrorAction.REPORT);
-
- return enc.canEncode(name);
- }
-
- /**
- * Decode a filename or a comment from a byte array.
- *
- * @param name The filename or comment.
- * @param encoding A valid encoding name. The standard zip
- * encoding is <code>"CP437"</code>,
- * <code>"UTF-8"</code> is supported in ZIP file
- * version <code>6.3</code> or later.
- */
- static final String decodeName(byte[] name, String encoding)
- throws java.nio.charset.CharacterCodingException {
- Charset cs = Charset.forName(encoding);
- return cs.newDecoder()
- .onMalformedInput(CodingErrorAction.REPORT)
- .onUnmappableCharacter(CodingErrorAction.REPORT)
- .decode(ByteBuffer.wrap(name)).toString();
+ static boolean isUTF8(String encoding) {
+ if (encoding == null) {
+ // check platform's default encoding
+ encoding = System.getProperty("file.encoding");
+ }
+ return UTF8.equalsIgnoreCase(encoding)
+ || "utf-8".equalsIgnoreCase(encoding);
}
}
Modified: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java?rev=749342&r1=749341&r2=749342&view=diff
==============================================================================
--- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java (original)
+++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java Mon Mar 2 16:09:20 2009
@@ -21,8 +21,6 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
-import java.io.UnsupportedEncodingException;
-import java.nio.charset.CharacterCodingException;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
@@ -99,6 +97,11 @@
private final String encoding;
/**
+ * The zip encoding to use for filenames and the file comment.
+ */
+ private final ZipEncoding zipEncoding;
+
+ /**
* The actual data source.
*/
private RandomAccessFile archive;
@@ -116,7 +119,7 @@
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(File f) throws IOException {
- this(f, ZipArchiveOutputStream.UTF8);
+ this(f, ZipEncodingHelper.UTF8);
}
/**
@@ -127,7 +130,7 @@
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(String name) throws IOException {
- this(new File(name), ZipArchiveOutputStream.UTF8);
+ this(new File(name), ZipEncodingHelper.UTF8);
}
/**
@@ -165,14 +168,15 @@
* @param f the archive.
* @param encoding the encoding to use for file names, use null
* for the platform's default encoding
- * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present)
- * to set the file names.
+ * @param useUnicodeExtraFields whether to use InfoZIP Unicode
+ * Extra Fields (if present) to set the file names.
*
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
throws IOException {
this.encoding = encoding;
+ this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
this.useUnicodeExtraFields = useUnicodeExtraFields;
archive = new RandomAccessFile(f, "r");
boolean success = false;
@@ -247,7 +251,8 @@
* @param ze the entry to get the stream for.
* @return a stream to read the entry from.
* @throws IOException if unable to create an input stream from the zipentry
- * @throws ZipException if the zipentry has an unsupported compression method
+ * @throws ZipException if the zipentry has an unsupported
+ * compression method
*/
public InputStream getInputStream(ZipArchiveEntry ze)
throws IOException, ZipException {
@@ -330,8 +335,8 @@
final int generalPurposeFlag = ZipShort.getValue(cfh, off);
final boolean hasEFS =
(generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0;
- final String entryEncoding =
- hasEFS ? ZipArchiveOutputStream.UTF8 : encoding;
+ final ZipEncoding entryEncoding =
+ hasEFS ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
off += SHORT;
@@ -373,7 +378,7 @@
byte[] fileName = new byte[fileNameLen];
archive.readFully(fileName);
- ze.setName(getString(fileName, entryEncoding));
+ ze.setName(entryEncoding.decode(fileName));
// LFH offset,
OffsetEntry offset = new OffsetEntry();
@@ -395,7 +400,7 @@
byte[] comment = new byte[commentLen];
archive.readFully(comment);
- ze.setComment(getString(comment, entryEncoding));
+ ze.setComment(entryEncoding.decode(comment));
archive.readFully(signatureBytes);
sig = ZipLong.getValue(signatureBytes);
@@ -529,7 +534,7 @@
+ SHORT + SHORT + fileNameLen + extraFieldLen));
*/
offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
- + SHORT + SHORT + fileNameLen + extraFieldLen;
+ + SHORT + SHORT + fileNameLen + extraFieldLen;
if (entriesWithoutEFS.containsKey(ze)) {
setNameAndCommentFromExtraFields(ze,
@@ -568,37 +573,6 @@
/**
- * Retrieve a String from the given bytes using the encoding set
- * for this ZipFile.
- *
- * @param bytes the byte array to transform
- * @return String obtained by using the given encoding
- * @throws ZipException if the encoding cannot be recognized.
- */
- protected String getString(byte[] bytes, String enc)
- throws ZipException {
- if (enc == null) {
- return new String(bytes);
- } else {
- try {
- try {
- return ZipEncodingHelper.decodeName(bytes, enc);
- } catch (CharacterCodingException ex) {
- throw new ZipException(ex.getMessage());
- }
- } catch (java.nio.charset.UnsupportedCharsetException ex) {
- // Java 1.4's NIO doesn't recognize a few names that
- // String.getBytes does
- try {
- return new String(bytes, enc);
- } catch (UnsupportedEncodingException uee) {
- throw new ZipException(uee.getMessage());
- }
- }
- }
- }
-
- /**
* Checks whether the archive starts with a LFH. If it doesn't,
* it may be an empty archive.
*/
@@ -659,9 +633,8 @@
if (origCRC32 == f.getNameCRC32()) {
try {
return ZipEncodingHelper
- .decodeName(f.getUnicodeName(),
- ZipArchiveOutputStream.UTF8);
- } catch (CharacterCodingException ex) {
+ .UTF8_ZIP_ENCODING.decode(f.getUnicodeName());
+ } catch (IOException ex) {
// UTF-8 unsupported? should be impossible the
// Unicode*ExtraField must contain some bad bytes
Added: commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/TestZipEncodings.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/TestZipEncodings.java?rev=749342&view=auto
==============================================================================
--- commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/TestZipEncodings.java (added)
+++ commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/TestZipEncodings.java Mon Mar 2 16:09:20 2009
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.commons.compress.archivers.zip;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import junit.framework.TestCase;
+
+/**
+ * Test zip encodings.
+ */
+public class TestZipEncodings extends TestCase {
+ private static final String UNENC_STRING = "\u2016";
+
+ // stress test for internal grow method.
+ private static final String BAD_STRING =
+ "\u2016\u2015\u2016\u2015\u2016\u2015\u2016\u2015\u2016\u2015\u2016";
+
+ private static final String BAD_STRING_ENC =
+ "%U2016%U2015%U2016%U2015%U2016%U2015%U2016%U2015%U2016%U2015%U2016";
+
+ public void testSimpleCp437Encoding() throws IOException {
+
+ doSimpleEncodingTest("Cp437", null);
+ }
+
+ public void testSimpleCp850Encoding() throws IOException {
+
+ doSimpleEncodingTest("Cp850", null);
+ }
+
+ public void testNioCp1252Encoding() throws IOException {
+ // CP1252 has some undefined code points, these are
+ // the defined ones
+ // retrieved by
+ // awk '/^0x/ && NF>2 {print $1;}' CP1252.TXT
+ byte[] b =
+ new byte[] { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+ (byte) 0x80, (byte) 0x82, (byte) 0x83, (byte) 0x84,
+ (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88,
+ (byte) 0x89, (byte) 0x8A, (byte) 0x8B, (byte) 0x8C,
+ (byte) 0x8E, (byte) 0x91, (byte) 0x92, (byte) 0x93,
+ (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97,
+ (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9B,
+ (byte) 0x9C, (byte) 0x9E, (byte) 0x9F, (byte) 0xA0,
+ (byte) 0xA1, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4,
+ (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8,
+ (byte) 0xA9, (byte) 0xAA, (byte) 0xAB, (byte) 0xAC,
+ (byte) 0xAD, (byte) 0xAE, (byte) 0xAF, (byte) 0xB0,
+ (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4,
+ (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, (byte) 0xB8,
+ (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC,
+ (byte) 0xBD, (byte) 0xBE, (byte) 0xBF, (byte) 0xC0,
+ (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4,
+ (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, (byte) 0xC8,
+ (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC,
+ (byte) 0xCD, (byte) 0xCE, (byte) 0xCF, (byte) 0xD0,
+ (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4,
+ (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, (byte) 0xD8,
+ (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC,
+ (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, (byte) 0xE0,
+ (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4,
+ (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, (byte) 0xE8,
+ (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC,
+ (byte) 0xED, (byte) 0xEE, (byte) 0xEF, (byte) 0xF0,
+ (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4,
+ (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, (byte) 0xF8,
+ (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC,
+ (byte) 0xFD, (byte) 0xFE, (byte) 0xFF };
+
+ doSimpleEncodingTest("Cp1252",b);
+ }
+
+ private static final void assertEquals(byte[] expected, ByteBuffer actual) {
+
+ assertEquals(expected.length, actual.limit());
+
+ for (int i = 0; i < expected.length; ++i) {
+
+ byte a = actual.get();
+ assertEquals(expected[i], a);
+ }
+
+ }
+
+ private void doSimpleEncodingTest(String name, byte[] testBytes)
+ throws IOException {
+
+ ZipEncoding enc = ZipEncodingHelper.getZipEncoding(name);
+
+ if (testBytes == null) {
+
+ testBytes = new byte[256];
+ for (int i = 0; i < 256; ++i) {
+ testBytes[i] = (byte) i;
+ }
+ }
+
+ String decoded = enc.decode(testBytes);
+
+ assertEquals(true, enc.canEncode(decoded));
+
+ ByteBuffer encoded = enc.encode(decoded);
+
+ assertEquals(testBytes, encoded);
+
+ assertEquals(false, enc.canEncode(UNENC_STRING));
+ assertEquals("%U2016".getBytes("US-ASCII"), enc.encode(UNENC_STRING));
+ assertEquals(false, enc.canEncode(BAD_STRING));
+ assertEquals(BAD_STRING_ENC.getBytes("US-ASCII"),
+ enc.encode(BAD_STRING));
+ }
+
+}
Propchange: commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/TestZipEncodings.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java
URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java?rev=749342&r1=749341&r2=749342&view=diff
==============================================================================
--- commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java (original)
+++ commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java Mon Mar 2 16:09:20 2009
@@ -22,14 +22,15 @@
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
-import java.net.URL;
import java.net.URISyntaxException;
-import java.nio.charset.Charset;
-import java.nio.charset.UnsupportedCharsetException;
+import java.net.URL;
+import java.nio.ByteBuffer;
import java.util.Enumeration;
-import junit.framework.TestCase;
+import java.util.zip.CRC32;
+
+import org.apache.commons.compress.AbstractTestCase;
-public class UTF8ZipFilesTest extends TestCase {
+public class UTF8ZipFilesTest extends AbstractTestCase {
private static final String UTF_8 = "utf-8";
private static final String CP437 = "cp437";
@@ -119,14 +120,6 @@
boolean withExplicitUnicodeExtra)
throws IOException {
- try {
- Charset.forName(encoding);
- } catch (UnsupportedCharsetException use) {
- System.err.println("Skipping testFileRoundtrip for unsupported "
- + " encoding " + encoding);
- return;
- }
-
File file = File.createTempFile(encoding + "-test", ".zip");
try {
createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra);
@@ -143,6 +136,8 @@
boolean withExplicitUnicodeExtra)
throws UnsupportedEncodingException, IOException {
+ ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
+
ZipArchiveOutputStream zos = null;
try {
zos = new ZipArchiveOutputStream(file);
@@ -152,10 +147,14 @@
ZipArchiveEntry ze = new ZipArchiveEntry(OIL_BARREL_TXT);
if (withExplicitUnicodeExtra
- && !ZipEncodingHelper.canEncodeName(ze.getName(),
- zos.getEncoding())) {
+ && !zipEncoding.canEncode(ze.getName())) {
+
+ ByteBuffer en = zipEncoding.encode(ze.getName());
+
ze.addExtraField(new UnicodePathExtraField(ze.getName(),
- zos.getEncoding()));
+ en.array(),
+ en.arrayOffset(),
+ en.limit()));
}
zos.putNextEntry(ze);
@@ -164,10 +163,14 @@
ze = new ZipArchiveEntry(EURO_FOR_DOLLAR_TXT);
if (withExplicitUnicodeExtra
- && !ZipEncodingHelper.canEncodeName(ze.getName(),
- zos.getEncoding())) {
+ && !zipEncoding.canEncode(ze.getName())) {
+
+ ByteBuffer en = zipEncoding.encode(ze.getName());
+
ze.addExtraField(new UnicodePathExtraField(ze.getName(),
- zos.getEncoding()));
+ en.array(),
+ en.arrayOffset(),
+ en.limit()));
}
zos.putNextEntry(ze);
@@ -177,10 +180,14 @@
ze = new ZipArchiveEntry(ASCII_TXT);
if (withExplicitUnicodeExtra
- && !ZipEncodingHelper.canEncodeName(ze.getName(),
- zos.getEncoding())) {
+ && !zipEncoding.canEncode(ze.getName())) {
+
+ ByteBuffer en = zipEncoding.encode(ze.getName());
+
ze.addExtraField(new UnicodePathExtraField(ze.getName(),
- zos.getEncoding()));
+ en.array(),
+ en.arrayOffset(),
+ en.limit()));
}
zos.putNextEntry(ze);
@@ -233,15 +240,18 @@
UnicodePathExtraField ucpf = findUniCodePath(ze);
assertNotNull(ucpf);
- UnicodePathExtraField ucpe = new UnicodePathExtraField(expectedName,
- encoding);
- assertEquals(ucpe.getNameCRC32(), ucpf.getNameCRC32());
+ ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding);
+ ByteBuffer ne = enc.encode(ze.getName());
+
+ CRC32 crc = new CRC32();
+ crc.update(ne.array(),ne.arrayOffset(),ne.limit());
+
+ assertEquals(crc.getValue(), ucpf.getNameCRC32());
assertEquals(expectedName, new String(ucpf.getUnicodeName(),
UTF_8));
}
}
- /*
public void testUtf8Interoperability() throws IOException {
File file1 = super.getFile("utf8-7zip-test.zip");
File file2 = super.getFile("utf8-winzip-test.zip");
@@ -250,6 +260,6 @@
testFile(file2,CP437);
}
- */
+
}