You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2017/07/05 15:34:27 UTC

[08/12] commons-compress git commit: COMPRESS-410 Remove non-NIO character set encoders. As a special case, the UTF-8 encoder replaces malformed / unmappable input with '?'; this is required for backward compatibility with the previous behavior.

COMPRESS-410 Remove non-NIO character set encoders. As a special case, the UTF-8 encoder replaces malformed / unmappable input with '?'; this is required for backward compatibility with the previous behavior.

Signed-off-by: Simon Spero <se...@gmail.com>

(cherry picked from commit 1987719)
Signed-off-by: Simon Spero <se...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/db586bae
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/db586bae
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/db586bae

Branch: refs/heads/master
Commit: db586baee29fc90f79898e9a274cc1bd585b5f53
Parents: cb590b3
Author: Simon Spero <se...@gmail.com>
Authored: Fri Jun 16 20:17:13 2017 -0400
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 16:30:00 2017 +0200

----------------------------------------------------------------------
 .../compress/archivers/zip/NioZipEncoding.java  | 80 ++++++++++++++------
 1 file changed, 55 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/db586bae/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
index ffd2efd..6f0306b 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
@@ -30,54 +31,84 @@ import java.nio.charset.CodingErrorAction;
 /**
  * A ZipEncoding, which uses a java.nio {@link
  * java.nio.charset.Charset Charset} to encode names.
- *
- * <p>This implementation works for all cases under java-1.5 or
- * later. However, in java-1.4, some charsets don't have a java.nio
- * implementation, most notably the default ZIP encoding Cp437.</p>
- * 
  * <p>The methods of this class are reentrant.</p>
  * @Immutable
  */
-class NioZipEncoding implements ZipEncoding {
+class NioZipEncoding implements ZipEncoding,HasCharset {
+
     private final Charset charset;
+    private  boolean useReplacement= false;
+    private static final byte[] REPLACEMENT_BYTES = new byte[]{'?'};
+    private static final String REPLACEMENT_STRING = "?";
 
     /**
      * Construct an NIO based zip encoding, which wraps the given
      * charset.
-     * 
+     *
      * @param charset The NIO charset to wrap.
      */
-    public NioZipEncoding(final Charset charset) {
+    NioZipEncoding(final Charset charset) {
         this.charset = charset;
     }
 
+    NioZipEncoding(final Charset charset, boolean useReplacement) {
+        this(charset);
+        this.useReplacement = useReplacement;
+
+    }
+
+    @Override
+    public Charset getCharset() {
+        return charset;
+    }
+
     /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#canEncode(java.lang.String)
+     * @see  ZipEncoding#canEncode(java.lang.String)
      */
     @Override
     public boolean canEncode(final String name) {
-        final CharsetEncoder enc = this.charset.newEncoder();
-        enc.onMalformedInput(CodingErrorAction.REPORT);
-        enc.onUnmappableCharacter(CodingErrorAction.REPORT);
+        final CharsetEncoder enc = newEncoder();
 
         return enc.canEncode(name);
     }
 
+    private CharsetEncoder newEncoder() {
+        if (useReplacement) {
+            return charset.newEncoder()
+                .onMalformedInput(CodingErrorAction.REPLACE)
+                .onUnmappableCharacter(CodingErrorAction.REPLACE)
+                .replaceWith(REPLACEMENT_BYTES);
+        } else {
+            return charset.newEncoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+        }
+    }
+
+    private CharsetDecoder newDecoder() {
+        if (!useReplacement) {
+            return this.charset.newDecoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+        } else {
+            return  charset.newDecoder()
+                .onMalformedInput(CodingErrorAction.REPLACE)
+                .onUnmappableCharacter(CodingErrorAction.REPLACE)
+                .replaceWith(REPLACEMENT_STRING);
+        }
+    }
+
+
     /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#encode(java.lang.String)
+     * @see ZipEncoding#encode(java.lang.String)
      */
     @Override
     public ByteBuffer encode(final String name) {
-        final CharsetEncoder enc = this.charset.newEncoder();
-
-        enc.onMalformedInput(CodingErrorAction.REPORT);
-        enc.onUnmappableCharacter(CodingErrorAction.REPORT);
+        final CharsetEncoder enc = newEncoder();
 
         final CharBuffer cb = CharBuffer.wrap(name);
-        ByteBuffer out = ByteBuffer.allocate(name.length()
-                                             + (name.length() + 1) / 2);
+        int estimatedSize = (int) Math.ceil(name.length() * enc.averageBytesPerChar());
+        ByteBuffer out = ByteBuffer.allocate(estimatedSize);
 
         while (cb.remaining() > 0) {
             final CoderResult res = enc.encode(cb, out,true);
@@ -114,13 +145,12 @@ class NioZipEncoding implements ZipEncoding {
 
     /**
      * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#decode(byte[])
+     * ZipEncoding#decode(byte[])
      */
     @Override
     public String decode(final byte[] data) throws IOException {
-        return this.charset.newDecoder()
-            .onMalformedInput(CodingErrorAction.REPORT)
-            .onUnmappableCharacter(CodingErrorAction.REPORT)
+        return newDecoder()
             .decode(ByteBuffer.wrap(data)).toString();
     }
+
 }