You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2017/07/05 15:34:20 UTC

[01/12] commons-compress git commit: Remove methods and change test + throw to assert to please the coveralls

Repository: commons-compress
Updated Branches:
  refs/heads/master 60a459abe -> 9ae52491c


Remove methods and change test + throw to assert to please the coveralls

Signed-off-by: Simon Spero <se...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/90a73a4d
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/90a73a4d
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/90a73a4d

Branch: refs/heads/master
Commit: 90a73a4dee53129e33a552e49cb7835ecebb3a5f
Parents: d7e6e16
Author: Simon Spero <se...@gmail.com>
Authored: Mon Jun 19 06:07:02 2017 -0400
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 16:30:00 2017 +0200

----------------------------------------------------------------------
 .../compress/archivers/zip/NioZipEncoding.java  | 20 +++-------
 .../archivers/zip/ZipEncodingHelper.java        | 41 --------------------
 2 files changed, 6 insertions(+), 55 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/90a73a4d/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
index fed597f..606ab12 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
@@ -42,15 +42,10 @@ class NioZipEncoding implements ZipEncoding,HasCharset {
     private static final String REPLACEMENT_STRING = "?";
 
     /**
-     * Construct an NIO based zip encoding, which wraps the given
-     * charset.
-     *
-     * @param charset The NIO charset to wrap.
+     * Construct an NioZipEncoding using the given charset.
+     * @param charset  The character set to use.
+     * @param useReplacement should invalid characters be replaced, or reported.
      */
-    NioZipEncoding(final Charset charset) {
-        this(charset, false);
-    }
-
     NioZipEncoding(final Charset charset, boolean useReplacement) {
         this.charset = charset;
         this.useReplacement = useReplacement;
@@ -148,9 +143,8 @@ class NioZipEncoding implements ZipEncoding,HasCharset {
         }
         CoderResult coderResult = enc.encode(cb, out, true);
 
-        if (!coderResult.isUnderflow()) {
-            throw new RuntimeException("unexpected coder result: " + coderResult);
-        }
+        assert coderResult.isUnderflow() : "unexpected coder result: " + coderResult;
+        
 
         out.limit(out.position());
         out.rewind();
@@ -163,9 +157,7 @@ class NioZipEncoding implements ZipEncoding,HasCharset {
             if (result.isOverflow()) {
                 int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
                 out = ZipEncodingHelper.growBufferBy(out, increment);
-            } else {
-                break;
-            }
+            } 
         }
         return out;
     }

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/90a73a4d/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
index f31d75c..fb550fd 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
@@ -28,29 +28,6 @@ import org.apache.commons.compress.utils.Charsets;
  */
 public abstract class ZipEncodingHelper {
 
-    /**
-     * Grow a byte buffer, so it has a minimal capacity or at least
-     * the double capacity of the original buffer
-     *
-     * @param b The original buffer.
-     * @param newCapacity The minimal requested new capacity.
-     * @return A byte buffer <code>r</code> with
-     *         <code>r.capacity() = max(b.capacity()*2,newCapacity)</code> and
-     *         all the data contained in <code>b</code> copied to the beginning
-     *         of <code>r</code>.
-     *
-     */
-    static ByteBuffer growBuffer(final ByteBuffer b, final int newCapacity) {
-        b.limit(b.position());
-        b.rewind();
-
-        final int c2 = b.capacity() * 2;
-        final ByteBuffer on = ByteBuffer.allocate(c2 < newCapacity ? newCapacity : c2);
-
-        on.put(b);
-        return on;
-    }
-
 
     /**
      * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
@@ -62,24 +39,6 @@ public abstract class ZipEncodingHelper {
         0x42, 0x43, 0x44, 0x45, 0x46
     };
 
-    /**
-     * Append <code>%Uxxxx</code> to the given byte buffer.
-     * The caller must assure, that <code>bb.remaining()&gt;=6</code>.
-     *
-     * @param bb The byte buffer to write to.
-     * @param c The character to write.
-     */
-    static void appendSurrogate(final ByteBuffer bb, final char c) {
-
-        bb.put((byte) '%');
-        bb.put((byte) 'U');
-
-        bb.put(HEX_DIGITS[(c >> 12)&0x0f]);
-        bb.put(HEX_DIGITS[(c >> 8)&0x0f]);
-        bb.put(HEX_DIGITS[(c >> 4)&0x0f]);
-        bb.put(HEX_DIGITS[c & 0x0f]);
-    }
-
 
     /**
      * name of the encoding UTF-8


[06/12] commons-compress git commit: Do better estimating of required buffer size for character encoding. If an unencodable character is found that requires output buffer expansion, scan buffer for all such characters, and attempt to expand buffer only once.

Posted by bo...@apache.org.
Do better estimating of required buffer size for character encoding. If an unencodable character is found that requires output buffer expansion, scan buffer for all such characters, and attempt to expand buffer only once.

Signed-off-by: Simon Spero <se...@gmail.com>

(cherry picked from commit aa30e21)
Signed-off-by: Simon Spero <se...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/a67bdc01
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/a67bdc01
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/a67bdc01

Branch: refs/heads/master
Commit: a67bdc013c9fd965abaca375b9b47554a115f40e
Parents: db586ba
Author: Simon Spero <se...@gmail.com>
Authored: Sun Jun 18 18:55:38 2017 -0400
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 16:30:00 2017 +0200

----------------------------------------------------------------------
 .../compress/archivers/zip/NioZipEncoding.java  | 109 ++++++++++++++++---
 .../archivers/zip/ZipEncodingHelper.java        |  10 ++
 .../compress/archivers/zip/ZipEncodingTest.java |   5 +-
 3 files changed, 104 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a67bdc01/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
index 6f0306b..fed597f 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
@@ -48,11 +48,11 @@ class NioZipEncoding implements ZipEncoding,HasCharset {
      * @param charset The NIO charset to wrap.
      */
     NioZipEncoding(final Charset charset) {
-        this.charset = charset;
+        this(charset, false);
     }
 
     NioZipEncoding(final Charset charset, boolean useReplacement) {
-        this(charset);
+        this.charset = charset;
         this.useReplacement = useReplacement;
 
     }
@@ -107,42 +107,115 @@ class NioZipEncoding implements ZipEncoding,HasCharset {
         final CharsetEncoder enc = newEncoder();
 
         final CharBuffer cb = CharBuffer.wrap(name);
-        int estimatedSize = (int) Math.ceil(name.length() * enc.averageBytesPerChar());
-        ByteBuffer out = ByteBuffer.allocate(estimatedSize);
+        CharBuffer tmp=null;
+        ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining()));
 
         while (cb.remaining() > 0) {
-            final CoderResult res = enc.encode(cb, out,true);
+            final CoderResult res = enc.encode(cb, out, false);
 
             if (res.isUnmappable() || res.isMalformed()) {
 
                 // write the unmappable characters in utf-16
                 // pseudo-URL encoding style to ByteBuffer.
-                if (res.length() * 6 > out.remaining()) {
-                    out = ZipEncodingHelper.growBuffer(out, out.position()
-                                                       + res.length() * 6);
-                }
 
-                for (int i=0; i<res.length(); ++i) {
-                    ZipEncodingHelper.appendSurrogate(out,cb.get());
+                int spaceForSurrogate = estimateIncrementalEncodingSize(enc, (6 * res.length()));
+                if (spaceForSurrogate > out.remaining()) {
+                    // if the destination buffer isn't over sized, assume that the presence of one
+                    // unmappable character makes it likely that there will be more. Find all the
+                    // un-encoded characters and allocate space based on those estimates.
+                    int charCount = 0;
+                    for (int i = cb.position() ; i < cb.limit(); i++) {
+                        if (!enc.canEncode(cb.get(i))) {
+                            charCount+= 6;
+                        } else {
+                            charCount++;
+                        }
+                    }
+                    int totalExtraSpace = estimateIncrementalEncodingSize(enc, charCount);
+                    out = ZipEncodingHelper.growBufferBy(out, totalExtraSpace- out.remaining());
+                }
+                if(tmp == null) {
+                    tmp = CharBuffer.allocate(6);
+                }
+                for (int i = 0; i < res.length(); ++i) {
+                    out = encodeFully(enc, encodeSurrogate(tmp,cb.get()), out);
                 }
 
             } else if (res.isOverflow()) {
+                int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
+                out = ZipEncodingHelper.growBufferBy(out, increment);
+            }
+        }
+        CoderResult coderResult = enc.encode(cb, out, true);
 
-                out = ZipEncodingHelper.growBuffer(out, 0);
+        if (!coderResult.isUnderflow()) {
+            throw new RuntimeException("unexpected coder result: " + coderResult);
+        }
 
-            } else if (res.isUnderflow()) {
+        out.limit(out.position());
+        out.rewind();
+        return out;
+    }
 
-                enc.flush(out);
+    private static ByteBuffer encodeFully(CharsetEncoder enc, CharBuffer cb, ByteBuffer out) {
+        while (cb.hasRemaining()) {
+            CoderResult result = enc.encode(cb, out, false);
+            if (result.isOverflow()) {
+                int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
+                out = ZipEncodingHelper.growBufferBy(out, increment);
+            } else {
                 break;
-
             }
         }
-
-        out.limit(out.position());
-        out.rewind();
         return out;
     }
 
+    static char[] HEX_CHARS = new char[]{
+        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
+    };
+
+    private CharBuffer encodeSurrogate( CharBuffer cb,char c) {
+        cb.position(0).limit(6);
+        cb.put('%');
+        cb.put('U');
+
+        cb.put(HEX_CHARS[(c >> 12) & 0x0f]);
+        cb.put(HEX_CHARS[(c >> 8) & 0x0f]);
+        cb.put(HEX_CHARS[(c >> 4) & 0x0f]);
+        cb.put(HEX_CHARS[c & 0x0f]);
+        cb.flip();
+        return cb;
+    }
+
+    /**
+     * Estimate the initial encoded size (in bytes) for a character buffer.
+     * <p>
+     * The estimate assumes that one character consumes uses the maximum length encoding,
+     * whilst the rest use an average size encoding. This accounts for any BOM for UTF-16, at
+     * the expense of a couple of extra bytes for UTF-8 encoded ASCII.
+     * </p>
+     *
+     * @param enc        encoder to use for estimates
+     * @param charChount number of characters in string
+     * @return estimated size in bytes.
+     */
+    private int estimateInitialBufferSize(CharsetEncoder enc, int charChount) {
+        float first = enc.maxBytesPerChar();
+        float rest = (charChount - 1) * enc.averageBytesPerChar();
+        return (int) Math.ceil(first + rest);
+    }
+
+    /**
+     * Estimate the size needed for remaining characters
+     *
+     * @param enc       encoder to use for estimates
+     * @param charCount number of characters remaining
+     * @return estimated size in bytes.
+     */
+    private static int estimateIncrementalEncodingSize(CharsetEncoder enc, int charCount) {
+        return (int) Math.ceil(charCount * enc.averageBytesPerChar());
+    }
+
     /**
      * @see
      * ZipEncoding#decode(byte[])

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a67bdc01/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
index 18ad103..f31d75c 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
@@ -136,4 +136,14 @@ public abstract class ZipEncodingHelper {
         }
         return false;
     }
+
+    static ByteBuffer growBufferBy(ByteBuffer buffer, int increment) {
+        buffer.limit(buffer.position());
+        buffer.rewind();
+
+        final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment);
+
+        on.put(buffer);
+        return on;
+    }
 }

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a67bdc01/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
index f3e5127..ce0934f 100644
--- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
@@ -180,8 +180,9 @@ public class ZipEncodingTest {
         assertFalse(enc.canEncode(UNENC_STRING));
         assertEquals("%U2016".getBytes(CharsetNames.US_ASCII), enc.encode(UNENC_STRING));
         assertFalse(enc.canEncode(BAD_STRING));
-        assertEquals(BAD_STRING_ENC.getBytes(CharsetNames.US_ASCII),
-                     enc.encode(BAD_STRING));
+        byte[] expected = BAD_STRING_ENC.getBytes(CharsetNames.US_ASCII);
+        ByteBuffer actual = enc.encode(BAD_STRING);
+        assertEquals(expected, actual);
     }
 
 }


[03/12] commons-compress git commit: Test that EBCDIC encoding is supported (making sure "%Uxxxx" replacement strings don't use ASCII encodings)

Posted by bo...@apache.org.
Test that EBCDIC encoding is supported (making sure "%Uxxxx" replacement strings don't use ASCII encodings)

Signed-off-by: Simon Spero <se...@gmail.com>

(cherry picked from commit f1ec715)
Signed-off-by: Simon Spero <se...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/b745af39
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/b745af39
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/b745af39

Branch: refs/heads/master
Commit: b745af3975a0bb3acddbc59ef85860d08221a010
Parents: 11fcc89
Author: Simon Spero <se...@gmail.com>
Authored: Sun Jun 18 19:27:42 2017 -0400
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 16:30:00 2017 +0200

----------------------------------------------------------------------
 .../compress/archivers/zip/ZipEncodingTest.java      | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/b745af39/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
index ce0934f..34a9cb8 100644
--- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
@@ -27,7 +27,6 @@ import static org.junit.Assert.assertTrue;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
-import org.apache.commons.compress.utils.CharsetNames;
 import org.hamcrest.core.IsInstanceOf;
 import org.junit.Assert;
 import org.junit.Test;
@@ -78,6 +77,13 @@ public class ZipEncodingTest {
     }
 
 
+    @Test
+    public void testEbcidic() throws IOException {
+
+        doSimpleEncodingTest("IBM1047", null);
+    }
+
+
     private void doSimpleEncodingsTest(int n) throws IOException {
 
         doSimpleEncodingTest("Cp" + n, null);
@@ -178,11 +184,10 @@ public class ZipEncodingTest {
         assertEquals(testBytes, encoded);
 
         assertFalse(enc.canEncode(UNENC_STRING));
-        assertEquals("%U2016".getBytes(CharsetNames.US_ASCII), enc.encode(UNENC_STRING));
+        assertEquals("%U2016".getBytes(name), enc.encode(UNENC_STRING));
         assertFalse(enc.canEncode(BAD_STRING));
-        byte[] expected = BAD_STRING_ENC.getBytes(CharsetNames.US_ASCII);
-        ByteBuffer actual = enc.encode(BAD_STRING);
-        assertEquals(expected, actual);
+        assertEquals(BAD_STRING_ENC.getBytes(name), enc.encode(BAD_STRING));
+        assertEquals(BAD_STRING_ENC.getBytes(name), enc.encode(BAD_STRING));
     }
 
 }


[04/12] commons-compress git commit: COMPRESS-410 Remove Non-NIO character set encoders. As a special case, the UTF-8 encoder will replace malformed / unmappable input with '?'. This behavior is required for compatibility with existing behavior.

Posted by bo...@apache.org.
COMPRESS-410 Remove Non-NIO character set encoders. As a special case, the UTF-8 encoder will replace malformed / unmappable input with '?'. This behavior is required for compatibility with existing behavior.

Signed-off-by: Simon Spero <se...@gmail.com>

(cherry picked from commit 0d41ac4)
Signed-off-by: Simon Spero <se...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/cec72ce6
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/cec72ce6
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/cec72ce6

Branch: refs/heads/master
Commit: cec72ce690353c90f3867191d7e657ba59ed2612
Parents: 60a459a
Author: Simon Spero <se...@gmail.com>
Authored: Fri Jun 16 20:17:13 2017 -0400
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 16:30:00 2017 +0200

----------------------------------------------------------------------
 .../archivers/zip/FallbackZipEncoding.java      |  96 -------
 .../compress/archivers/zip/HasCharset.java      |  12 +
 .../archivers/zip/Simple8BitZipEncoding.java    | 279 -------------------
 .../archivers/zip/ZipEncodingHelper.java        | 165 ++---------
 .../compress/archivers/zip/ZipEncodingTest.java |  51 +++-
 5 files changed, 77 insertions(+), 526 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/cec72ce6/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java
deleted file mode 100644
index 757bcbd..0000000
--- a/src/main/java/org/apache/commons/compress/archivers/zip/FallbackZipEncoding.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.commons.compress.archivers.zip;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-/**
- * A fallback ZipEncoding, which uses a java.io means to encode names.
- *
- * <p>This implementation is not suitable for encodings other than
- * UTF-8, because java.io encodes unmappable character as question
- * marks leading to unreadable ZIP entries on some operating
- * systems.</p>
- * 
- * <p>Furthermore this implementation is unable to tell whether a
- * given name can be safely encoded or not.</p>
- * 
- * <p>This implementation acts as a last resort implementation, when
- * neither {@link Simple8BitZipEnoding} nor {@link NioZipEncoding} is
- * available.</p>
- * 
- * <p>The methods of this class are reentrant.</p>
- * @Immutable
- */
-class FallbackZipEncoding implements ZipEncoding {
-    private final String charsetName;
-
-    /**
-     * Construct a fallback zip encoding, which uses the platform's
-     * default charset.
-     */
-    public FallbackZipEncoding() {
-        this.charsetName = null;
-    }
-
-    /**
-     * Construct a fallback zip encoding, which uses the given charset.
-     * 
-     * @param charsetName The name of the charset or {@code null} for
-     *                the platform's default character set.
-     */
-    public FallbackZipEncoding(final String charsetName) {
-        this.charsetName = charsetName;
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#canEncode(java.lang.String)
-     */
-    @Override
-    public boolean canEncode(final String name) {
-        return true;
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#encode(java.lang.String)
-     */
-    @Override
-    public ByteBuffer encode(final String name) throws IOException {
-        if (this.charsetName == null) { // i.e. use default charset, see no-args constructor
-            return ByteBuffer.wrap(name.getBytes());
-        }
-        return ByteBuffer.wrap(name.getBytes(this.charsetName));
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#decode(byte[])
-     */
-    @Override
-    public String decode(final byte[] data) throws IOException {
-        if (this.charsetName == null) { // i.e. use default charset, see no-args constructor
-            return new String(data);
-        }
-        return new String(data,this.charsetName);
-    }
-}

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/cec72ce6/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java b/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
new file mode 100644
index 0000000..09dfced
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
@@ -0,0 +1,12 @@
+package org.apache.commons.compress.archivers.zip;
+
+import java.nio.charset.Charset;
+
+public interface HasCharset {
+
+    /**
+     *
+     * @return the character set associated with this object
+     */
+    Charset getCharset();
+}

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/cec72ce6/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEncoding.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEncoding.java
deleted file mode 100644
index 1bd0f9c..0000000
--- a/src/main/java/org/apache/commons/compress/archivers/zip/Simple8BitZipEncoding.java
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.commons.compress.archivers.zip;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * This ZipEncoding implementation implements a simple 8bit character
- * set, which mets the following restrictions:
- * 
- * <ul>
- * <li>Characters 0x0000 to 0x007f are encoded as the corresponding
- *        byte values 0x00 to 0x7f.</li>
- * <li>All byte codes from 0x80 to 0xff are mapped to a unique unicode
- *       character in the range 0x0080 to 0x7fff. (No support for
- *       UTF-16 surrogates)
- * </ul>
- * 
- * <p>These restrictions most notably apply to the most prominent
- * omissions of java-1.4's {@link java.nio.charset.Charset Charset}
- * implementation, Cp437 and Cp850.</p>
- * 
- * <p>The methods of this class are reentrant.</p>
- * @Immutable
- */
-class Simple8BitZipEncoding implements ZipEncoding {
-
-    /**
-     * A character entity, which is put to the reverse mapping table
-     * of a simple encoding.
-     */
-    private static final class Simple8BitChar implements Comparable<Simple8BitChar> {
-        public final char unicode;
-        public final byte code;
-
-        Simple8BitChar(final byte code, final char unicode) {
-            this.code = code;
-            this.unicode = unicode;
-        }
-
-        @Override
-        public int compareTo(final Simple8BitChar a) {
-            return this.unicode - a.unicode;
-        }
-
-        @Override
-        public String toString() {
-            return "0x" + Integer.toHexString(0xffff & unicode)
-                + "->0x" + Integer.toHexString(0xff & code);
-        }
-
-        @Override
-        public boolean equals(final Object o) {
-            if (o instanceof Simple8BitChar) {
-                final Simple8BitChar other = (Simple8BitChar) o;
-                return unicode == other.unicode && code == other.code;
-            }
-            return false;
-        }
-
-        @Override
-        public int hashCode() {
-            return unicode;
-        }
-    }
-
-    /**
-     * The characters for byte values of 128 to 255 stored as an array of
-     * 128 chars.
-     */
-    private final char[] highChars;
-
-    /**
-     * A list of {@link Simple8BitChar} objects sorted by the unicode
-     * field.  This list is used to binary search reverse mapping of
-     * unicode characters with a character code greater than 127.
-     */
-    private final List<Simple8BitChar> reverseMapping;
-
-    /**
-     * @param highChars The characters for byte values of 128 to 255
-     * stored as an array of 128 chars.
-     */
-    public Simple8BitZipEncoding(final char[] highChars) {
-        this.highChars = highChars.clone();
-        final List<Simple8BitChar> temp =
-            new ArrayList<>(this.highChars.length);
-
-        byte code = 127;
-
-        for (final char highChar : this.highChars) {
-            temp.add(new Simple8BitChar(++code, highChar));
-        }
-
-        Collections.sort(temp);
-        this.reverseMapping = Collections.unmodifiableList(temp);
-    }
-
-    /**
-     * Return the character code for a given encoded byte.
-     * 
-     * @param b The byte to decode.
-     * @return The associated character value.
-     */
-    public char decodeByte(final byte b) {
-        // code 0-127
-        if (b >= 0) {
-            return (char) b;
-        }
-
-        // byte is signed, so 128 == -128 and 255 == -1
-        return this.highChars[128 + b];
-    }
-
-    /**
-     * @param c The character to encode.
-     * @return Whether the given unicode character is covered by this encoding.
-     */
-    public boolean canEncodeChar(final char c) {
-
-        if (c >= 0 && c < 128) {
-            return true;
-        }
-
-        final Simple8BitChar r = this.encodeHighChar(c);
-        return r != null;
-    }
-
-    /**
-     * Pushes the encoded form of the given character to the given byte buffer.
-     * 
-     * @param bb The byte buffer to write to.
-     * @param c The character to encode.
-     * @return Whether the given unicode character is covered by this encoding.
-     *         If {@code false} is returned, nothing is pushed to the
-     *         byte buffer. 
-     */
-    public boolean pushEncodedChar(final ByteBuffer bb, final char c) {
-
-        if (c >= 0 && c < 128) {
-            bb.put((byte) c);
-            return true;
-        }
-
-        final Simple8BitChar r = this.encodeHighChar(c);
-        if (r == null) {
-            return false;
-        }
-        bb.put(r.code);
-        return true;
-    }
-
-    /**
-     * @param c A unicode character in the range from 0x0080 to 0x7f00
-     * @return A Simple8BitChar, if this character is covered by this encoding.
-     *         A {@code null} value is returned, if this character is not
-     *         covered by this encoding.
-     */
-    private Simple8BitChar encodeHighChar(final char c) {
-        // for performance an simplicity, yet another reincarnation of
-        // binary search...
-        int i0 = 0;
-        int i1 = this.reverseMapping.size();
-
-        while (i1 > i0) {
-
-            final int i = i0 + (i1 - i0) / 2;
-
-            final Simple8BitChar m = this.reverseMapping.get(i);
-
-            if (m.unicode == c) {
-                return m;
-            }
-
-            if (m.unicode < c) {
-                i0 = i + 1;
-            } else {
-                i1 = i;
-            }
-        }
-
-        if (i0 >= this.reverseMapping.size()) {
-            return null;
-        }
-
-        final Simple8BitChar r = this.reverseMapping.get(i0);
-
-        if (r.unicode != c) {
-            return null;
-        }
-
-        return r;
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#canEncode(java.lang.String)
-     */
-    @Override
-    public boolean canEncode(final String name) {
-
-        for (int i=0;i<name.length();++i) {
-
-            final char c = name.charAt(i);
-
-            if (!this.canEncodeChar(c)) {
-                return false;
-            }
-        }
-
-        return true;
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#encode(java.lang.String)
-     */
-    @Override
-    public ByteBuffer encode(final String name) {
-        ByteBuffer out = ByteBuffer.allocate(name.length()
-                                             + 6 + (name.length() + 1) / 2);
-
-        for (int i=0;i<name.length();++i) {
-
-            final char c = name.charAt(i);
-
-            if (out.remaining() < 6) {
-                out = ZipEncodingHelper.growBuffer(out,out.position() + 6);
-            }
-
-            if (!this.pushEncodedChar(out,c)) {
-
-                ZipEncodingHelper.appendSurrogate(out,c);
-            }
-        }
-
-        out.limit(out.position());
-        out.rewind();
-        return out;
-    }
-
-    /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#decode(byte[])
-     */
-    @Override
-    public String decode(final byte[] data) throws IOException {
-        final char [] ret = new char[data.length];
-
-        for (int i=0;i<data.length;++i) {
-            ret[i] = this.decodeByte(data[i]);
-        }
-
-        return new String(ret);
-    }
-
-
-}

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/cec72ce6/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
index bcfb8cf..18ad103 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
@@ -21,126 +21,17 @@ package org.apache.commons.compress.archivers.zip;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.UnsupportedCharsetException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
 import org.apache.commons.compress.utils.Charsets;
 
 /**
- * Static helper functions for robustly encoding filenames in zip files. 
+ * Static helper functions for robustly encoding filenames in zip files.
  */
 public abstract class ZipEncodingHelper {
 
     /**
-     * A class, which holds the high characters of a simple encoding
-     * and lazily instantiates a Simple8BitZipEncoding instance in a
-     * thread-safe manner.
-     */
-    private static class SimpleEncodingHolder {
-
-        private final char [] highChars;
-        private Simple8BitZipEncoding encoding;
-
-        /**
-         * Instantiate a simple encoding holder.
-         * 
-         * @param highChars The characters for byte codes 128 to 255.
-         * 
-         * @see Simple8BitZipEncoding#Simple8BitZipEncoding(char[])
-         */
-        SimpleEncodingHolder(final char [] highChars) {
-            this.highChars = highChars;
-        }
-
-        /**
-         * @return The associated {@link Simple8BitZipEncoding}, which
-         *         is instantiated if not done so far.
-         */
-        public synchronized Simple8BitZipEncoding getEncoding() {
-            if (this.encoding == null) {
-                this.encoding = new Simple8BitZipEncoding(this.highChars);
-            }
-            return this.encoding;
-        }
-    }
-
-    private static final Map<String, SimpleEncodingHolder> simpleEncodings;
-
-    static {
-        final Map<String, SimpleEncodingHolder> se =
-            new HashMap<>();
-
-        final char[] cp437_high_chars =
-            new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
-                         0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
-                         0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
-                         0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
-                         0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5,
-                         0x20a7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
-                         0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x2310,
-                         0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
-                         0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561,
-                         0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557,
-                         0x255d, 0x255c, 0x255b, 0x2510, 0x2514, 0x2534,
-                         0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
-                         0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
-                         0x256c, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559,
-                         0x2558, 0x2552, 0x2553, 0x256b, 0x256a, 0x2518,
-                         0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
-                         0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3,
-                         0x00b5, 0x03c4, 0x03a6, 0x0398, 0x03a9, 0x03b4,
-                         0x221e, 0x03c6, 0x03b5, 0x2229, 0x2261, 0x00b1,
-                         0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
-                         0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2,
-                         0x25a0, 0x00a0 };
-
-        final SimpleEncodingHolder cp437 = new SimpleEncodingHolder(cp437_high_chars);
-
-        se.put("CP437", cp437);
-        se.put("Cp437", cp437);
-        se.put("cp437", cp437);
-        se.put("IBM437", cp437);
-        se.put("ibm437", cp437);
-
-        final char[] cp850_high_chars =
-            new char[] { 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0,
-                         0x00e5, 0x00e7, 0x00ea, 0x00eb, 0x00e8, 0x00ef,
-                         0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x00c9, 0x00e6,
-                         0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
-                         0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8,
-                         0x00d7, 0x0192, 0x00e1, 0x00ed, 0x00f3, 0x00fa,
-                         0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, 0x00ae,
-                         0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
-                         0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1,
-                         0x00c2, 0x00c0, 0x00a9, 0x2563, 0x2551, 0x2557,
-                         0x255d, 0x00a2, 0x00a5, 0x2510, 0x2514, 0x2534,
-                         0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
-                         0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550,
-                         0x256c, 0x00a4, 0x00f0, 0x00d0, 0x00ca, 0x00cb,
-                         0x00c8, 0x0131, 0x00cd, 0x00ce, 0x00cf, 0x2518,
-                         0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
-                         0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5,
-                         0x00b5, 0x00fe, 0x00de, 0x00da, 0x00db, 0x00d9,
-                         0x00fd, 0x00dd, 0x00af, 0x00b4, 0x00ad, 0x00b1,
-                         0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
-                         0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2,
-                         0x25a0, 0x00a0 };
-
-        final SimpleEncodingHolder cp850 = new SimpleEncodingHolder(cp850_high_chars);
-
-        se.put("CP850", cp850);
-        se.put("Cp850", cp850);
-        se.put("cp850", cp850);
-        se.put("IBM850", cp850);
-        se.put("ibm850", cp850);
-        simpleEncodings = Collections.unmodifiableMap(se);
-    }
-
-    /**
      * Grow a byte buffer, so it has a minimal capacity or at least
-     * the double capacity of the original buffer 
-     * 
+     * the double capacity of the original buffer
+     *
      * @param b The original buffer.
      * @param newCapacity The minimal requested new capacity.
      * @return A byte buffer <code>r</code> with
@@ -160,7 +51,7 @@ public abstract class ZipEncodingHelper {
         return on;
     }
 
- 
+
     /**
      * The hexadecimal digits <code>0,...,9,A,...,F</code> encoded as
      * ASCII bytes.
@@ -174,7 +65,7 @@ public abstract class ZipEncodingHelper {
     /**
      * Append <code>%Uxxxx</code> to the given byte buffer.
      * The caller must assure, that <code>bb.remaining()&gt;=6</code>.
-     * 
+     *
      * @param bb The byte buffer to write to.
      * @param c The character to write.
      */
@@ -198,47 +89,37 @@ public abstract class ZipEncodingHelper {
     /**
      * name of the encoding UTF-8
      */
-    static final ZipEncoding UTF8_ZIP_ENCODING = new FallbackZipEncoding(UTF8);
+    static final ZipEncoding UTF8_ZIP_ENCODING = getZipEncoding("UTF-8");
 
     /**
-     * Instantiates a zip encoding.
-     * 
+     * Instantiates a zip encoding. An NIO based character set encoder/decoder will be returned.
+     * As a special case, if the character set is UTF-8, the nio encoder will be configured to replace malformed and
+     * unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
+     * <p>
+     *     If the requested character set cannot be found, the platform default will
+     *     be used instead.
+     * </p>
      * @param name The name of the zip encoding. Specify {@code null} for
      *             the platform's default encoding.
      * @return A zip encoding for the given encoding name.
      */
     public static ZipEncoding getZipEncoding(final String name) {
- 
-        // fallback encoding is good enough for UTF-8.
-        if (isUTF8(name)) {
-            return UTF8_ZIP_ENCODING;
-        }
-
-        if (name == null) {
-            return new FallbackZipEncoding();
-        }
-
-        final SimpleEncodingHolder h = simpleEncodings.get(name);
-
-        if (h!=null) {
-            return h.getEncoding();
+        Charset cs = Charset.defaultCharset();
+        if (name != null) {
+            try {
+                cs = Charset.forName(name);
+            } catch (UnsupportedCharsetException e) {
+            }
         }
+        boolean useReplacement = cs.name().equals("UTF-8");
+        return new NioZipEncoding(cs, useReplacement);
 
-        try {
-
-            final Charset cs = Charset.forName(name);
-            return new NioZipEncoding(cs);
-
-        } catch (final UnsupportedCharsetException e) {
-            return new FallbackZipEncoding(name);
-        }
     }
 
     /**
      * Returns whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
-     * 
-     * @param charsetName
-     *            If the given name is null, then check the platform's default encoding.
+     *
+     * @param charsetName If the given name is null, then check the platform's default encoding.
      */
     static boolean isUTF8(String charsetName) {
         if (charsetName == null) {

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/cec72ce6/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
index f0c049a..f3e5127 100644
--- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
@@ -19,13 +19,16 @@
 
 package org.apache.commons.compress.archivers.zip;
 
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
 import java.io.IOException;
 import java.nio.ByteBuffer;
-
+import java.nio.charset.Charset;
 import org.apache.commons.compress.utils.CharsetNames;
-
-import static org.junit.Assert.*;
-
+import org.hamcrest.core.IsInstanceOf;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -33,6 +36,7 @@ import org.junit.Test;
  * Test zip encodings.
  */
 public class ZipEncodingTest {
+
     private static final String UNENC_STRING = "\u2016";
 
     // stress test for internal grow method.
@@ -43,15 +47,44 @@ public class ZipEncodingTest {
         "%U2016%U2015%U2016%U2015%U2016%U2015%U2016%U2015%U2016%U2015%U2016";
 
     @Test
-    public void testSimpleCp437Encoding() throws IOException {
-
-        doSimpleEncodingTest("Cp437", null);
+    public void testNothingToMakeCoverallsHappier() {
+        Object o = new ZipEncodingHelper() {
+        };
+        assertNotNull(o);
+    }
+    @Test
+    public void testGetNonexistentEncodng() throws IOException {
+        ZipEncoding ze = ZipEncodingHelper.getZipEncoding("I-am-a-banana");
+        assertNotNull(ze);
+        if (ze instanceof HasCharset) {
+            HasCharset hasCharset = (HasCharset) ze;
+            Assert.assertEquals(Charset.defaultCharset(),hasCharset.getCharset());
+        }
     }
 
     @Test
+    public void testIsUTF8() throws IOException {
+       assertTrue(ZipEncodingHelper.isUTF8("UTF-8"));
+       assertTrue(ZipEncodingHelper.isUTF8("UTF8"));
+       Assert.assertEquals(Charset.defaultCharset().name().equals("UTF-8"),ZipEncodingHelper.isUTF8(null));
+    }
+    @Test
+    public void testSimpleCp437Encoding() throws IOException {
+        doSimpleEncodingsTest(437);
+    }
+    @Test
     public void testSimpleCp850Encoding() throws IOException {
+        doSimpleEncodingsTest(850);
+    }
+
 
-        doSimpleEncodingTest("Cp850", null);
+    private void doSimpleEncodingsTest(int n) throws IOException {
+
+        doSimpleEncodingTest("Cp" + n, null);
+        doSimpleEncodingTest("cp" + n, null);
+        doSimpleEncodingTest("CP" + n, null);
+        doSimpleEncodingTest("IBM" + n, null);
+        doSimpleEncodingTest("ibm" + n, null);
     }
 
     @Test
@@ -127,7 +160,7 @@ public class ZipEncodingTest {
         throws IOException {
 
         final ZipEncoding enc = ZipEncodingHelper.getZipEncoding(name);
-
+        assertThat(enc, IsInstanceOf.instanceOf(NioZipEncoding.class));
         if (testBytes == null) {
 
             testBytes = new byte[256];


[02/12] commons-compress git commit: Add licence comment to HasCharset

Posted by bo...@apache.org.
Add licence comment to HasCharset

Signed-off-by: Simon Spero <se...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/d162732f
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/d162732f
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/d162732f

Branch: refs/heads/master
Commit: d162732f76f7db7097b7be6acc75b6938c453902
Parents: b745af3
Author: Simon Spero <se...@gmail.com>
Authored: Sun Jun 18 20:04:35 2017 -0400
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 16:30:00 2017 +0200

----------------------------------------------------------------------
 .../compress/archivers/zip/HasCharset.java       | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/d162732f/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java b/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
index 2e392a8..7581c18 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
@@ -1,3 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 package org.apache.commons.compress.archivers.zip;
 
 import java.nio.charset.Charset;


[09/12] commons-compress git commit: Redoing more buffer stuff

Posted by bo...@apache.org.
Redoing more buffer stuff

Signed-off-by: Simon Spero <se...@gmail.com>

(cherry picked from commit 330c8b3)
Signed-off-by: Simon Spero <se...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/11fcc89f
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/11fcc89f
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/11fcc89f

Branch: refs/heads/master
Commit: 11fcc89fec70aae7eeda710e2bd26d93cb3c05c6
Parents: a67bdc0
Author: Simon Spero <se...@gmail.com>
Authored: Thu Jun 15 14:27:48 2017 -0400
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 16:30:00 2017 +0200

----------------------------------------------------------------------
 .../commons/compress/internal/charset/HasCharset.java     | 10 ++++++++++
 1 file changed, 10 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/11fcc89f/src/main/java/org/apache/commons/compress/internal/charset/HasCharset.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/internal/charset/HasCharset.java b/src/main/java/org/apache/commons/compress/internal/charset/HasCharset.java
new file mode 100644
index 0000000..0406432
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/internal/charset/HasCharset.java
@@ -0,0 +1,10 @@
+package org.apache.commons.compress.internal.charset;
+
+import java.nio.charset.Charset;
+
+/**
+ * Interface to allow access to a character set associated with an object
+ */
+public interface HasCharset {
+  Charset getCharset();
+}


[07/12] commons-compress git commit: Resurrected HasCharset in the wrong place (not beyond the grave).

Posted by bo...@apache.org.
Resurrected HasCharset in the wrong place (not beyond the grave).

Signed-off-by: Simon Spero <se...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/d7e6e16e
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/d7e6e16e
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/d7e6e16e

Branch: refs/heads/master
Commit: d7e6e16ee2ea78d358ca0ed52163a63cc8f34743
Parents: d162732
Author: Simon Spero <se...@gmail.com>
Authored: Sun Jun 18 20:10:46 2017 -0400
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 16:30:00 2017 +0200

----------------------------------------------------------------------
 .../commons/compress/internal/charset/HasCharset.java     | 10 ----------
 1 file changed, 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/d7e6e16e/src/main/java/org/apache/commons/compress/internal/charset/HasCharset.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/internal/charset/HasCharset.java b/src/main/java/org/apache/commons/compress/internal/charset/HasCharset.java
deleted file mode 100644
index 0406432..0000000
--- a/src/main/java/org/apache/commons/compress/internal/charset/HasCharset.java
+++ /dev/null
@@ -1,10 +0,0 @@
-package org.apache.commons.compress.internal.charset;
-
-import java.nio.charset.Charset;
-
-/**
- * Interface to allow access to a character set associated with an object
- */
-public interface HasCharset {
-  Charset getCharset();
-}


[05/12] commons-compress git commit: javadoc for HasCharset

Posted by bo...@apache.org.
javadoc for HasCharset

Signed-off-by: Simon Spero <se...@gmail.com>

(cherry picked from commit b70c7c2)
Signed-off-by: Simon Spero <se...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/cb590b38
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/cb590b38
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/cb590b38

Branch: refs/heads/master
Commit: cb590b38af2827acb4dc5ab3919415d3cc75d88c
Parents: cec72ce
Author: Simon Spero <se...@gmail.com>
Authored: Sat Jun 17 12:45:44 2017 -0400
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 16:30:00 2017 +0200

----------------------------------------------------------------------
 .../commons/compress/archivers/zip/HasCharset.java    | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/cb590b38/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java b/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
index 09dfced..2e392a8 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
@@ -2,10 +2,22 @@ package org.apache.commons.compress.archivers.zip;
 
 import java.nio.charset.Charset;
 
+/**
+ * An interface added to allow access to the character set associated with an {@link NioZipEncoding},
+ * without requiring a new method to be added to {@link ZipEncoding}.
+ * <p>
+ * This avoids introducing a
+ * potentially breaking change, or making {@link NioZipEncoding} a public class.
+ * </p>
+ */
 public interface HasCharset {
 
     /**
-     *
+     * Provides access to the character set associated with an object.
+     * <p>
+     *     This allows nio oriented code to use more natural character encoding/decoding methods,
+     *     whilst allowing existing code to continue to rely on special-case error handling for UTF-8.
+     * </p>
      * @return the character set associated with this object
      */
     Charset getCharset();


[10/12] commons-compress git commit: mostly cosmetic changes

Posted by bo...@apache.org.
mostly cosmetic changes


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/fde66702
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/fde66702
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/fde66702

Branch: refs/heads/master
Commit: fde66702c33d3888b682adc7e6c35dd21cb1e8da
Parents: 90a73a4
Author: Stefan Bodewig <bo...@apache.org>
Authored: Wed Jul 5 17:31:40 2017 +0200
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 17:31:40 2017 +0200

----------------------------------------------------------------------
 .../compress/archivers/zip/HasCharset.java      |  1 +
 .../compress/archivers/zip/NioZipEncoding.java  | 29 ++++++++++----------
 .../archivers/zip/ZipEncodingHelper.java        |  9 +++---
 .../compress/archivers/zip/ZipEncodingTest.java |  9 +++---
 4 files changed, 24 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/fde66702/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java b/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
index 7581c18..b370a01 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/HasCharset.java
@@ -28,6 +28,7 @@ import java.nio.charset.Charset;
  * This avoids introducing a
  * potentially breaking change, or making {@link NioZipEncoding} a public class.
  * </p>
+ * @since 1.15
  */
 public interface HasCharset {
 

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/fde66702/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
index 606ab12..4b0069f 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
@@ -34,12 +34,13 @@ import java.nio.charset.CodingErrorAction;
  * <p>The methods of this class are reentrant.</p>
  * @Immutable
  */
-class NioZipEncoding implements ZipEncoding,HasCharset {
+class NioZipEncoding implements ZipEncoding, HasCharset {
 
     private final Charset charset;
-    private  boolean useReplacement= false;
-    private static final byte[] REPLACEMENT_BYTES = new byte[]{'?'};
-    private static final String REPLACEMENT_STRING = "?";
+    private final boolean useReplacement;
+    private static final char REPLACEMENT = '?';
+    private static final byte[] REPLACEMENT_BYTES = { (byte) REPLACEMENT };
+    private static final String REPLACEMENT_STRING = String.valueOf(REPLACEMENT);
 
     /**
      * Construct an NioZipEncoding using the given charset.
@@ -49,7 +50,6 @@ class NioZipEncoding implements ZipEncoding,HasCharset {
     NioZipEncoding(final Charset charset, boolean useReplacement) {
         this.charset = charset;
         this.useReplacement = useReplacement;
-
     }
 
     @Override
@@ -102,7 +102,7 @@ class NioZipEncoding implements ZipEncoding,HasCharset {
         final CharsetEncoder enc = newEncoder();
 
         final CharBuffer cb = CharBuffer.wrap(name);
-        CharBuffer tmp=null;
+        CharBuffer tmp = null;
         ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining()));
 
         while (cb.remaining() > 0) {
@@ -127,13 +127,13 @@ class NioZipEncoding implements ZipEncoding,HasCharset {
                         }
                     }
                     int totalExtraSpace = estimateIncrementalEncodingSize(enc, charCount);
-                    out = ZipEncodingHelper.growBufferBy(out, totalExtraSpace- out.remaining());
+                    out = ZipEncodingHelper.growBufferBy(out, totalExtraSpace - out.remaining());
                 }
-                if(tmp == null) {
+                if (tmp == null) {
                     tmp = CharBuffer.allocate(6);
                 }
                 for (int i = 0; i < res.length(); ++i) {
-                    out = encodeFully(enc, encodeSurrogate(tmp,cb.get()), out);
+                    out = encodeFully(enc, encodeSurrogate(tmp, cb.get()), out);
                 }
 
             } else if (res.isOverflow()) {
@@ -143,8 +143,7 @@ class NioZipEncoding implements ZipEncoding,HasCharset {
         }
         CoderResult coderResult = enc.encode(cb, out, true);
 
-        assert coderResult.isUnderflow() : "unexpected coder result: " + coderResult;
-        
+        // may have caused underflow, but that's been ignored traditionally
 
         out.limit(out.position());
         out.rewind();
@@ -157,16 +156,16 @@ class NioZipEncoding implements ZipEncoding,HasCharset {
             if (result.isOverflow()) {
                 int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
                 out = ZipEncodingHelper.growBufferBy(out, increment);
-            } 
+            }
         }
         return out;
     }
 
-    static char[] HEX_CHARS = new char[]{
+    private static final char[] HEX_CHARS = new char[] {
         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
     };
 
-    private CharBuffer encodeSurrogate( CharBuffer cb,char c) {
+    private static CharBuffer encodeSurrogate(CharBuffer cb, char c) {
         cb.position(0).limit(6);
         cb.put('%');
         cb.put('U');
@@ -191,7 +190,7 @@ class NioZipEncoding implements ZipEncoding,HasCharset {
      * @param charChount number of characters in string
      * @return estimated size in bytes.
      */
-    private int estimateInitialBufferSize(CharsetEncoder enc, int charChount) {
+    private static int estimateInitialBufferSize(CharsetEncoder enc, int charChount) {
         float first = enc.maxBytesPerChar();
         float rest = (charChount - 1) * enc.averageBytesPerChar();
         return (int) Math.ceil(first + rest);

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/fde66702/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
index fb550fd..68a6305 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
@@ -46,9 +46,9 @@ public abstract class ZipEncodingHelper {
     static final String UTF8 = "UTF8";
 
     /**
-     * name of the encoding UTF-8
+     * the encoding UTF-8
      */
-    static final ZipEncoding UTF8_ZIP_ENCODING = getZipEncoding("UTF-8");
+    static final ZipEncoding UTF8_ZIP_ENCODING = getZipEncoding(UTF8);
 
     /**
      * Instantiates a zip encoding. An NIO based character set encoder/decoder will be returned.
@@ -67,12 +67,11 @@ public abstract class ZipEncodingHelper {
         if (name != null) {
             try {
                 cs = Charset.forName(name);
-            } catch (UnsupportedCharsetException e) {
+            } catch (UnsupportedCharsetException e) { // NOSONAR we use the default encoding instead
             }
         }
-        boolean useReplacement = cs.name().equals("UTF-8");
+        boolean useReplacement = isUTF8(cs.name());
         return new NioZipEncoding(cs, useReplacement);
-
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/fde66702/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
index 34a9cb8..15273c7 100644
--- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
@@ -51,13 +51,14 @@ public class ZipEncodingTest {
         };
         assertNotNull(o);
     }
+
     @Test
     public void testGetNonexistentEncodng() throws IOException {
         ZipEncoding ze = ZipEncodingHelper.getZipEncoding("I-am-a-banana");
         assertNotNull(ze);
         if (ze instanceof HasCharset) {
             HasCharset hasCharset = (HasCharset) ze;
-            Assert.assertEquals(Charset.defaultCharset(),hasCharset.getCharset());
+            Assert.assertEquals(Charset.defaultCharset(), hasCharset.getCharset());
         }
     }
 
@@ -65,18 +66,19 @@ public class ZipEncodingTest {
     public void testIsUTF8() throws IOException {
        assertTrue(ZipEncodingHelper.isUTF8("UTF-8"));
        assertTrue(ZipEncodingHelper.isUTF8("UTF8"));
-       Assert.assertEquals(Charset.defaultCharset().name().equals("UTF-8"),ZipEncodingHelper.isUTF8(null));
+       Assert.assertEquals(Charset.defaultCharset().name().equals("UTF-8"), ZipEncodingHelper.isUTF8(null));
     }
+
     @Test
     public void testSimpleCp437Encoding() throws IOException {
         doSimpleEncodingsTest(437);
     }
+
     @Test
     public void testSimpleCp850Encoding() throws IOException {
         doSimpleEncodingsTest(850);
     }
 
-
     @Test
     public void testEbcidic() throws IOException {
 
@@ -187,7 +189,6 @@ public class ZipEncodingTest {
         assertEquals("%U2016".getBytes(name), enc.encode(UNENC_STRING));
         assertFalse(enc.canEncode(BAD_STRING));
         assertEquals(BAD_STRING_ENC.getBytes(name), enc.encode(BAD_STRING));
-        assertEquals(BAD_STRING_ENC.getBytes(name), enc.encode(BAD_STRING));
     }
 
 }


[11/12] commons-compress git commit: move methods around

Posted by bo...@apache.org.
move methods around


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/0065a044
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/0065a044
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/0065a044

Branch: refs/heads/master
Commit: 0065a044e998559df5127a7494953fe863016dca
Parents: fde6670
Author: Stefan Bodewig <bo...@apache.org>
Authored: Wed Jul 5 17:31:53 2017 +0200
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 17:31:53 2017 +0200

----------------------------------------------------------------------
 .../compress/archivers/zip/NioZipEncoding.java  | 73 ++++++++++----------
 1 file changed, 36 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/0065a044/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
index 4b0069f..75787cc 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
@@ -67,33 +67,6 @@ class NioZipEncoding implements ZipEncoding, HasCharset {
         return enc.canEncode(name);
     }
 
-    private CharsetEncoder newEncoder() {
-        if (useReplacement) {
-            return charset.newEncoder()
-                .onMalformedInput(CodingErrorAction.REPLACE)
-                .onUnmappableCharacter(CodingErrorAction.REPLACE)
-                .replaceWith(REPLACEMENT_BYTES);
-        } else {
-            return charset.newEncoder()
-                .onMalformedInput(CodingErrorAction.REPORT)
-                .onUnmappableCharacter(CodingErrorAction.REPORT);
-        }
-    }
-
-    private CharsetDecoder newDecoder() {
-        if (!useReplacement) {
-            return this.charset.newDecoder()
-                .onMalformedInput(CodingErrorAction.REPORT)
-                .onUnmappableCharacter(CodingErrorAction.REPORT);
-        } else {
-            return  charset.newDecoder()
-                .onMalformedInput(CodingErrorAction.REPLACE)
-                .onUnmappableCharacter(CodingErrorAction.REPLACE)
-                .replaceWith(REPLACEMENT_STRING);
-        }
-    }
-
-
     /**
      * @see ZipEncoding#encode(java.lang.String)
      */
@@ -150,6 +123,16 @@ class NioZipEncoding implements ZipEncoding, HasCharset {
         return out;
     }
 
+    /**
+     * @see
+     * ZipEncoding#decode(byte[])
+     */
+    @Override
+    public String decode(final byte[] data) throws IOException {
+        return newDecoder()
+            .decode(ByteBuffer.wrap(data)).toString();
+    }
+
     private static ByteBuffer encodeFully(CharsetEncoder enc, CharBuffer cb, ByteBuffer out) {
         while (cb.hasRemaining()) {
             CoderResult result = enc.encode(cb, out, false);
@@ -178,6 +161,32 @@ class NioZipEncoding implements ZipEncoding, HasCharset {
         return cb;
     }
 
+    private CharsetEncoder newEncoder() {
+        if (useReplacement) {
+            return charset.newEncoder()
+                .onMalformedInput(CodingErrorAction.REPLACE)
+                .onUnmappableCharacter(CodingErrorAction.REPLACE)
+                .replaceWith(REPLACEMENT_BYTES);
+        } else {
+            return charset.newEncoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+        }
+    }
+
+    private CharsetDecoder newDecoder() {
+        if (!useReplacement) {
+            return this.charset.newDecoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+        } else {
+            return  charset.newDecoder()
+                .onMalformedInput(CodingErrorAction.REPLACE)
+                .onUnmappableCharacter(CodingErrorAction.REPLACE)
+                .replaceWith(REPLACEMENT_STRING);
+        }
+    }
+
     /**
      * Estimate the initial encoded size (in bytes) for a character buffer.
      * <p>
@@ -207,14 +216,4 @@ class NioZipEncoding implements ZipEncoding, HasCharset {
         return (int) Math.ceil(charCount * enc.averageBytesPerChar());
     }
 
-    /**
-     * @see
-     * ZipEncoding#decode(byte[])
-     */
-    @Override
-    public String decode(final byte[] data) throws IOException {
-        return newDecoder()
-            .decode(ByteBuffer.wrap(data)).toString();
-    }
-
 }


[08/12] commons-compress git commit: COMPRESS-410 Remove Non-NIO character set encoders. As a special case, the UTF-8 encoder will replace malformed / unmappable input with '?'. This replacement is required to stay compatible with the previous behavior.

Posted by bo...@apache.org.
COMPRESS-410 Remove Non-NIO character set encoders. As a special case, the UTF-8 encoder will replace malformed / unmappable input with '?'. This replacement is required to stay compatible with the previous behavior.

Signed-off-by: Simon Spero <se...@gmail.com>

(cherry picked from commit 1987719)
Signed-off-by: Simon Spero <se...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/db586bae
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/db586bae
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/db586bae

Branch: refs/heads/master
Commit: db586baee29fc90f79898e9a274cc1bd585b5f53
Parents: cb590b3
Author: Simon Spero <se...@gmail.com>
Authored: Fri Jun 16 20:17:13 2017 -0400
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 16:30:00 2017 +0200

----------------------------------------------------------------------
 .../compress/archivers/zip/NioZipEncoding.java  | 80 ++++++++++++++------
 1 file changed, 55 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/db586bae/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
index ffd2efd..6f0306b 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
@@ -30,54 +31,84 @@ import java.nio.charset.CodingErrorAction;
 /**
  * A ZipEncoding, which uses a java.nio {@link
  * java.nio.charset.Charset Charset} to encode names.
- *
- * <p>This implementation works for all cases under java-1.5 or
- * later. However, in java-1.4, some charsets don't have a java.nio
- * implementation, most notably the default ZIP encoding Cp437.</p>
- * 
  * <p>The methods of this class are reentrant.</p>
  * @Immutable
  */
-class NioZipEncoding implements ZipEncoding {
+class NioZipEncoding implements ZipEncoding,HasCharset {
+
     private final Charset charset;
+    private  boolean useReplacement= false;
+    private static final byte[] REPLACEMENT_BYTES = new byte[]{'?'};
+    private static final String REPLACEMENT_STRING = "?";
 
     /**
      * Construct an NIO based zip encoding, which wraps the given
      * charset.
-     * 
+     *
      * @param charset The NIO charset to wrap.
      */
-    public NioZipEncoding(final Charset charset) {
+    NioZipEncoding(final Charset charset) {
         this.charset = charset;
     }
 
+    NioZipEncoding(final Charset charset, boolean useReplacement) {
+        this(charset);
+        this.useReplacement = useReplacement;
+
+    }
+
+    @Override
+    public Charset getCharset() {
+        return charset;
+    }
+
     /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#canEncode(java.lang.String)
+     * @see  ZipEncoding#canEncode(java.lang.String)
      */
     @Override
     public boolean canEncode(final String name) {
-        final CharsetEncoder enc = this.charset.newEncoder();
-        enc.onMalformedInput(CodingErrorAction.REPORT);
-        enc.onUnmappableCharacter(CodingErrorAction.REPORT);
+        final CharsetEncoder enc = newEncoder();
 
         return enc.canEncode(name);
     }
 
+    private CharsetEncoder newEncoder() {
+        if (useReplacement) {
+            return charset.newEncoder()
+                .onMalformedInput(CodingErrorAction.REPLACE)
+                .onUnmappableCharacter(CodingErrorAction.REPLACE)
+                .replaceWith(REPLACEMENT_BYTES);
+        } else {
+            return charset.newEncoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+        }
+    }
+
+    private CharsetDecoder newDecoder() {
+        if (!useReplacement) {
+            return this.charset.newDecoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+        } else {
+            return  charset.newDecoder()
+                .onMalformedInput(CodingErrorAction.REPLACE)
+                .onUnmappableCharacter(CodingErrorAction.REPLACE)
+                .replaceWith(REPLACEMENT_STRING);
+        }
+    }
+
+
     /**
-     * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#encode(java.lang.String)
+     * @see ZipEncoding#encode(java.lang.String)
      */
     @Override
     public ByteBuffer encode(final String name) {
-        final CharsetEncoder enc = this.charset.newEncoder();
-
-        enc.onMalformedInput(CodingErrorAction.REPORT);
-        enc.onUnmappableCharacter(CodingErrorAction.REPORT);
+        final CharsetEncoder enc = newEncoder();
 
         final CharBuffer cb = CharBuffer.wrap(name);
-        ByteBuffer out = ByteBuffer.allocate(name.length()
-                                             + (name.length() + 1) / 2);
+        int estimatedSize = (int) Math.ceil(name.length() * enc.averageBytesPerChar());
+        ByteBuffer out = ByteBuffer.allocate(estimatedSize);
 
         while (cb.remaining() > 0) {
             final CoderResult res = enc.encode(cb, out,true);
@@ -114,13 +145,12 @@ class NioZipEncoding implements ZipEncoding {
 
     /**
      * @see
-     * org.apache.commons.compress.archivers.zip.ZipEncoding#decode(byte[])
+     * ZipEncoding#decode(byte[])
      */
     @Override
     public String decode(final byte[] data) throws IOException {
-        return this.charset.newDecoder()
-            .onMalformedInput(CodingErrorAction.REPORT)
-            .onUnmappableCharacter(CodingErrorAction.REPORT)
+        return newDecoder()
             .decode(ByteBuffer.wrap(data)).toString();
     }
+
 }


[12/12] commons-compress git commit: COMPRESS-410 record change

Posted by bo...@apache.org.
COMPRESS-410 record change

closes #37


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/9ae52491
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/9ae52491
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/9ae52491

Branch: refs/heads/master
Commit: 9ae52491c2f3510d83d08489632177b134868fc4
Parents: 0065a04
Author: Stefan Bodewig <bo...@apache.org>
Authored: Wed Jul 5 17:33:52 2017 +0200
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Wed Jul 5 17:33:52 2017 +0200

----------------------------------------------------------------------
 src/changes/changes.xml | 5 +++++
 1 file changed, 5 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/9ae52491/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 79405a9..f217052 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -108,6 +108,11 @@ wanted to create such files.">
         assumed the time was stored as unsigned 32bit int and thus
         created incorrect results for years after 2037.
       </action>
+      <action issue="COMPRESS-410" type="fix" date="2017-07-05"
+              due-to="Simon Spero ">
+        Removed ZipEncoding code that became obsolete when we started
+        to require Java5 as baseline long ago.
+      </action>
     </release>
     <release version="1.14" date="2017-05-14"
              description="Release 1.14">